From bb16991fe073643b59bd4195863cafc08032f04b Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:38 +0200 Subject: [PATCH 01/22] parallels: fix formatting in bdrv_parallels initialization Old code is ugly and contains tabulations. There are no functional changes in this patch. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 36 +++++++++++++++++++----------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 48c32d6821..2ebd8e1301 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1249,23 +1249,25 @@ static void parallels_close(BlockDriverState *bs) } static BlockDriver bdrv_parallels = { - .format_name = "parallels", - .instance_size = sizeof(BDRVParallelsState), - .bdrv_probe = parallels_probe, - .bdrv_open = parallels_open, - .bdrv_close = parallels_close, - .bdrv_child_perm = bdrv_default_perms, - .bdrv_co_block_status = parallels_co_block_status, - .bdrv_has_zero_init = bdrv_has_zero_init_1, - .bdrv_co_flush_to_os = parallels_co_flush_to_os, - .bdrv_co_readv = parallels_co_readv, - .bdrv_co_writev = parallels_co_writev, - .is_format = true, - .supports_backing = true, - .bdrv_co_create = parallels_co_create, - .bdrv_co_create_opts = parallels_co_create_opts, - .bdrv_co_check = parallels_co_check, - .create_opts = ¶llels_create_opts, + .format_name = "parallels", + .instance_size = sizeof(BDRVParallelsState), + .create_opts = ¶llels_create_opts, + .is_format = true, + .supports_backing = true, + + .bdrv_has_zero_init = bdrv_has_zero_init_1, + + .bdrv_probe = parallels_probe, + .bdrv_open = parallels_open, + .bdrv_close = parallels_close, + .bdrv_child_perm = bdrv_default_perms, + .bdrv_co_block_status = parallels_co_block_status, + .bdrv_co_flush_to_os = parallels_co_flush_to_os, + .bdrv_co_readv = parallels_co_readv, + .bdrv_co_writev = parallels_co_writev, + .bdrv_co_create = parallels_co_create, + .bdrv_co_create_opts = parallels_co_create_opts, + .bdrv_co_check = parallels_co_check, }; static void bdrv_parallels_init(void) From 73f3e136922be5864dace55e744d23914f21e358 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:39 +0200 Subject: [PATCH 02/22] parallels: mark driver as supporting CBT Parallels driver indeed support Parallels Dirty Bitmap Feature in read-only mode. The patch adds bdrv_supports_persistent_dirty_bitmap() callback which always return 1 to indicate that. This will allow to copy CBT from Parallels image with qemu-img. Note: read-write support is signalled through bdrv_co_can_store_new_dirty_bitmap() and is different. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index 2ebd8e1301..428f72de1c 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1248,6 +1248,11 @@ static void parallels_close(BlockDriverState *bs) error_free(s->migration_blocker); } +static bool parallels_is_support_dirty_bitmaps(BlockDriverState *bs) +{ + return 1; +} + static BlockDriver bdrv_parallels = { .format_name = "parallels", .instance_size = sizeof(BDRVParallelsState), @@ -1256,6 +1261,7 @@ static BlockDriver bdrv_parallels = { .supports_backing = true, .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_supports_persistent_dirty_bitmap = parallels_is_support_dirty_bitmaps, .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, From c74cd7bd328001c85cd6f74f63b52ae1263b4c22 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:40 +0200 Subject: [PATCH 03/22] parallels: fix memory leak in parallels_open() We should free opts allocated through qemu_opts_create() at the end. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/parallels.c b/block/parallels.c index 428f72de1c..af7be427c9 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1217,6 +1217,7 @@ fail_format: fail_options: ret = -EINVAL; fail: + qemu_opts_del(opts); /* * "s" object was allocated by g_malloc0 so we can safely * try to free its fields even they were not allocated. From 93361b7e33ab7046e7c4c2064503ff199ae1cef9 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:42 +0200 Subject: [PATCH 04/22] parallels: invent parallels_opts_prealloc() helper to parse prealloc opts This patch creates above mentioned helper and moves its usage to the beginning of parallels_open(). This simplifies parallels_open() a bit. The patch also ensures that we store prealloc_size on block driver state always in sectors. This makes code cleaner and avoids wrong opinion at the assignment that the value is in bytes. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 72 +++++++++++++++++++++++++++++------------------ 1 file changed, 44 insertions(+), 28 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index af7be427c9..ae006e7fc7 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1025,6 +1025,44 @@ static int parallels_update_header(BlockDriverState *bs) return bdrv_pwrite_sync(bs->file, 0, size, s->header, 0); } + +static int parallels_opts_prealloc(BlockDriverState *bs, QDict *options, + Error **errp) +{ + int err; + char *buf; + int64_t bytes; + BDRVParallelsState *s = bs->opaque; + Error *local_err = NULL; + QemuOpts *opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); + if (!opts) { + return -ENOMEM; + } + + err = -EINVAL; + if (!qemu_opts_absorb_qdict(opts, options, errp)) { + goto done; + } + + bytes = qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); + s->prealloc_size = bytes >> BDRV_SECTOR_BITS; + buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); + /* prealloc_mode can be downgraded later during allocate_clusters */ + s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf, + PRL_PREALLOC_MODE_FALLOCATE, + &local_err); + g_free(buf); + if (local_err != NULL) { + error_propagate(errp, local_err); + goto done; + } + err = 0; + +done: + qemu_opts_del(opts); + return err; +} + static int parallels_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -1033,11 +1071,13 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, int ret, size, i; int64_t file_nb_sectors, sector; uint32_t data_start; - QemuOpts *opts = NULL; - Error *local_err = NULL; - char *buf; bool data_off_is_correct; + ret = parallels_opts_prealloc(bs, options, errp); + if (ret < 0) { + return ret; + } + ret = bdrv_open_file_child(NULL, options, "file", bs, errp); if (ret < 0) { return ret; @@ -1078,6 +1118,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -EFBIG; goto fail; } + s->prealloc_size = MAX(s->tracks, s->prealloc_size); s->cluster_size = s->tracks << BDRV_SECTOR_BITS; s->bat_size = le32_to_cpu(ph.bat_entries); @@ -1117,29 +1158,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->header_size = size; } - opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, errp); - if (!opts) { - goto fail_options; - } - - if (!qemu_opts_absorb_qdict(opts, options, errp)) { - goto fail_options; - } - - s->prealloc_size = - qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); - s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS); - buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); - /* prealloc_mode can be downgraded later during allocate_clusters */ - s->prealloc_mode = qapi_enum_parse(&prealloc_mode_lookup, buf, - PRL_PREALLOC_MODE_FALLOCATE, - &local_err); - g_free(buf); - if (local_err != NULL) { - error_propagate(errp, local_err); - goto fail_options; - } - if (ph.ext_off) { if (flags & BDRV_O_RDWR) { /* @@ -1214,10 +1232,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, fail_format: error_setg(errp, "Image not in Parallels format"); -fail_options: ret = -EINVAL; fail: - qemu_opts_del(opts); /* * "s" object was allocated by g_malloc0 so we can safely * try to free its fields even they were not allocated. From 9c398781361b6f77a0da1aa9389098ccec16760b Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:43 +0200 Subject: [PATCH 05/22] parallels: return earler in fail_format branch in parallels_open() We do not need to perform any deallocation/cleanup if wrong format is detected. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/block/parallels.c b/block/parallels.c index ae006e7fc7..12f38cf70b 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1232,7 +1232,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, fail_format: error_setg(errp, "Image not in Parallels format"); - ret = -EINVAL; + return -EINVAL; + fail: /* * "s" object was allocated by g_malloc0 so we can safely From e17b9d08d90bdbbe2bc4d63d758ae2cb9ba806a6 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:44 +0200 Subject: [PATCH 06/22] parallels: return earlier from parallels_open() function on error At the beginning of the function we can return immediately until we really allocate s->header. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 12f38cf70b..bd26c8db63 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1090,7 +1090,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = bdrv_pread(bs->file, 0, sizeof(ph), &ph, 0); if (ret < 0) { - goto fail; + return ret; } bs->total_sectors = le64_to_cpu(ph.nb_sectors); @@ -1110,13 +1110,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->tracks = le32_to_cpu(ph.tracks); if (s->tracks == 0) { error_setg(errp, "Invalid image: Zero sectors per track"); - ret = -EINVAL; - goto fail; + return -EINVAL; } if (s->tracks > INT32_MAX/513) { error_setg(errp, "Invalid image: Too big cluster"); - ret = -EFBIG; - goto fail; + return -EFBIG; } s->prealloc_size = MAX(s->tracks, s->prealloc_size); s->cluster_size = s->tracks << BDRV_SECTOR_BITS; @@ -1124,16 +1122,14 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->bat_size = le32_to_cpu(ph.bat_entries); if (s->bat_size > INT_MAX / sizeof(uint32_t)) { error_setg(errp, "Catalog too large"); - ret = -EFBIG; - goto fail; + return -EFBIG; } size = bat_entry_off(s->bat_size); s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file->bs)); s->header = qemu_try_blockalign(bs->file->bs, s->header_size); if (s->header == NULL) { - ret = -ENOMEM; - goto fail; + return -ENOMEM; } ret = bdrv_pread(bs->file, 0, s->header_size, s->header, 0); From 8f5f5326250791ceb597b486ff52d8ea83d6c2b0 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:45 +0200 Subject: [PATCH 07/22] parallels: refactor path when we need to re-check image in parallels_open More conditions follows thus the check should be more scalable. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index bd26c8db63..af3b4894d7 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -1071,7 +1071,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, int ret, size, i; int64_t file_nb_sectors, sector; uint32_t data_start; - bool data_off_is_correct; + bool need_check = false; ret = parallels_opts_prealloc(bs, options, errp); if (ret < 0) { @@ -1139,11 +1139,14 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->bat_bitmap = (uint32_t *)(s->header + 1); if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { - s->header_unclean = true; + need_check = s->header_unclean = true; + } + + { + bool ok = parallels_test_data_off(s, file_nb_sectors, &data_start); + need_check = need_check || !ok; } - data_off_is_correct = parallels_test_data_off(s, file_nb_sectors, - &data_start); s->data_start = data_start; s->data_end = s->data_start; if (s->data_end < (s->header_size >> BDRV_SECTOR_BITS)) { @@ -1200,6 +1203,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->data_end = sector + s->tracks; } } + need_check = need_check || s->data_end > file_nb_sectors; /* * We don't repair the image here if it's opened for checks. Also we don't @@ -1209,12 +1213,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, return 0; } - /* - * Repair the image if it's dirty or - * out-of-image corruption was detected. - */ - if (s->data_end > file_nb_sectors || s->header_unclean - || !data_off_is_correct) { + /* Repair the image if corruption was detected. */ + if (need_check) { BdrvCheckResult res; ret = bdrv_check(bs, &res, BDRV_FIX_ERRORS | BDRV_FIX_LEAKS); if (ret < 0) { @@ -1223,7 +1223,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } } - return 0; fail_format: From a398275e88103c0c0a7f3784b30683f3bd3f640b Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:46 +0200 Subject: [PATCH 08/22] parallels: create mark_used() helper which sets bit in used bitmap This functionality is used twice already and next patch will add more code with it. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 34 +++++++++++++++++++++++++--------- 1 file changed, 25 insertions(+), 9 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index af3b4894d7..66c86d87e3 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -178,6 +178,21 @@ static void parallels_set_bat_entry(BDRVParallelsState *s, bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1); } +static int mark_used(BlockDriverState *bs, + unsigned long *bitmap, uint32_t bitmap_size, int64_t off) +{ + BDRVParallelsState *s = bs->opaque; + uint32_t cluster_index = host_cluster_index(s, off); + if (cluster_index >= bitmap_size) { + return -E2BIG; + } + if (test_bit(cluster_index, bitmap)) { + return -EBUSY; + } + bitmap_set(bitmap, cluster_index, 1); + return 0; +} + static int64_t coroutine_fn GRAPH_RDLOCK allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) @@ -621,7 +636,7 @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, BDRVParallelsState *s = bs->opaque; int64_t host_off, host_sector, guest_sector; unsigned long *bitmap; - uint32_t i, bitmap_size, cluster_index, bat_entry; + uint32_t i, bitmap_size, bat_entry; int n, ret = 0; uint64_t *buf = NULL; bool fixed = false; @@ -655,10 +670,9 @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, continue; } - cluster_index = host_cluster_index(s, host_off); - assert(cluster_index < bitmap_size); - if (!test_bit(cluster_index, bitmap)) { - bitmap_set(bitmap, cluster_index, 1); + ret = mark_used(bs, bitmap, bitmap_size, host_off); + assert(ret != -E2BIG); + if (ret == 0) { continue; } @@ -713,11 +727,13 @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, * consistent for the new allocated clusters too. * * Note, clusters allocated outside the current image are not - * considered, and the bitmap size doesn't change. + * considered, and the bitmap size doesn't change. This specifically + * means that -E2BIG is OK. */ - cluster_index = host_cluster_index(s, host_off); - if (cluster_index < bitmap_size) { - bitmap_set(bitmap, cluster_index, 1); + ret = mark_used(bs, bitmap, bitmap_size, host_off); + if (ret == -EBUSY) { + res->check_errors++; + goto out_repair_bat; } fixed = true; From f025a99e614f730b3d8902158f4c6c2d947eb855 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:47 +0200 Subject: [PATCH 09/22] tests: ensure that image validation will not cure the corruption Since commit cfce1091d55322789582480798a891cbaf66924e Author: Alexander Ivanov Date: Tue Jul 18 12:44:29 2023 +0200 parallels: Image repairing in parallels_open() there is a potential pit fall with calling qemu-io -c "read" The image is opened in read-write mode and thus could be potentially repaired. This could ruin testing process. The patch forces read-only opening for reads. In that case repairing is impossible. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- tests/qemu-iotests/tests/parallels-checks | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index a7a1b357b5..5917ee079d 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -91,7 +91,7 @@ file_size=`stat --printf="%s" "$TEST_IMG"` echo "file size: $file_size" echo "== check last cluster ==" -{ $QEMU_IO -c "read -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x11 $LAST_CLUSTER_OFF $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir # Clear image _make_test_img $SIZE @@ -105,19 +105,20 @@ echo "== write another pattern to second cluster ==" { $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo "== check second cluster ==" -{ $QEMU_IO -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== corrupt image ==" poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00" echo "== check second cluster ==" -{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo "== repair image ==" _check_test_img -r all echo "== check second cluster ==" -{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo "== check first cluster on host ==" printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1` From 6f2206b0cb12c1c6f6344b1c44036020b363fa9f Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:48 +0200 Subject: [PATCH 10/22] parallels: fix broken parallels_check_data_off() Once we have repaired data_off field in the header we should update s->data_start which is calculated on the base of it. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/parallels.c b/block/parallels.c index 66c86d87e3..2b5f2b54a0 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -531,6 +531,7 @@ parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res, res->corruptions++; if (fix & BDRV_FIX_ERRORS) { s->header->data_off = cpu_to_le32(data_off); + s->data_start = data_off; res->corruptions_fixed++; } From 9425a9441ae1ff633ae574b879b0e7fc99fba0cb Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:49 +0200 Subject: [PATCH 11/22] parallels: add test which will validate data_off fixes through repair We have only check through self-repair and that proven to be not enough. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- tests/qemu-iotests/tests/parallels-checks | 17 +++++++++++++++++ tests/qemu-iotests/tests/parallels-checks.out | 18 ++++++++++++++++++ 2 files changed, 35 insertions(+) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index 5917ee079d..f4ca50295e 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -140,6 +140,23 @@ poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff" echo "== check first cluster ==" { $QEMU_IO -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +# Clear image +_make_test_img $SIZE + +echo "== TEST DATA_OFF THROUGH REPAIR ==" + +echo "== write pattern to first cluster ==" +{ $QEMU_IO -c "write -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== spoil data_off field ==" +poke_file "$TEST_IMG" "$DATA_OFF_OFFSET" "\xff\xff\xff\xff" + +echo "== repair image ==" +_check_test_img -r all + +echo "== check first cluster ==" +{ $QEMU_IO -r -c "read -P 0x55 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out index 98a3a7f55e..74a5e29260 100644 --- a/tests/qemu-iotests/tests/parallels-checks.out +++ b/tests/qemu-iotests/tests/parallels-checks.out @@ -72,4 +72,22 @@ wrote 1048576/1048576 bytes at offset 0 Repairing data_off field has incorrect value read 1048576/1048576 bytes at offset 0 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 +== TEST DATA_OFF THROUGH REPAIR == +== write pattern to first cluster == +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== spoil data_off field == +== repair image == +Repairing data_off field has incorrect value +The following inconsistencies were found and repaired: + + 0 leaked clusters + 1 corruptions + +Double checking the fixed image now... +No errors were found on the image. +== check first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done From e185100fec3819dd51403c0abcdc899341cc67c1 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:50 +0200 Subject: [PATCH 12/22] parallels: collect bitmap of used clusters at open If the operation is failed, we need to check image consistency if the problem is not about memory allocation. Bitmap adjustments in allocate_cluster are not performed yet. They worth to be separate. This was proven useful during debug of this series. Kept as is for future bissecting. It should be specifically noted that used bitmap must be recalculated if data_off has been fixed during image consistency check. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 73 +++++++++++++++++++++++++++++++++++++++++++++++ block/parallels.h | 3 ++ 2 files changed, 76 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index 2b5f2b54a0..a997880c34 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -193,6 +193,58 @@ static int mark_used(BlockDriverState *bs, return 0; } +/* + * Collect used bitmap. The image can contain errors, we should fill the + * bitmap anyway, as much as we can. This information will be used for + * error resolution. + */ +static int parallels_fill_used_bitmap(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + int64_t payload_bytes; + uint32_t i; + int err = 0; + + payload_bytes = bdrv_getlength(bs->file->bs); + if (payload_bytes < 0) { + return payload_bytes; + } + payload_bytes -= s->data_start * BDRV_SECTOR_SIZE; + if (payload_bytes < 0) { + return -EINVAL; + } + + s->used_bmap_size = DIV_ROUND_UP(payload_bytes, s->cluster_size); + if (s->used_bmap_size == 0) { + return 0; + } + s->used_bmap = bitmap_try_new(s->used_bmap_size); + if (s->used_bmap == NULL) { + return -ENOMEM; + } + + for (i = 0; i < s->bat_size; i++) { + int err2; + int64_t host_off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (host_off == 0) { + continue; + } + + err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off); + if (err2 < 0 && err == 0) { + err = err2; + } + } + return err; +} + +static void parallels_free_used_bitmap(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + s->used_bmap_size = 0; + g_free(s->used_bmap); +} + static int64_t coroutine_fn GRAPH_RDLOCK allocate_clusters(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) @@ -530,8 +582,17 @@ parallels_check_data_off(BlockDriverState *bs, BdrvCheckResult *res, res->corruptions++; if (fix & BDRV_FIX_ERRORS) { + int err; s->header->data_off = cpu_to_le32(data_off); s->data_start = data_off; + + parallels_free_used_bitmap(bs); + err = parallels_fill_used_bitmap(bs); + if (err == -ENOMEM) { + res->check_errors++; + return err; + } + res->corruptions_fixed++; } @@ -1222,6 +1283,14 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } need_check = need_check || s->data_end > file_nb_sectors; + if (!need_check) { + ret = parallels_fill_used_bitmap(bs); + if (ret == -ENOMEM) { + goto fail; + } + need_check = need_check || ret < 0; /* These are correctable errors */ + } + /* * We don't repair the image here if it's opened for checks. Also we don't * want to change inactive images and can't change readonly images. @@ -1251,6 +1320,8 @@ fail: * "s" object was allocated by g_malloc0 so we can safely * try to free its fields even they were not allocated. */ + parallels_free_used_bitmap(bs); + error_free(s->migration_blocker); g_free(s->bat_dirty_bmap); qemu_vfree(s->header); @@ -1271,6 +1342,8 @@ static void parallels_close(BlockDriverState *bs) PREALLOC_MODE_OFF, 0, NULL); } + parallels_free_used_bitmap(bs); + g_free(s->bat_dirty_bmap); qemu_vfree(s->header); diff --git a/block/parallels.h b/block/parallels.h index 4e53e9572d..6b199443cf 100644 --- a/block/parallels.h +++ b/block/parallels.h @@ -72,6 +72,9 @@ typedef struct BDRVParallelsState { unsigned long *bat_dirty_bmap; unsigned int bat_dirty_block; + unsigned long *used_bmap; + unsigned long used_bmap_size; + uint32_t *bat_bitmap; unsigned int bat_size; From e6d3486d9a4020a65f831ea413a3cd3323fa0abd Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:51 +0200 Subject: [PATCH 13/22] tests: fix broken deduplication check in parallels format test Original check is broken as supposed reading from 2 different clusters results in read from the same file offset twice. This is definitely wrong. We should be sure that * the content of both clusters is correct after repair * clusters are at the different offsets after repair In order to check the latter we write some content into the first one and validate that fact. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- tests/qemu-iotests/tests/parallels-checks | 14 ++++++++++---- tests/qemu-iotests/tests/parallels-checks.out | 16 ++++++++++++---- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index f4ca50295e..df99558486 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -117,14 +117,20 @@ echo "== check second cluster ==" echo "== repair image ==" _check_test_img -r all +echo "== check the first cluster ==" +{ $QEMU_IO -r -c "read -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== check second cluster ==" { $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir -echo "== check first cluster on host ==" -printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1` +echo "== write another pattern to the first clusters ==" +{ $QEMU_IO -c "write -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir -echo "== check second cluster on host ==" -printf "content: 0x%02x\n" `peek_file_le $TEST_IMG $(($CLUSTER_SIZE)) 1` +echo "== check the first cluster ==" +{ $QEMU_IO -r -c "read -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the second cluster (deduplicated) ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir # Clear image _make_test_img $SIZE diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out index 74a5e29260..1325d2b611 100644 --- a/tests/qemu-iotests/tests/parallels-checks.out +++ b/tests/qemu-iotests/tests/parallels-checks.out @@ -55,13 +55,21 @@ The following inconsistencies were found and repaired: Double checking the fixed image now... No errors were found on the image. +== check the first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == check second cluster == read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) -== check first cluster on host == -content: 0x11 -== check second cluster on host == -content: 0x11 +== write another pattern to the first clusters == +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the second cluster (deduplicated) == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 == TEST DATA_OFF CHECK == == write pattern to first cluster == From 4e828bf4c24436baf5bf5383349b84cc294414d2 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:52 +0200 Subject: [PATCH 14/22] tests: test self-cure of parallels image with duplicated clusters The test is quite similar with the original one for duplicated clusters. There is the only difference in the operation which should fix the image. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- tests/qemu-iotests/tests/parallels-checks | 36 +++++++++++++++++++ tests/qemu-iotests/tests/parallels-checks.out | 31 ++++++++++++++++ 2 files changed, 67 insertions(+) diff --git a/tests/qemu-iotests/tests/parallels-checks b/tests/qemu-iotests/tests/parallels-checks index df99558486..b281246a42 100755 --- a/tests/qemu-iotests/tests/parallels-checks +++ b/tests/qemu-iotests/tests/parallels-checks @@ -135,6 +135,42 @@ echo "== check the second cluster (deduplicated) ==" # Clear image _make_test_img $SIZE +echo "== TEST DUPLICATION SELF-CURE ==" + +echo "== write pattern to whole image ==" +{ $QEMU_IO -c "write -P 0x11 0 $SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== write another pattern to second cluster ==" +{ $QEMU_IO -c "write -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check second cluster ==" +{ $QEMU_IO -r -c "read -P 0x55 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + + +echo "== corrupt image ==" +poke_file "$TEST_IMG" "$(($BAT_OFFSET + 4))" "\x01\x00\x00\x00" + +echo "== check second cluster ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the first cluster with self-repair ==" +{ $QEMU_IO -c "read -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check second cluster ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== write another pattern to the first clusters ==" +{ $QEMU_IO -c "write -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the first cluster ==" +{ $QEMU_IO -r -c "read -P 0x66 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check the second cluster (deduplicated) ==" +{ $QEMU_IO -r -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +# Clear image +_make_test_img $SIZE + echo "== TEST DATA_OFF CHECK ==" echo "== write pattern to first cluster ==" diff --git a/tests/qemu-iotests/tests/parallels-checks.out b/tests/qemu-iotests/tests/parallels-checks.out index 1325d2b611..9793423111 100644 --- a/tests/qemu-iotests/tests/parallels-checks.out +++ b/tests/qemu-iotests/tests/parallels-checks.out @@ -71,6 +71,37 @@ read 1048576/1048576 bytes at offset 0 read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 +== TEST DUPLICATION SELF-CURE == +== write pattern to whole image == +wrote 4194304/4194304 bytes at offset 0 +4 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== write another pattern to second cluster == +wrote 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check second cluster == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== corrupt image == +== check second cluster == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the first cluster with self-repair == +Repairing duplicate offset in BAT entry 1 +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check second cluster == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== write another pattern to the first clusters == +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the first cluster == +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check the second cluster (deduplicated) == +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=4194304 == TEST DATA_OFF CHECK == == write pattern to first cluster == wrote 1048576/1048576 bytes at offset 0 From 73194d3f54d04088f1821f98280da5eb6fb0a4c3 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:53 +0200 Subject: [PATCH 15/22] parallels: accept multiple clusters in mark_used() This would be useful in the next patch in allocate_clusters(). This change would not imply serious performance drawbacks as usually image is full of data or are at the end of the bitmap. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index a997880c34..3df73aa8a0 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -178,18 +178,20 @@ static void parallels_set_bat_entry(BDRVParallelsState *s, bitmap_set(s->bat_dirty_bmap, bat_entry_off(index) / s->bat_dirty_block, 1); } -static int mark_used(BlockDriverState *bs, - unsigned long *bitmap, uint32_t bitmap_size, int64_t off) +static int mark_used(BlockDriverState *bs, unsigned long *bitmap, + uint32_t bitmap_size, int64_t off, uint32_t count) { BDRVParallelsState *s = bs->opaque; uint32_t cluster_index = host_cluster_index(s, off); - if (cluster_index >= bitmap_size) { + unsigned long next_used; + if (cluster_index + count > bitmap_size) { return -E2BIG; } - if (test_bit(cluster_index, bitmap)) { + next_used = find_next_bit(bitmap, bitmap_size, cluster_index); + if (next_used < cluster_index + count) { return -EBUSY; } - bitmap_set(bitmap, cluster_index, 1); + bitmap_set(bitmap, cluster_index, count); return 0; } @@ -230,7 +232,7 @@ static int parallels_fill_used_bitmap(BlockDriverState *bs) continue; } - err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off); + err2 = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, 1); if (err2 < 0 && err == 0) { err = err2; } @@ -732,7 +734,7 @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, continue; } - ret = mark_used(bs, bitmap, bitmap_size, host_off); + ret = mark_used(bs, bitmap, bitmap_size, host_off, 1); assert(ret != -E2BIG); if (ret == 0) { continue; @@ -792,7 +794,7 @@ parallels_check_duplicate(BlockDriverState *bs, BdrvCheckResult *res, * considered, and the bitmap size doesn't change. This specifically * means that -E2BIG is OK. */ - ret = mark_used(bs, bitmap, bitmap_size, host_off); + ret = mark_used(bs, bitmap, bitmap_size, host_off, 1); if (ret == -EBUSY) { res->check_errors++; goto out_repair_bat; From eeb1e6dc49e7a550c8f1d843a707b93e11cabf8b Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:54 +0200 Subject: [PATCH 16/22] parallels: update used bitmap in allocate_cluster We should extend the bitmap if the file is extended and set the bit in the image used bitmap once the cluster is allocated. Sanity check at that moment also looks like a good idea. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index 3df73aa8a0..ec35237119 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -282,6 +282,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, return len; } if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) { + uint32_t new_usedsize; + space += s->prealloc_size; /* * We require the expanded size to read back as zero. If the @@ -305,6 +307,12 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { return ret; } + + new_usedsize = s->used_bmap_size + + (space << BDRV_SECTOR_BITS) / s->cluster_size; + s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size, + new_usedsize); + s->used_bmap_size = new_usedsize; } /* @@ -336,6 +344,12 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } + ret = mark_used(bs, s->used_bmap, s->used_bmap_size, + s->data_end << BDRV_SECTOR_BITS, to_allocate); + if (ret < 0) { + /* Image consistency is broken. Alarm! */ + return ret; + } for (i = 0; i < to_allocate; i++) { parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier); s->data_end += s->tracks; From 3ac10d19091f4e9375b66442e3edaab466d38617 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:55 +0200 Subject: [PATCH 17/22] parallels: naive implementation of allocate_clusters with used bitmap The access to the bitmap is not optimized completely. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 51 ++++++++++++++++++++++++++++++++++++----------- 1 file changed, 39 insertions(+), 12 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index ec35237119..ebcdff8736 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -253,7 +253,7 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, { int ret = 0; BDRVParallelsState *s = bs->opaque; - int64_t pos, space, idx, to_allocate, i, len; + int64_t i, pos, idx, to_allocate, first_free, host_off; pos = block_status(s, sector_num, nb_sectors, pnum); if (pos > 0) { @@ -276,15 +276,13 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, */ assert(idx < s->bat_size && idx + to_allocate <= s->bat_size); - space = to_allocate * s->tracks; - len = bdrv_co_getlength(bs->file->bs); - if (len < 0) { - return len; - } - if (s->data_end + space > (len >> BDRV_SECTOR_BITS)) { + first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size); + if (first_free == s->used_bmap_size) { uint32_t new_usedsize; + int64_t space = to_allocate * s->tracks + s->prealloc_size; + + host_off = s->data_end * BDRV_SECTOR_SIZE; - space += s->prealloc_size; /* * We require the expanded size to read back as zero. If the * user permitted truncation, we try that; but if it fails, we @@ -313,6 +311,32 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size, new_usedsize); s->used_bmap_size = new_usedsize; + } else { + int64_t next_used; + next_used = find_next_bit(s->used_bmap, s->used_bmap_size, first_free); + + /* Not enough continuous clusters in the middle, adjust the size */ + if (next_used - first_free < to_allocate) { + to_allocate = next_used - first_free; + *pnum = (idx + to_allocate) * s->tracks - sector_num; + } + + host_off = s->data_start * BDRV_SECTOR_SIZE; + host_off += first_free * s->cluster_size; + + /* + * No need to preallocate if we are using tail area from the above + * branch. In the other case we are likely re-using hole. Preallocate + * the space if required by the prealloc_mode. + */ + if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE && + host_off < s->data_end * BDRV_SECTOR_SIZE) { + ret = bdrv_co_pwrite_zeroes(bs->file, host_off, + s->cluster_size * to_allocate, 0); + if (ret < 0) { + return ret; + } + } } /* @@ -344,15 +368,18 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } - ret = mark_used(bs, s->used_bmap, s->used_bmap_size, - s->data_end << BDRV_SECTOR_BITS, to_allocate); + ret = mark_used(bs, s->used_bmap, s->used_bmap_size, host_off, to_allocate); if (ret < 0) { /* Image consistency is broken. Alarm! */ return ret; } for (i = 0; i < to_allocate; i++) { - parallels_set_bat_entry(s, idx + i, s->data_end / s->off_multiplier); - s->data_end += s->tracks; + parallels_set_bat_entry(s, idx + i, + host_off / BDRV_SECTOR_SIZE / s->off_multiplier); + host_off += s->cluster_size; + } + if (host_off > s->data_end * BDRV_SECTOR_SIZE) { + s->data_end = host_off / BDRV_SECTOR_SIZE; } return bat2sect(s, idx) + sector_num % s->tracks; From 66d201dd5a9077caaedee66261ea10ed01944477 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:56 +0200 Subject: [PATCH 18/22] parallels: improve readability of allocate_clusters Replace 'space' representing the amount of data to preallocate with 'bytes'. Rationale: * 'space' at each place is converted to bytes * the unit is more close to the variable name Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index ebcdff8736..a97fb8b506 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -279,7 +279,8 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, first_free = find_first_zero_bit(s->used_bmap, s->used_bmap_size); if (first_free == s->used_bmap_size) { uint32_t new_usedsize; - int64_t space = to_allocate * s->tracks + s->prealloc_size; + int64_t bytes = to_allocate * s->cluster_size; + bytes += s->prealloc_size * BDRV_SECTOR_SIZE; host_off = s->data_end * BDRV_SECTOR_SIZE; @@ -289,8 +290,7 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, * force the safer-but-slower fallocate. */ if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) { - ret = bdrv_co_truncate(bs->file, - (s->data_end + space) << BDRV_SECTOR_BITS, + ret = bdrv_co_truncate(bs->file, host_off + bytes, false, PREALLOC_MODE_OFF, BDRV_REQ_ZERO_WRITE, NULL); if (ret == -ENOTSUP) { @@ -298,16 +298,13 @@ allocate_clusters(BlockDriverState *bs, int64_t sector_num, } } if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { - ret = bdrv_co_pwrite_zeroes(bs->file, - s->data_end << BDRV_SECTOR_BITS, - space << BDRV_SECTOR_BITS, 0); + ret = bdrv_co_pwrite_zeroes(bs->file, host_off, bytes, 0); } if (ret < 0) { return ret; } - new_usedsize = s->used_bmap_size + - (space << BDRV_SECTOR_BITS) / s->cluster_size; + new_usedsize = s->used_bmap_size + bytes / s->cluster_size; s->used_bmap = bitmap_zero_extend(s->used_bmap, s->used_bmap_size, new_usedsize); s->used_bmap_size = new_usedsize; From c54fb435072376ec9a6ddd4fed56543a4a735e82 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:57 +0200 Subject: [PATCH 19/22] parallels: naive implementation of parallels_co_pdiscard * Discarding with backing stores is not supported by the format. * There is no buffering/queueing of the discard operation. * Only operations aligned to the cluster are supported. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index a97fb8b506..1808029f14 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -537,6 +537,51 @@ parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, return ret; } + +static int coroutine_fn GRAPH_RDLOCK +parallels_co_pdiscard(BlockDriverState *bs, int64_t offset, int64_t bytes) +{ + int ret = 0; + uint32_t cluster, count; + BDRVParallelsState *s = bs->opaque; + + /* + * The image does not support ZERO mark inside the BAT, which means that + * stale data could be exposed from the backing file. + */ + if (bs->backing) { + return -ENOTSUP; + } + + if (!QEMU_IS_ALIGNED(offset, s->cluster_size)) { + return -ENOTSUP; + } else if (!QEMU_IS_ALIGNED(bytes, s->cluster_size)) { + return -ENOTSUP; + } + + cluster = offset / s->cluster_size; + count = bytes / s->cluster_size; + + qemu_co_mutex_lock(&s->lock); + for (; count > 0; cluster++, count--) { + int64_t host_off = bat2sect(s, cluster) << BDRV_SECTOR_BITS; + if (host_off == 0) { + continue; + } + + ret = bdrv_co_pdiscard(bs->file, host_off, s->cluster_size); + if (ret < 0) { + goto done; + } + + parallels_set_bat_entry(s, cluster, 0); + bitmap_clear(s->used_bmap, host_cluster_index(s, host_off), 1); + } +done: + qemu_co_mutex_unlock(&s->lock); + return ret; +} + static void parallels_check_unclean(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -1417,6 +1462,7 @@ static BlockDriver bdrv_parallels = { .bdrv_co_create = parallels_co_create, .bdrv_co_create_opts = parallels_co_create_opts, .bdrv_co_check = parallels_co_check, + .bdrv_co_pdiscard = parallels_co_pdiscard, }; static void bdrv_parallels_init(void) From 4248e34be9c16f93b4720d455977f782615a4d63 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:58 +0200 Subject: [PATCH 20/22] tests: extend test 131 to cover availability of the discard operation This patch contains test which minimally tests discard and new cluster allocation logic. The following checks are added: * write 2 clusters, discard the first allocated * write another cluster, check that the hole is filled * write 2 clusters, discard the first allocated, write 1 cluster at non-aligned to cluster offset (2 new clusters should be allocated) Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- tests/qemu-iotests/131 | 31 +++++++++++++++++++++++++++++++ tests/qemu-iotests/131.out | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131 index 304bbb3f61..324008b3f6 100755 --- a/tests/qemu-iotests/131 +++ b/tests/qemu-iotests/131 @@ -74,6 +74,37 @@ poke_file "$TEST_IMG" "$inuse_offset" "\x59\x6e\x6f\x74" echo "== read corrupted image with repairing ==" { $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo "== check discard ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "discard 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check simple allocation over the discarded hole ==" + +{ $QEMU_IO -c "write -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check more complex allocation over the discard hole ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 $CLUSTER_DBL_SIZE $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "discard $CLUSTER_DBL_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +# There is 1 cluster hole. Fill it fully and allocate 1 cluster at the end +{ $QEMU_IO -c "write -P 0x12 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0x12 $CLUSTER_HALF_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 $((CLUSTER_SIZE + CLUSTER_HALF_SIZE)) $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== allocate with backing ==" # Verify that allocating clusters works fine even when there is a backing image. # Regression test for a bug where we would pass a buffer read from the backing diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out index d2904578df..27df91ca97 100644 --- a/tests/qemu-iotests/131.out +++ b/tests/qemu-iotests/131.out @@ -26,6 +26,44 @@ read 524288/524288 bytes at offset 0 Repairing image was not closed correctly read 1048576/1048576 bytes at offset 1048576 1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check discard == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 2097152/2097152 bytes at offset 0 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x200000 TEST_DIR/t.IMGFMT +discard 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x100000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check simple allocation over the discarded hole == +wrote 1048576/1048576 bytes at offset 2097152 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x100000 0x100000 TEST_DIR/t.IMGFMT +0x200000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 2097152 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check more complex allocation over the discard hole == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 2097152/2097152 bytes at offset 2097152 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +discard 1048576/1048576 bytes at offset 2097152 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 1048576/1048576 bytes at offset 524288 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0 0x100000 TEST_DIR/t.IMGFMT +0x100000 0x100000 TEST_DIR/t.IMGFMT +0x300000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 524288 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 1572864 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == allocate with backing == Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864 From a98b260a525b85be929de76affc06370c4ecbc52 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:00:59 +0200 Subject: [PATCH 21/22] parallels: naive implementation of parallels_co_pwrite_zeroes The zero flag is missed in the Parallels format specification. We can resort to discard if we have no backing file. Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- block/parallels.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index 1808029f14..d026ce9e2f 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -582,6 +582,19 @@ done: return ret; } +static int coroutine_fn GRAPH_RDLOCK +parallels_co_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int64_t bytes, + BdrvRequestFlags flags) +{ + /* + * The zero flag is missed in the Parallels format specification. We can + * resort to discard if we have no backing file (this condition is checked + * inside parallels_co_pdiscard(). + */ + return parallels_co_pdiscard(bs, offset, bytes); +} + + static void parallels_check_unclean(BlockDriverState *bs, BdrvCheckResult *res, BdrvCheckMode fix) @@ -1463,6 +1476,7 @@ static BlockDriver bdrv_parallels = { .bdrv_co_create_opts = parallels_co_create_opts, .bdrv_co_check = parallels_co_check, .bdrv_co_pdiscard = parallels_co_pdiscard, + .bdrv_co_pwrite_zeroes = parallels_co_pwrite_zeroes, }; static void bdrv_parallels_init(void) From 1dba99e34d1bcb3c0de69c4270f9587b01e225fe Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Mon, 18 Sep 2023 20:01:00 +0200 Subject: [PATCH 22/22] tests: extend test 131 to cover availability of the write-zeroes This patch contains test which minimally tests write-zeroes on top of working discard. The following checks are added: * write 2 clusters, write-zero to the first allocated cluster * write 2 cluster, write-zero to the half the first allocated cluster Signed-off-by: Denis V. Lunev Reviewed-by: Alexander Ivanov --- tests/qemu-iotests/131 | 21 +++++++++++++++++++++ tests/qemu-iotests/131.out | 22 ++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131 index 324008b3f6..3119100e78 100755 --- a/tests/qemu-iotests/131 +++ b/tests/qemu-iotests/131 @@ -105,6 +105,27 @@ _make_test_img $size { $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir { $QEMU_IO -c "read -P 0 $((CLUSTER_SIZE + CLUSTER_HALF_SIZE)) $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo "== check write-zeroes ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_DBL_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "write -z 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IMG map "$TEST_IMG"; } 2>&1 | _filter_qemu_img_map +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0x11 $CLUSTER_SIZE $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +echo "== check cluster-partial write-zeroes ==" + +# Clear image +_make_test_img $size + +{ $QEMU_IO -c "write -P 0x11 0 $CLUSTER_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "write -z 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 0 $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0x11 $CLUSTER_HALF_SIZE $CLUSTER_HALF_SIZE" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + echo "== allocate with backing ==" # Verify that allocating clusters works fine even when there is a backing image. # Regression test for a bug where we would pass a buffer read from the backing diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out index 27df91ca97..86a2d2a49b 100644 --- a/tests/qemu-iotests/131.out +++ b/tests/qemu-iotests/131.out @@ -64,6 +64,28 @@ read 524288/524288 bytes at offset 0 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) read 524288/524288 bytes at offset 1572864 512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check write-zeroes == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 2097152/2097152 bytes at offset 0 +2 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +Offset Length File +0x100000 0x100000 TEST_DIR/t.IMGFMT +read 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1048576/1048576 bytes at offset 1048576 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check cluster-partial write-zeroes == +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +wrote 1048576/1048576 bytes at offset 0 +1 MiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 0 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 524288/524288 bytes at offset 524288 +512 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) == allocate with backing == Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 Formatting 'TEST_DIR/t.IMGFMT.base', fmt=IMGFMT size=67108864