From d134cf73b10e9d0283e1d2531299c8f9ab13b5eb Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:34 +0300 Subject: [PATCH 01/38] iotests, parallels: quote TEST_IMG in 076 test to be path-safe suggested by Jeff Cody Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Message-id: 1430207220-24458-2-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/076 | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/tests/qemu-iotests/076 b/tests/qemu-iotests/076 index ed2be3581e..0139976d29 100755 --- a/tests/qemu-iotests/076 +++ b/tests/qemu-iotests/076 @@ -49,31 +49,31 @@ nb_sectors_offset=$((0x24)) echo echo "== Read from a valid v1 image ==" _use_sample_img parallels-v1.bz2 -{ $QEMU_IO -c "read -P 0x11 0 64k" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0x11 0 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo echo "== Negative catalog size ==" _use_sample_img parallels-v1.bz2 poke_file "$TEST_IMG" "$catalog_entries_offset" "\xff\xff\xff\xff" -{ $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read 0 512" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo echo "== Overflow in catalog allocation ==" _use_sample_img parallels-v1.bz2 poke_file "$TEST_IMG" "$nb_sectors_offset" "\xff\xff\xff\xff" poke_file "$TEST_IMG" "$catalog_entries_offset" "\x01\x00\x00\x40" -{ $QEMU_IO -c "read 64M 64M" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read 64M 64M" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo echo "== Zero sectors per track ==" _use_sample_img parallels-v1.bz2 poke_file "$TEST_IMG" "$tracks_offset" "\x00\x00\x00\x00" -{ $QEMU_IO -c "read 0 512" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read 0 512" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir echo echo "== Read from a valid v2 image ==" _use_sample_img parallels-v2.bz2 -{ $QEMU_IO -c "read -P 0x11 0 64k" $TEST_IMG; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0x11 0 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir # success, all done echo "*** done" From 0789890467d30e2ab10d84b5398bdc903db8cb91 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:35 +0300 Subject: [PATCH 02/38] block/parallels: rename parallels_header to ParallelsHeader this follows QEMU coding convention Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Message-id: 1430207220-24458-3-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 4f9cd8dd23..dca0df6f77 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -35,7 +35,7 @@ #define HEADER_SIZE 64 // always little-endian -struct parallels_header { +typedef struct ParallelsHeader { char magic[16]; // "WithoutFreeSpace" uint32_t version; uint32_t heads; @@ -46,7 +46,7 @@ struct parallels_header { uint32_t inuse; uint32_t data_off; char padding[12]; -} QEMU_PACKED; +} QEMU_PACKED ParallelsHeader; typedef struct BDRVParallelsState { CoMutex lock; @@ -61,7 +61,7 @@ typedef struct BDRVParallelsState { static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename) { - const struct parallels_header *ph = (const void *)buf; + const ParallelsHeader *ph = (const void *)buf; if (buf_size < HEADER_SIZE) return 0; @@ -79,7 +79,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, { BDRVParallelsState *s = bs->opaque; int i; - struct parallels_header ph; + ParallelsHeader ph; int ret; bs->read_only = 1; // no write support yet From 2944256997dd8b080f8b0cc062e4b663cb2ec09c Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Tue, 28 Apr 2015 10:46:36 +0300 Subject: [PATCH 03/38] block/parallels: switch to bdrv_read Switch the .bdrv_read method implementation from using bdrv_pread() to bdrv_read() on the underlying file, since the latter is subject to i/o throttling while the former is not. Besides, since bdrv_read() operates in sectors rather than bytes, adjust the helper functions to do so too. Signed-off-by: Roman Kagan Reviewed-by: Denis V. Lunev Signed-off-by: Denis V. Lunev Message-id: 1430207220-24458-4-git-send-email-den@openvz.org CC: Kevin Wolf CC: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index dca0df6f77..baefd3eb9b 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -146,9 +146,8 @@ fail: return ret; } -static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num) +static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) { - BDRVParallelsState *s = bs->opaque; uint32_t index, offset; index = sector_num / s->tracks; @@ -157,24 +156,27 @@ static int64_t seek_to_sector(BlockDriverState *bs, int64_t sector_num) /* not allocated */ if ((index >= s->catalog_size) || (s->catalog_bitmap[index] == 0)) return -1; - return - ((uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset) * 512; + return (uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset; } static int parallels_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors) { + BDRVParallelsState *s = bs->opaque; + while (nb_sectors > 0) { - int64_t position = seek_to_sector(bs, sector_num); + int64_t position = seek_to_sector(s, sector_num); if (position >= 0) { - if (bdrv_pread(bs->file, position, buf, 512) != 512) - return -1; + int ret = bdrv_read(bs->file, position, buf, 1); + if (ret < 0) { + return ret; + } } else { - memset(buf, 0, 512); + memset(buf, 0, BDRV_SECTOR_SIZE); } nb_sectors--; sector_num++; - buf += 512; + buf += BDRV_SECTOR_SIZE; } return 0; } From 9de9da17d8304576e8402697bcee72c88ce499b8 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Tue, 28 Apr 2015 10:46:37 +0300 Subject: [PATCH 04/38] block/parallels: read up to cluster end in one go Teach parallels_read() to do reads in coarser granularity than just a single sector: if requested, read up to the cluster end in one go. Signed-off-by: Roman Kagan Reviewed-by: Denis V. Lunev Signed-off-by: Denis V. Lunev Reviewed-by: Stefan Hajnoczi Message-id: 1430207220-24458-5-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index baefd3eb9b..8770c82351 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -159,6 +159,13 @@ static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) return (uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset; } +static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, + int nb_sectors) +{ + int ret = s->tracks - sector_num % s->tracks; + return MIN(nb_sectors, ret); +} + static int parallels_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors) { @@ -166,17 +173,18 @@ static int parallels_read(BlockDriverState *bs, int64_t sector_num, while (nb_sectors > 0) { int64_t position = seek_to_sector(s, sector_num); + int n = cluster_remainder(s, sector_num, nb_sectors); if (position >= 0) { - int ret = bdrv_read(bs->file, position, buf, 1); + int ret = bdrv_read(bs->file, position, buf, n); if (ret < 0) { return ret; } } else { - memset(buf, 0, BDRV_SECTOR_SIZE); + memset(buf, 0, n << BDRV_SECTOR_BITS); } - nb_sectors--; - sector_num++; - buf += BDRV_SECTOR_SIZE; + nb_sectors -= n; + sector_num += n; + buf += n << BDRV_SECTOR_BITS; } return 0; } From dd3bed16ff229496b30cc77224b0c0ae645c4dae Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Tue, 28 Apr 2015 10:46:38 +0300 Subject: [PATCH 05/38] block/parallels: add get_block_status Implement VFS method for get_block_status to Parallels format driver. qemu_co_mutex_lock is not necessary yet (the driver is read-only) but will be necessary very soon when write will be supported. Signed-off-by: Roman Kagan Reviewed-by: Denis V. Lunev Signed-off-by: Denis V. Lunev Message-id: 1430207220-24458-6-git-send-email-den@openvz.org CC: Kevin Wolf CC: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index 8770c82351..b469984ef6 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -166,6 +166,26 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, return MIN(nb_sectors, ret); } +static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, int *pnum) +{ + BDRVParallelsState *s = bs->opaque; + int64_t offset; + + qemu_co_mutex_lock(&s->lock); + offset = seek_to_sector(s, sector_num); + qemu_co_mutex_unlock(&s->lock); + + *pnum = cluster_remainder(s, sector_num, nb_sectors); + + if (offset < 0) { + return 0; + } + + return (offset << BDRV_SECTOR_BITS) | + BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; +} + static int parallels_read(BlockDriverState *bs, int64_t sector_num, uint8_t *buf, int nb_sectors) { @@ -213,6 +233,7 @@ static BlockDriver bdrv_parallels = { .bdrv_open = parallels_open, .bdrv_read = parallels_co_read, .bdrv_close = parallels_close, + .bdrv_co_get_block_status = parallels_co_get_block_status, }; static void bdrv_parallels_init(void) From 481fb9cf18925eab19e4af8a44bd86b82eb897ad Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:39 +0300 Subject: [PATCH 06/38] block/parallels: provide _co_readv routine for parallels format driver Main approach is taken from qcow2_co_readv. The patch drops coroutine lock for the duration of IO operation and peforms normal scatter-gather IO using standard QEMU backend. The patch also adds comment about locking considerations in the driver. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Signed-off-by: Roman Kagan Message-id: 1430207220-24458-7-git-send-email-den@openvz.org CC: Roman Kagan CC: Kevin Wolf CC: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 56 ++++++++++++++++++++++++++++------------------- 1 file changed, 34 insertions(+), 22 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index b469984ef6..f3ffecec34 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -49,6 +49,10 @@ typedef struct ParallelsHeader { } QEMU_PACKED ParallelsHeader; typedef struct BDRVParallelsState { + /** Locking is conservative, the lock protects + * - image file extending (truncate, fallocate) + * - any access to block allocation table + */ CoMutex lock; uint32_t *catalog_bitmap; @@ -186,37 +190,45 @@ static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; } -static int parallels_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) +static coroutine_fn int parallels_co_readv(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { BDRVParallelsState *s = bs->opaque; + uint64_t bytes_done = 0; + QEMUIOVector hd_qiov; + int ret = 0; + + qemu_iovec_init(&hd_qiov, qiov->niov); while (nb_sectors > 0) { - int64_t position = seek_to_sector(s, sector_num); - int n = cluster_remainder(s, sector_num, nb_sectors); - if (position >= 0) { - int ret = bdrv_read(bs->file, position, buf, n); - if (ret < 0) { - return ret; - } + int64_t position; + int n, nbytes; + + qemu_co_mutex_lock(&s->lock); + position = seek_to_sector(s, sector_num); + qemu_co_mutex_unlock(&s->lock); + + n = cluster_remainder(s, sector_num, nb_sectors); + nbytes = n << BDRV_SECTOR_BITS; + + if (position < 0) { + qemu_iovec_memset(qiov, bytes_done, 0, nbytes); } else { - memset(buf, 0, n << BDRV_SECTOR_BITS); + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); + + ret = bdrv_co_readv(bs->file, position, n, &hd_qiov); + if (ret < 0) { + break; + } } + nb_sectors -= n; sector_num += n; - buf += n << BDRV_SECTOR_BITS; + bytes_done += nbytes; } - return 0; -} -static coroutine_fn int parallels_co_read(BlockDriverState *bs, int64_t sector_num, - uint8_t *buf, int nb_sectors) -{ - int ret; - BDRVParallelsState *s = bs->opaque; - qemu_co_mutex_lock(&s->lock); - ret = parallels_read(bs, sector_num, buf, nb_sectors); - qemu_co_mutex_unlock(&s->lock); + qemu_iovec_destroy(&hd_qiov); return ret; } @@ -231,9 +243,9 @@ static BlockDriver bdrv_parallels = { .instance_size = sizeof(BDRVParallelsState), .bdrv_probe = parallels_probe, .bdrv_open = parallels_open, - .bdrv_read = parallels_co_read, .bdrv_close = parallels_close, .bdrv_co_get_block_status = parallels_co_get_block_status, + .bdrv_co_readv = parallels_co_readv, }; static void bdrv_parallels_init(void) From 912f31281a683a24b552a8cc6c293ab389b62013 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:40 +0300 Subject: [PATCH 07/38] block/parallels: replace magic constants 4, 64 with proper sizeofs simple purification.. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-8-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index f3ffecec34..138e61812c 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -32,7 +32,6 @@ #define HEADER_MAGIC "WithoutFreeSpace" #define HEADER_MAGIC2 "WithouFreSpacExt" #define HEADER_VERSION 2 -#define HEADER_SIZE 64 // always little-endian typedef struct ParallelsHeader { @@ -67,7 +66,7 @@ static int parallels_probe(const uint8_t *buf, int buf_size, const char *filenam { const ParallelsHeader *ph = (const void *)buf; - if (buf_size < HEADER_SIZE) + if (buf_size < sizeof(ParallelsHeader)) return 0; if ((!memcmp(ph->magic, HEADER_MAGIC, 16) || @@ -120,7 +119,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->catalog_size = le32_to_cpu(ph.catalog_entries); - if (s->catalog_size > INT_MAX / 4) { + if (s->catalog_size > INT_MAX / sizeof(uint32_t)) { error_setg(errp, "Catalog too large"); ret = -EFBIG; goto fail; @@ -131,7 +130,8 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - ret = bdrv_pread(bs->file, 64, s->catalog_bitmap, s->catalog_size * 4); + ret = bdrv_pread(bs->file, sizeof(ParallelsHeader), + s->catalog_bitmap, s->catalog_size * sizeof(uint32_t)); if (ret < 0) { goto fail; } From d0e61ce56d1520cade573eb344fdb136993d2279 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:41 +0300 Subject: [PATCH 08/38] block/parallels: mark parallels format driver as zero inited From the guest point of view unallocated blocks are zeroed. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Signed-off-by: Roman Kagan Message-id: 1430207220-24458-9-git-send-email-den@openvz.org CC: Roman Kagan CC: Kevin Wolf CC: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/parallels.c b/block/parallels.c index 138e61812c..ae64ce5d11 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -245,6 +245,7 @@ static BlockDriver bdrv_parallels = { .bdrv_open = parallels_open, .bdrv_close = parallels_close, .bdrv_co_get_block_status = parallels_co_get_block_status, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_readv = parallels_co_readv, }; From 5a41e1fa95f379e236883f38dacda292f6c48e6f Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:42 +0300 Subject: [PATCH 09/38] block/parallels: _co_writev callback for Parallels format Support write on Parallels images. The code is almost the same as one in the previous patch implemented scatter-gather IO for read. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Signed-off-by: Roman Kagan Message-id: 1430207220-24458-10-git-send-email-den@openvz.org CC: Roman Kagan CC: Kevin Wolf CC: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 90 +++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 88 insertions(+), 2 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index ae64ce5d11..8d738033c3 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -60,6 +60,8 @@ typedef struct BDRVParallelsState { unsigned int tracks; unsigned int off_multiplier; + + bool has_truncate; } BDRVParallelsState; static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename) @@ -85,8 +87,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ParallelsHeader ph; int ret; - bs->read_only = 1; // no write support yet - ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph)); if (ret < 0) { goto fail; @@ -139,6 +139,9 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, for (i = 0; i < s->catalog_size; i++) le32_to_cpus(&s->catalog_bitmap[i]); + s->has_truncate = bdrv_has_zero_init(bs->file) && + bdrv_truncate(bs->file, bdrv_getlength(bs->file)) == 0; + qemu_co_mutex_init(&s->lock); return 0; @@ -170,6 +173,46 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, return MIN(nb_sectors, ret); } +static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) +{ + BDRVParallelsState *s = bs->opaque; + uint32_t idx, offset, tmp; + int64_t pos; + int ret; + + idx = sector_num / s->tracks; + offset = sector_num % s->tracks; + + if (idx >= s->catalog_size) { + return -EINVAL; + } + if (s->catalog_bitmap[idx] != 0) { + return (uint64_t)s->catalog_bitmap[idx] * s->off_multiplier + offset; + } + + pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS; + if (s->has_truncate) { + ret = bdrv_truncate(bs->file, (pos + s->tracks) << BDRV_SECTOR_BITS); + } else { + ret = bdrv_write_zeroes(bs->file, pos, s->tracks, 0); + } + if (ret < 0) { + return ret; + } + + s->catalog_bitmap[idx] = pos / s->off_multiplier; + + tmp = cpu_to_le32(s->catalog_bitmap[idx]); + + ret = bdrv_pwrite(bs->file, + sizeof(ParallelsHeader) + idx * sizeof(tmp), &tmp, sizeof(tmp)); + if (ret < 0) { + s->catalog_bitmap[idx] = 0; + return ret; + } + return (uint64_t)s->catalog_bitmap[idx] * s->off_multiplier + offset; +} + static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -190,6 +233,48 @@ static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, BDRV_BLOCK_DATA | BDRV_BLOCK_OFFSET_VALID; } +static coroutine_fn int parallels_co_writev(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) +{ + BDRVParallelsState *s = bs->opaque; + uint64_t bytes_done = 0; + QEMUIOVector hd_qiov; + int ret = 0; + + qemu_iovec_init(&hd_qiov, qiov->niov); + + while (nb_sectors > 0) { + int64_t position; + int n, nbytes; + + qemu_co_mutex_lock(&s->lock); + position = allocate_cluster(bs, sector_num); + qemu_co_mutex_unlock(&s->lock); + if (position < 0) { + ret = (int)position; + break; + } + + n = cluster_remainder(s, sector_num, nb_sectors); + nbytes = n << BDRV_SECTOR_BITS; + + qemu_iovec_reset(&hd_qiov); + qemu_iovec_concat(&hd_qiov, qiov, bytes_done, nbytes); + + ret = bdrv_co_writev(bs->file, position, n, &hd_qiov); + if (ret < 0) { + break; + } + + nb_sectors -= n; + sector_num += n; + bytes_done += nbytes; + } + + qemu_iovec_destroy(&hd_qiov); + return ret; +} + static coroutine_fn int parallels_co_readv(BlockDriverState *bs, int64_t sector_num, int nb_sectors, QEMUIOVector *qiov) { @@ -247,6 +332,7 @@ static BlockDriver bdrv_parallels = { .bdrv_co_get_block_status = parallels_co_get_block_status, .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_readv = parallels_co_readv, + .bdrv_co_writev = parallels_co_writev, }; static void bdrv_parallels_init(void) From 50ffd8fd3cfceede87cec1f7f9a04cd7b9147271 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:43 +0300 Subject: [PATCH 10/38] iotests, parallels: test for write into Parallels image Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-11-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/076 | 5 +++++ tests/qemu-iotests/076.out | 10 ++++++++++ 2 files changed, 15 insertions(+) diff --git a/tests/qemu-iotests/076 b/tests/qemu-iotests/076 index 0139976d29..c9b55a9801 100755 --- a/tests/qemu-iotests/076 +++ b/tests/qemu-iotests/076 @@ -74,6 +74,11 @@ echo echo "== Read from a valid v2 image ==" _use_sample_img parallels-v2.bz2 { $QEMU_IO -c "read -P 0x11 0 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "write -P 0x21 1024k 1k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "write -P 0x22 1025k 1k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0x21 1024k 1k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0x22 1025k 1k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +{ $QEMU_IO -c "read -P 0 1026k 62k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir # success, all done echo "*** done" diff --git a/tests/qemu-iotests/076.out b/tests/qemu-iotests/076.out index 32ade08565..b0000aeedd 100644 --- a/tests/qemu-iotests/076.out +++ b/tests/qemu-iotests/076.out @@ -19,4 +19,14 @@ no file open, try 'help open' == Read from a valid v2 image == read 65536/65536 bytes at offset 0 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 1024/1024 bytes at offset 1048576 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 1024/1024 bytes at offset 1049600 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 1048576 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 1024/1024 bytes at offset 1049600 +1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 63488/63488 bytes at offset 1050624 +62 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done From 74cf6c5026fef6e327f09786445f626df02cbdf0 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:44 +0300 Subject: [PATCH 11/38] block/parallels: support parallels image creation Do not even care to create WithoutFreeSpace image, it is obsolete. Always create WithouFreSpacExt one. The code also does not spend a lot of efforts to fill cylinders and heads fields, they are not used actually in a real life neither in QEMU nor in Parallels products. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-12-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index 8d738033c3..15f6cb32ad 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -33,6 +33,9 @@ #define HEADER_MAGIC2 "WithouFreSpacExt" #define HEADER_VERSION 2 +#define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */ + + // always little-endian typedef struct ParallelsHeader { char magic[16]; // "WithoutFreeSpace" @@ -317,12 +320,103 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, return ret; } +static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) +{ + int64_t total_size, cl_size; + uint8_t tmp[BDRV_SECTOR_SIZE]; + Error *local_err = NULL; + BlockDriverState *file; + uint32_t cat_entries, cat_sectors; + ParallelsHeader header; + int ret; + + total_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_SIZE, 0), + BDRV_SECTOR_SIZE); + cl_size = ROUND_UP(qemu_opt_get_size_del(opts, BLOCK_OPT_CLUSTER_SIZE, + DEFAULT_CLUSTER_SIZE), BDRV_SECTOR_SIZE); + + ret = bdrv_create_file(filename, opts, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + + file = NULL; + ret = bdrv_open(&file, filename, NULL, NULL, + BDRV_O_RDWR | BDRV_O_PROTOCOL, NULL, &local_err); + if (ret < 0) { + error_propagate(errp, local_err); + return ret; + } + ret = bdrv_truncate(file, 0); + if (ret < 0) { + goto exit; + } + + cat_entries = DIV_ROUND_UP(total_size, cl_size); + cat_sectors = DIV_ROUND_UP(cat_entries * sizeof(uint32_t) + + sizeof(ParallelsHeader), cl_size); + cat_sectors = (cat_sectors * cl_size) >> BDRV_SECTOR_BITS; + + memset(&header, 0, sizeof(header)); + memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic)); + header.version = cpu_to_le32(HEADER_VERSION); + /* don't care much about geometry, it is not used on image level */ + header.heads = cpu_to_le32(16); + header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32); + header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS); + header.catalog_entries = cpu_to_le32(cat_entries); + header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE)); + header.data_off = cpu_to_le32(cat_sectors); + + /* write all the data */ + memset(tmp, 0, sizeof(tmp)); + memcpy(tmp, &header, sizeof(header)); + + ret = bdrv_pwrite(file, 0, tmp, BDRV_SECTOR_SIZE); + if (ret < 0) { + goto exit; + } + ret = bdrv_write_zeroes(file, 1, cat_sectors - 1, 0); + if (ret < 0) { + goto exit; + } + ret = 0; + +done: + bdrv_unref(file); + return ret; + +exit: + error_setg_errno(errp, -ret, "Failed to create Parallels image"); + goto done; +} + static void parallels_close(BlockDriverState *bs) { BDRVParallelsState *s = bs->opaque; g_free(s->catalog_bitmap); } +static QemuOptsList parallels_create_opts = { + .name = "parallels-create-opts", + .head = QTAILQ_HEAD_INITIALIZER(parallels_create_opts.head), + .desc = { + { + .name = BLOCK_OPT_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Virtual disk size", + }, + { + .name = BLOCK_OPT_CLUSTER_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Parallels image cluster size", + .def_value_str = stringify(DEFAULT_CLUSTER_SIZE), + }, + { /* end of list */ } + } +}; + static BlockDriver bdrv_parallels = { .format_name = "parallels", .instance_size = sizeof(BDRVParallelsState), @@ -333,6 +427,9 @@ static BlockDriver bdrv_parallels = { .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_readv = parallels_co_readv, .bdrv_co_writev = parallels_co_writev, + + .bdrv_create = parallels_create, + .create_opts = ¶llels_create_opts, }; static void bdrv_parallels_init(void) From ca9c4e0675f9cb98138e1069605114f45746d985 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:45 +0300 Subject: [PATCH 12/38] iotests, parallels: test for newly created parallels image via qemu-img Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-13-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/131 | 68 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/131.out | 24 ++++++++++++++ tests/qemu-iotests/group | 1 + 3 files changed, 93 insertions(+) create mode 100755 tests/qemu-iotests/131 create mode 100644 tests/qemu-iotests/131.out diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131 new file mode 100755 index 0000000000..f45afa72d6 --- /dev/null +++ b/tests/qemu-iotests/131 @@ -0,0 +1,68 @@ +#!/bin/bash +# +# parallels format validation tests (created by QEMU) +# +# Copyright (C) 2014 Denis V. Lunev +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +# creator +owner=den@openvz.org + +seq=`basename $0` +echo "QA output created by $seq" + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! + +_cleanup() +{ + _cleanup_test_img +} +trap "_cleanup; exit \$status" 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common.rc +. ./common.filter + +_supported_fmt parallels +_supported_proto file +_supported_os Linux + +size=64M +CLUSTER_SIZE=64k +IMGFMT=parallels +_make_test_img $size + +echo == read empty image == +{ $QEMU_IO -c "read -P 0 32k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo == write more than 1 block in a row == +{ $QEMU_IO -c "write -P 0x11 32k 128k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo == read less than block == +{ $QEMU_IO -c "read -P 0x11 32k 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo == read exactly 1 block == +{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo == read more than 1 block == +{ $QEMU_IO -c "read -P 0x11 32k 128k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo == check that there is no trash after written == +{ $QEMU_IO -c "read -P 0 160k 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo == check that there is no trash before written == +{ $QEMU_IO -c "read -P 0 0 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + +# success, all done +echo "*** done" +rm -f $seq.full +status=0 diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out new file mode 100644 index 0000000000..4158a2f520 --- /dev/null +++ b/tests/qemu-iotests/131.out @@ -0,0 +1,24 @@ +QA output created by 131 +Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 +== read empty image == +read 65536/65536 bytes at offset 32768 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== write more than 1 block in a row == +wrote 131072/131072 bytes at offset 32768 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== read less than block == +read 32768/32768 bytes at offset 32768 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== read exactly 1 block == +read 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== read more than 1 block == +read 131072/131072 bytes at offset 32768 +128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check that there is no trash after written == +read 32768/32768 bytes at offset 163840 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== check that there is no trash before written == +read 32768/32768 bytes at offset 0 +32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +*** done diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index 6ca3466ec5..34b16cb81a 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -128,3 +128,4 @@ 128 rw auto quick 129 rw auto quick 130 rw auto quick +131 rw auto quick From cc5690f20fcc075940a213380b362ae2054c03ba Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:46 +0300 Subject: [PATCH 13/38] parallels: change copyright information in the image header Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-14-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/block/parallels.c b/block/parallels.c index 15f6cb32ad..f615f4420c 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -2,8 +2,12 @@ * Block driver for Parallels disk image format * * Copyright (c) 2007 Alex Beregszaszi + * Copyright (c) 2015 Denis V. Lunev * - * This code is based on comparing different disk images created by Parallels. + * This code was originally based on comparing different disk images created + * by Parallels. Currently it is based on opened OpenVZ sources + * available at + * http://git.openvz.org/?p=ploop;a=summary * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to deal From 369f7de9d57e4dd2f312255fc12271d5749c0a4e Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:47 +0300 Subject: [PATCH 14/38] block/parallels: rename catalog_ names to bat_ BAT means 'block allocation table'. Thus this name is clean and shorter on writing. Some obvious formatting fixes in the old code were made to make checkpatch happy. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-15-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 58 ++++++++++++++++++++++++----------------------- 1 file changed, 30 insertions(+), 28 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index f615f4420c..16fbdf4442 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -47,7 +47,7 @@ typedef struct ParallelsHeader { uint32_t heads; uint32_t cylinders; uint32_t tracks; - uint32_t catalog_entries; + uint32_t bat_entries; uint64_t nb_sectors; uint32_t inuse; uint32_t data_off; @@ -61,8 +61,8 @@ typedef struct BDRVParallelsState { */ CoMutex lock; - uint32_t *catalog_bitmap; - unsigned int catalog_size; + uint32_t *bat_bitmap; + unsigned int bat_size; unsigned int tracks; @@ -125,26 +125,27 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - s->catalog_size = le32_to_cpu(ph.catalog_entries); - if (s->catalog_size > INT_MAX / sizeof(uint32_t)) { + s->bat_size = le32_to_cpu(ph.bat_entries); + if (s->bat_size > INT_MAX / sizeof(uint32_t)) { error_setg(errp, "Catalog too large"); ret = -EFBIG; goto fail; } - s->catalog_bitmap = g_try_new(uint32_t, s->catalog_size); - if (s->catalog_size && s->catalog_bitmap == NULL) { + s->bat_bitmap = g_try_new(uint32_t, s->bat_size); + if (s->bat_size && s->bat_bitmap == NULL) { ret = -ENOMEM; goto fail; } ret = bdrv_pread(bs->file, sizeof(ParallelsHeader), - s->catalog_bitmap, s->catalog_size * sizeof(uint32_t)); + s->bat_bitmap, s->bat_size * sizeof(uint32_t)); if (ret < 0) { goto fail; } - for (i = 0; i < s->catalog_size; i++) - le32_to_cpus(&s->catalog_bitmap[i]); + for (i = 0; i < s->bat_size; i++) { + le32_to_cpus(&s->bat_bitmap[i]); + } s->has_truncate = bdrv_has_zero_init(bs->file) && bdrv_truncate(bs->file, bdrv_getlength(bs->file)) == 0; @@ -156,7 +157,7 @@ fail_format: error_setg(errp, "Image not in Parallels format"); ret = -EINVAL; fail: - g_free(s->catalog_bitmap); + g_free(s->bat_bitmap); return ret; } @@ -168,9 +169,10 @@ static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) offset = sector_num % s->tracks; /* not allocated */ - if ((index >= s->catalog_size) || (s->catalog_bitmap[index] == 0)) + if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) { return -1; - return (uint64_t)s->catalog_bitmap[index] * s->off_multiplier + offset; + } + return (uint64_t)s->bat_bitmap[index] * s->off_multiplier + offset; } static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, @@ -190,11 +192,11 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) idx = sector_num / s->tracks; offset = sector_num % s->tracks; - if (idx >= s->catalog_size) { + if (idx >= s->bat_size) { return -EINVAL; } - if (s->catalog_bitmap[idx] != 0) { - return (uint64_t)s->catalog_bitmap[idx] * s->off_multiplier + offset; + if (s->bat_bitmap[idx] != 0) { + return (uint64_t)s->bat_bitmap[idx] * s->off_multiplier + offset; } pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS; @@ -207,17 +209,17 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) return ret; } - s->catalog_bitmap[idx] = pos / s->off_multiplier; + s->bat_bitmap[idx] = pos / s->off_multiplier; - tmp = cpu_to_le32(s->catalog_bitmap[idx]); + tmp = cpu_to_le32(s->bat_bitmap[idx]); ret = bdrv_pwrite(bs->file, sizeof(ParallelsHeader) + idx * sizeof(tmp), &tmp, sizeof(tmp)); if (ret < 0) { - s->catalog_bitmap[idx] = 0; + s->bat_bitmap[idx] = 0; return ret; } - return (uint64_t)s->catalog_bitmap[idx] * s->off_multiplier + offset; + return (uint64_t)s->bat_bitmap[idx] * s->off_multiplier + offset; } static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, @@ -330,7 +332,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) uint8_t tmp[BDRV_SECTOR_SIZE]; Error *local_err = NULL; BlockDriverState *file; - uint32_t cat_entries, cat_sectors; + uint32_t bat_entries, bat_sectors; ParallelsHeader header; int ret; @@ -357,10 +359,10 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) goto exit; } - cat_entries = DIV_ROUND_UP(total_size, cl_size); - cat_sectors = DIV_ROUND_UP(cat_entries * sizeof(uint32_t) + + bat_entries = DIV_ROUND_UP(total_size, cl_size); + bat_sectors = DIV_ROUND_UP(bat_entries * sizeof(uint32_t) + sizeof(ParallelsHeader), cl_size); - cat_sectors = (cat_sectors * cl_size) >> BDRV_SECTOR_BITS; + bat_sectors = (bat_sectors * cl_size) >> BDRV_SECTOR_BITS; memset(&header, 0, sizeof(header)); memcpy(header.magic, HEADER_MAGIC2, sizeof(header.magic)); @@ -369,9 +371,9 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) header.heads = cpu_to_le32(16); header.cylinders = cpu_to_le32(total_size / BDRV_SECTOR_SIZE / 16 / 32); header.tracks = cpu_to_le32(cl_size >> BDRV_SECTOR_BITS); - header.catalog_entries = cpu_to_le32(cat_entries); + header.bat_entries = cpu_to_le32(bat_entries); header.nb_sectors = cpu_to_le64(DIV_ROUND_UP(total_size, BDRV_SECTOR_SIZE)); - header.data_off = cpu_to_le32(cat_sectors); + header.data_off = cpu_to_le32(bat_sectors); /* write all the data */ memset(tmp, 0, sizeof(tmp)); @@ -381,7 +383,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) if (ret < 0) { goto exit; } - ret = bdrv_write_zeroes(file, 1, cat_sectors - 1, 0); + ret = bdrv_write_zeroes(file, 1, bat_sectors - 1, 0); if (ret < 0) { goto exit; } @@ -399,7 +401,7 @@ exit: static void parallels_close(BlockDriverState *bs) { BDRVParallelsState *s = bs->opaque; - g_free(s->catalog_bitmap); + g_free(s->bat_bitmap); } static QemuOptsList parallels_create_opts = { From 555cc9d9fc5c71be6bd3f288eaf1e5628732088f Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:48 +0300 Subject: [PATCH 15/38] block/parallels: create bat2sect helper deduplicate copy/paste arithmetcs Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-16-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 16fbdf4442..1540c2186a 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -161,6 +161,12 @@ fail: return ret; } + +static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx) +{ + return (uint64_t)s->bat_bitmap[idx] * s->off_multiplier; +} + static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) { uint32_t index, offset; @@ -172,7 +178,7 @@ static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) if ((index >= s->bat_size) || (s->bat_bitmap[index] == 0)) { return -1; } - return (uint64_t)s->bat_bitmap[index] * s->off_multiplier + offset; + return bat2sect(s, index) + offset; } static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, @@ -196,7 +202,7 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) return -EINVAL; } if (s->bat_bitmap[idx] != 0) { - return (uint64_t)s->bat_bitmap[idx] * s->off_multiplier + offset; + return bat2sect(s, idx) + offset; } pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS; @@ -219,7 +225,7 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) s->bat_bitmap[idx] = 0; return ret; } - return (uint64_t)s->bat_bitmap[idx] * s->off_multiplier + offset; + return bat2sect(s, idx) + offset; } static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, From dd97cdc064f24484a2ebc141a4ec6bba35f56007 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:49 +0300 Subject: [PATCH 16/38] block/parallels: keep BAT bitmap data in little endian in memory This will allow to use this data as buffer to BAT update directly without any intermediate buffers. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-17-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 1540c2186a..431adf1ceb 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -90,7 +90,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { BDRVParallelsState *s = bs->opaque; - int i; ParallelsHeader ph; int ret; @@ -143,10 +142,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - for (i = 0; i < s->bat_size; i++) { - le32_to_cpus(&s->bat_bitmap[i]); - } - s->has_truncate = bdrv_has_zero_init(bs->file) && bdrv_truncate(bs->file, bdrv_getlength(bs->file)) == 0; @@ -164,7 +159,7 @@ fail: static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx) { - return (uint64_t)s->bat_bitmap[idx] * s->off_multiplier; + return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier; } static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) @@ -191,7 +186,7 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) { BDRVParallelsState *s = bs->opaque; - uint32_t idx, offset, tmp; + uint32_t idx, offset; int64_t pos; int ret; @@ -215,12 +210,10 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) return ret; } - s->bat_bitmap[idx] = pos / s->off_multiplier; - - tmp = cpu_to_le32(s->bat_bitmap[idx]); - + s->bat_bitmap[idx] = cpu_to_le32(pos / s->off_multiplier); ret = bdrv_pwrite(bs->file, - sizeof(ParallelsHeader) + idx * sizeof(tmp), &tmp, sizeof(tmp)); + sizeof(ParallelsHeader) + idx * sizeof(s->bat_bitmap[idx]), + s->bat_bitmap + idx, sizeof(s->bat_bitmap[idx])); if (ret < 0) { s->bat_bitmap[idx] = 0; return ret; From 9eae9cca95e76afc2f2288a665e08a64953f2820 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:50 +0300 Subject: [PATCH 17/38] block/parallels: read parallels image header and BAT into single buffer This metadata cache would allow to properly batch BAT updates to disk in next patches. These updates will be properly aligned to avoid read-modify-write transactions on block level. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-18-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 431adf1ceb..f8a99810ed 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -61,6 +61,8 @@ typedef struct BDRVParallelsState { */ CoMutex lock; + ParallelsHeader *header; + uint32_t header_size; uint32_t *bat_bitmap; unsigned int bat_size; @@ -91,7 +93,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, { BDRVParallelsState *s = bs->opaque; ParallelsHeader ph; - int ret; + int ret, size; ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph)); if (ret < 0) { @@ -130,17 +132,25 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -EFBIG; goto fail; } - s->bat_bitmap = g_try_new(uint32_t, s->bat_size); - if (s->bat_size && s->bat_bitmap == NULL) { + + size = sizeof(ParallelsHeader) + sizeof(uint32_t) * s->bat_size; + s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file)); + s->header = qemu_try_blockalign(bs->file, s->header_size); + if (s->header == NULL) { ret = -ENOMEM; goto fail; } + if (le32_to_cpu(ph.data_off) < s->header_size) { + /* there is not enough unused space to fit to block align between BAT + and actual data. We can't avoid read-modify-write... */ + s->header_size = size; + } - ret = bdrv_pread(bs->file, sizeof(ParallelsHeader), - s->bat_bitmap, s->bat_size * sizeof(uint32_t)); + ret = bdrv_pread(bs->file, 0, s->header, s->header_size); if (ret < 0) { goto fail; } + s->bat_bitmap = (uint32_t *)(s->header + 1); s->has_truncate = bdrv_has_zero_init(bs->file) && bdrv_truncate(bs->file, bdrv_getlength(bs->file)) == 0; @@ -152,7 +162,7 @@ fail_format: error_setg(errp, "Image not in Parallels format"); ret = -EINVAL; fail: - g_free(s->bat_bitmap); + qemu_vfree(s->header); return ret; } @@ -400,7 +410,7 @@ exit: static void parallels_close(BlockDriverState *bs) { BDRVParallelsState *s = bs->opaque; - g_free(s->bat_bitmap); + qemu_vfree(s->header); } static QemuOptsList parallels_create_opts = { From 23d6bd3bd1225e8c8ade6ed829eabcf90ddfa6f7 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:51 +0300 Subject: [PATCH 18/38] block/parallels: move parallels_open/probe to the very end of the file This will help to avoid forward declarations for upcoming parallels_check Some very obvious formatting fixes were made to the moved code to make checkpatch happy. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-19-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 191 ++++++++++++++++++++++++---------------------- 1 file changed, 98 insertions(+), 93 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index f8a99810ed..35ffb6f398 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -73,99 +73,6 @@ typedef struct BDRVParallelsState { bool has_truncate; } BDRVParallelsState; -static int parallels_probe(const uint8_t *buf, int buf_size, const char *filename) -{ - const ParallelsHeader *ph = (const void *)buf; - - if (buf_size < sizeof(ParallelsHeader)) - return 0; - - if ((!memcmp(ph->magic, HEADER_MAGIC, 16) || - !memcmp(ph->magic, HEADER_MAGIC2, 16)) && - (le32_to_cpu(ph->version) == HEADER_VERSION)) - return 100; - - return 0; -} - -static int parallels_open(BlockDriverState *bs, QDict *options, int flags, - Error **errp) -{ - BDRVParallelsState *s = bs->opaque; - ParallelsHeader ph; - int ret, size; - - ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph)); - if (ret < 0) { - goto fail; - } - - bs->total_sectors = le64_to_cpu(ph.nb_sectors); - - if (le32_to_cpu(ph.version) != HEADER_VERSION) { - goto fail_format; - } - if (!memcmp(ph.magic, HEADER_MAGIC, 16)) { - s->off_multiplier = 1; - bs->total_sectors = 0xffffffff & bs->total_sectors; - } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) { - s->off_multiplier = le32_to_cpu(ph.tracks); - } else { - goto fail_format; - } - - s->tracks = le32_to_cpu(ph.tracks); - if (s->tracks == 0) { - error_setg(errp, "Invalid image: Zero sectors per track"); - ret = -EINVAL; - goto fail; - } - if (s->tracks > INT32_MAX/513) { - error_setg(errp, "Invalid image: Too big cluster"); - ret = -EFBIG; - goto fail; - } - - s->bat_size = le32_to_cpu(ph.bat_entries); - if (s->bat_size > INT_MAX / sizeof(uint32_t)) { - error_setg(errp, "Catalog too large"); - ret = -EFBIG; - goto fail; - } - - size = sizeof(ParallelsHeader) + sizeof(uint32_t) * s->bat_size; - s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file)); - s->header = qemu_try_blockalign(bs->file, s->header_size); - if (s->header == NULL) { - ret = -ENOMEM; - goto fail; - } - if (le32_to_cpu(ph.data_off) < s->header_size) { - /* there is not enough unused space to fit to block align between BAT - and actual data. We can't avoid read-modify-write... */ - s->header_size = size; - } - - ret = bdrv_pread(bs->file, 0, s->header, s->header_size); - if (ret < 0) { - goto fail; - } - s->bat_bitmap = (uint32_t *)(s->header + 1); - - s->has_truncate = bdrv_has_zero_init(bs->file) && - bdrv_truncate(bs->file, bdrv_getlength(bs->file)) == 0; - - qemu_co_mutex_init(&s->lock); - return 0; - -fail_format: - error_setg(errp, "Image not in Parallels format"); - ret = -EINVAL; -fail: - qemu_vfree(s->header); - return ret; -} - static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx) { @@ -407,6 +314,104 @@ exit: goto done; } + +static int parallels_probe(const uint8_t *buf, int buf_size, + const char *filename) +{ + const ParallelsHeader *ph = (const void *)buf; + + if (buf_size < sizeof(ParallelsHeader)) { + return 0; + } + + if ((!memcmp(ph->magic, HEADER_MAGIC, 16) || + !memcmp(ph->magic, HEADER_MAGIC2, 16)) && + (le32_to_cpu(ph->version) == HEADER_VERSION)) { + return 100; + } + + return 0; +} + +static int parallels_open(BlockDriverState *bs, QDict *options, int flags, + Error **errp) +{ + BDRVParallelsState *s = bs->opaque; + ParallelsHeader ph; + int ret, size; + + ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph)); + if (ret < 0) { + goto fail; + } + + bs->total_sectors = le64_to_cpu(ph.nb_sectors); + + if (le32_to_cpu(ph.version) != HEADER_VERSION) { + goto fail_format; + } + if (!memcmp(ph.magic, HEADER_MAGIC, 16)) { + s->off_multiplier = 1; + bs->total_sectors = 0xffffffff & bs->total_sectors; + } else if (!memcmp(ph.magic, HEADER_MAGIC2, 16)) { + s->off_multiplier = le32_to_cpu(ph.tracks); + } else { + goto fail_format; + } + + s->tracks = le32_to_cpu(ph.tracks); + if (s->tracks == 0) { + error_setg(errp, "Invalid image: Zero sectors per track"); + ret = -EINVAL; + goto fail; + } + if (s->tracks > INT32_MAX/513) { + error_setg(errp, "Invalid image: Too big cluster"); + ret = -EFBIG; + goto fail; + } + + s->bat_size = le32_to_cpu(ph.bat_entries); + if (s->bat_size > INT_MAX / sizeof(uint32_t)) { + error_setg(errp, "Catalog too large"); + ret = -EFBIG; + goto fail; + } + + size = sizeof(ParallelsHeader) + sizeof(uint32_t) * s->bat_size; + s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file)); + s->header = qemu_try_blockalign(bs->file, s->header_size); + if (s->header == NULL) { + ret = -ENOMEM; + goto fail; + } + if (le32_to_cpu(ph.data_off) < s->header_size) { + /* there is not enough unused space to fit to block align between BAT + and actual data. We can't avoid read-modify-write... */ + s->header_size = size; + } + + ret = bdrv_pread(bs->file, 0, s->header, s->header_size); + if (ret < 0) { + goto fail; + } + s->bat_bitmap = (uint32_t *)(s->header + 1); + + s->has_truncate = bdrv_has_zero_init(bs->file) && + bdrv_truncate(bs->file, bdrv_getlength(bs->file)) == 0; + + qemu_co_mutex_init(&s->lock); + return 0; + +fail_format: + error_setg(errp, "Image not in Parallels format"); + ret = -EINVAL; +fail: + qemu_vfree(s->header); + return ret; +} + + static void parallels_close(BlockDriverState *bs) { BDRVParallelsState *s = bs->opaque; From 49ad6467313d17486af9029413debb709dc971a8 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:52 +0300 Subject: [PATCH 19/38] block/parallels: implement parallels_check method of block driver The check is very simple at the moment. It calculates necessary stats and fix only the following errors: - space leak at the end of the image. This would happens due to preallocation - clusters outside the image are zeroed. Nothing else could be done here Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-20-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 85 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index 35ffb6f398..35f231af88 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -242,6 +242,90 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, return ret; } + +static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res, + BdrvCheckMode fix) +{ + BDRVParallelsState *s = bs->opaque; + int64_t size, prev_off, high_off; + int ret; + uint32_t i; + bool flush_bat = false; + int cluster_size = s->tracks << BDRV_SECTOR_BITS; + + size = bdrv_getlength(bs->file); + if (size < 0) { + res->check_errors++; + return size; + } + + res->bfi.total_clusters = s->bat_size; + res->bfi.compressed_clusters = 0; /* compression is not supported */ + + high_off = 0; + prev_off = 0; + for (i = 0; i < s->bat_size; i++) { + int64_t off = bat2sect(s, i) << BDRV_SECTOR_BITS; + if (off == 0) { + prev_off = 0; + continue; + } + + /* cluster outside the image */ + if (off > size) { + fprintf(stderr, "%s cluster %u is outside image\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR", i); + res->corruptions++; + if (fix & BDRV_FIX_ERRORS) { + prev_off = 0; + s->bat_bitmap[i] = 0; + res->corruptions_fixed++; + flush_bat = true; + continue; + } + } + + res->bfi.allocated_clusters++; + if (off > high_off) { + high_off = off; + } + + if (prev_off != 0 && (prev_off + cluster_size) != off) { + res->bfi.fragmented_clusters++; + } + prev_off = off; + } + + if (flush_bat) { + ret = bdrv_pwrite_sync(bs->file, 0, s->header, s->header_size); + if (ret < 0) { + res->check_errors++; + return ret; + } + } + + res->image_end_offset = high_off + cluster_size; + if (size > res->image_end_offset) { + int64_t count; + count = DIV_ROUND_UP(size - res->image_end_offset, cluster_size); + fprintf(stderr, "%s space leaked at the end of the image %" PRId64 "\n", + fix & BDRV_FIX_LEAKS ? "Repairing" : "ERROR", + size - res->image_end_offset); + res->leaks += count; + if (fix & BDRV_FIX_LEAKS) { + ret = bdrv_truncate(bs->file, res->image_end_offset); + if (ret < 0) { + res->check_errors++; + return ret; + } + res->leaks_fixed += count; + } + } + + return 0; +} + + static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) { int64_t total_size, cl_size; @@ -449,6 +533,7 @@ static BlockDriver bdrv_parallels = { .bdrv_co_writev = parallels_co_writev, .bdrv_create = parallels_create, + .bdrv_check = parallels_check, .create_opts = ¶llels_create_opts, }; From 6dd6b9f1440c37811ad963b49a48bf80a8bde377 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:53 +0300 Subject: [PATCH 20/38] block/parallels: implement incorrect close detection The software driver must set inuse field in Parallels header to 0x746F6E59 when the image is opened in read-write mode. The presence of this magic in the header on open forces image consistency check. There is an unfortunate trick here. We can not check for inuse in parallels_check as this will happen too late. It is possible to do that for simple check, but during the fix this would always report an error as the image was opened in BDRV_O_RDWR mode. Thus we save the flag in BDRVParallelsState for this. On the other hand, nothing should be done to clear inuse in parallels_check. Generic close will do the job right. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-21-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 50 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) diff --git a/block/parallels.c b/block/parallels.c index 35f231af88..76e3a4e311 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -36,6 +36,7 @@ #define HEADER_MAGIC "WithoutFreeSpace" #define HEADER_MAGIC2 "WithouFreSpacExt" #define HEADER_VERSION 2 +#define HEADER_INUSE_MAGIC (0x746F6E59) #define DEFAULT_CLUSTER_SIZE 1048576 /* 1 MiB */ @@ -63,6 +64,8 @@ typedef struct BDRVParallelsState { ParallelsHeader *header; uint32_t header_size; + bool header_unclean; + uint32_t *bat_bitmap; unsigned int bat_size; @@ -259,6 +262,17 @@ static int parallels_check(BlockDriverState *bs, BdrvCheckResult *res, return size; } + if (s->header_unclean) { + fprintf(stderr, "%s image was not closed correctly\n", + fix & BDRV_FIX_ERRORS ? "Repairing" : "ERROR"); + res->corruptions++; + if (fix & BDRV_FIX_ERRORS) { + /* parallels_close will do the job right */ + res->corruptions_fixed++; + s->header_unclean = false; + } + } + res->bfi.total_clusters = s->bat_size; res->bfi.compressed_clusters = 0; /* compression is not supported */ @@ -417,6 +431,17 @@ static int parallels_probe(const uint8_t *buf, int buf_size, return 0; } +static int parallels_update_header(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + unsigned size = MAX(bdrv_opt_mem_align(bs->file), sizeof(ParallelsHeader)); + + if (size > s->header_size) { + size = s->header_size; + } + return bdrv_pwrite_sync(bs->file, 0, s->header, size); +} + static int parallels_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -484,6 +509,25 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, s->has_truncate = bdrv_has_zero_init(bs->file) && bdrv_truncate(bs->file, bdrv_getlength(bs->file)) == 0; + if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { + /* Image was not closed correctly. The check is mandatory */ + s->header_unclean = true; + if ((flags & BDRV_O_RDWR) && !(flags & BDRV_O_CHECK)) { + error_setg(errp, "parallels: Image was not closed correctly; " + "cannot be opened read/write"); + ret = -EACCES; + goto fail; + } + } + + if (flags & BDRV_O_RDWR) { + s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC); + ret = parallels_update_header(bs); + if (ret < 0) { + goto fail; + } + } + qemu_co_mutex_init(&s->lock); return 0; @@ -499,6 +543,12 @@ fail: static void parallels_close(BlockDriverState *bs) { BDRVParallelsState *s = bs->opaque; + + if (bs->open_flags & BDRV_O_RDWR) { + s->header->inuse = 0; + parallels_update_header(bs); + } + qemu_vfree(s->header); } From a6be831e99f89d72a8c4a114347d5512c326af29 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:54 +0300 Subject: [PATCH 21/38] iotests, parallels: check for incorrectly closed image in tests Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-22-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/131 | 9 +++++++++ tests/qemu-iotests/131.out | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/tests/qemu-iotests/131 b/tests/qemu-iotests/131 index f45afa72d6..4873f40e94 100755 --- a/tests/qemu-iotests/131 +++ b/tests/qemu-iotests/131 @@ -42,6 +42,8 @@ _supported_fmt parallels _supported_proto file _supported_os Linux +inuse_offset=$((0x2c)) + size=64M CLUSTER_SIZE=64k IMGFMT=parallels @@ -62,6 +64,13 @@ echo == check that there is no trash after written == echo == check that there is no trash before written == { $QEMU_IO -c "read -P 0 0 32k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +echo "== Corrupt image ==" +poke_file "$TEST_IMG" "$inuse_offset" "\x59\x6e\x6f\x74" +{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir +_check_test_img +_check_test_img -r all +{ $QEMU_IO -c "read -P 0x11 64k 64k" "$TEST_IMG"; } 2>&1 | _filter_qemu_io | _filter_testdir + # success, all done echo "*** done" rm -f $seq.full diff --git a/tests/qemu-iotests/131.out b/tests/qemu-iotests/131.out index 4158a2f520..021a04c812 100644 --- a/tests/qemu-iotests/131.out +++ b/tests/qemu-iotests/131.out @@ -21,4 +21,21 @@ read 32768/32768 bytes at offset 163840 == check that there is no trash before written == read 32768/32768 bytes at offset 0 32 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== Corrupt image == +qemu-io: can't open device TEST_DIR/t.parallels: parallels: Image was not closed correctly; cannot be opened read/write +no file open, try 'help open' +ERROR image was not closed correctly + +1 errors were found on the image. +Data may be corrupted, or further writes to the image may corrupt it. +Repairing image was not closed correctly +The following inconsistencies were found and repaired: + + 0 leaked clusters + 1 corruptions + +Double checking the fixed image now... +No errors were found on the image. +read 65536/65536 bytes at offset 65536 +64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) *** done From 6953d920784466dfaea77f7cfd23df2ad8b772a0 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:55 +0300 Subject: [PATCH 22/38] block/parallels: improve image reading performance Try to perform IO for the biggest continuous block possible. The performance for sequential read is increased from 220 Mb/sec to 360 Mb/sec for continous image on my SSD HDD. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-23-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 36 +++++++++++++++++++++++++++++++----- 1 file changed, 31 insertions(+), 5 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 76e3a4e311..5ff74e8a0f 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -103,6 +103,35 @@ static int cluster_remainder(BDRVParallelsState *s, int64_t sector_num, return MIN(nb_sectors, ret); } +static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, + int nb_sectors, int *pnum) +{ + int64_t start_off = -2, prev_end_off = -2; + + *pnum = 0; + while (nb_sectors > 0 || start_off == -2) { + int64_t offset = seek_to_sector(s, sector_num); + int to_end; + + if (start_off == -2) { + start_off = offset; + prev_end_off = offset; + } else if (offset != prev_end_off) { + break; + } + + to_end = cluster_remainder(s, sector_num, nb_sectors); + nb_sectors -= to_end; + sector_num += to_end; + *pnum += to_end; + + if (offset > 0) { + prev_end_off += to_end; + } + } + return start_off; +} + static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) { BDRVParallelsState *s = bs->opaque; @@ -148,11 +177,9 @@ static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, int64_t offset; qemu_co_mutex_lock(&s->lock); - offset = seek_to_sector(s, sector_num); + offset = block_status(s, sector_num, nb_sectors, pnum); qemu_co_mutex_unlock(&s->lock); - *pnum = cluster_remainder(s, sector_num, nb_sectors); - if (offset < 0) { return 0; } @@ -218,10 +245,9 @@ static coroutine_fn int parallels_co_readv(BlockDriverState *bs, int n, nbytes; qemu_co_mutex_lock(&s->lock); - position = seek_to_sector(s, sector_num); + position = block_status(s, sector_num, nb_sectors, &n); qemu_co_mutex_unlock(&s->lock); - n = cluster_remainder(s, sector_num, nb_sectors); nbytes = n << BDRV_SECTOR_BITS; if (position < 0) { From 2d68e22e94e8bc5a0d32a38b53c592c320db8261 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:56 +0300 Subject: [PATCH 23/38] block/parallels: create bat_entry_off helper calculate offset of the BAT entry in the image file. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-24-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 5ff74e8a0f..4d8a0d48aa 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -82,6 +82,11 @@ static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx) return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier; } +static uint32_t bat_entry_off(uint32_t idx) +{ + return sizeof(ParallelsHeader) + sizeof(uint32_t) * idx; +} + static int64_t seek_to_sector(BDRVParallelsState *s, int64_t sector_num) { uint32_t index, offset; @@ -160,9 +165,8 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) } s->bat_bitmap[idx] = cpu_to_le32(pos / s->off_multiplier); - ret = bdrv_pwrite(bs->file, - sizeof(ParallelsHeader) + idx * sizeof(s->bat_bitmap[idx]), - s->bat_bitmap + idx, sizeof(s->bat_bitmap[idx])); + ret = bdrv_pwrite(bs->file, bat_entry_off(idx), s->bat_bitmap + idx, + sizeof(s->bat_bitmap[idx])); if (ret < 0) { s->bat_bitmap[idx] = 0; return ret; @@ -400,8 +404,7 @@ static int parallels_create(const char *filename, QemuOpts *opts, Error **errp) } bat_entries = DIV_ROUND_UP(total_size, cl_size); - bat_sectors = DIV_ROUND_UP(bat_entries * sizeof(uint32_t) + - sizeof(ParallelsHeader), cl_size); + bat_sectors = DIV_ROUND_UP(bat_entry_off(bat_entries), cl_size); bat_sectors = (bat_sectors * cl_size) >> BDRV_SECTOR_BITS; memset(&header, 0, sizeof(header)); @@ -513,7 +516,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - size = sizeof(ParallelsHeader) + sizeof(uint32_t) * s->bat_size; + size = bat_entry_off(s->bat_size); s->header_size = ROUND_UP(size, bdrv_opt_mem_align(bs->file)); s->header = qemu_try_blockalign(bs->file, s->header_size); if (s->header == NULL) { From 0d31c7c200b3dca2aeeaa6f74ff3fd539aad803a Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:57 +0300 Subject: [PATCH 24/38] block/parallels: delay writing to BAT till bdrv_co_flush_to_os The idea is that we do not need to immediately sync BAT to the image as from the guest point of view there is a possibility that IO is lost even in the physical controller until flush command was finished. bdrv_co_flush_to_os is exactly the right place for this purpose. Technically the patch uses loaded BAT data as a cache and performs actual on-disk metadata updates in parallels_co_flush_to_os callback. This patch speed ups qemu-img create -f parallels -o cluster_size=64k ./1.hds 64G qemu-io -f parallels -c "write -P 0x11 0 1024k" 1.hds writing from 50-60 Mb/sec to 80-90 Mb/sec on rotational media and from 160 Mb/sec to 190 Mb/sec on SSD disk. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Reviewed-by: Stefan Hajnoczi Signed-off-by: Roman Kagan Message-id: 1430207220-24458-25-git-send-email-den@openvz.org CC: Kevin Wolf Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 50 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 44 insertions(+), 6 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 4d8a0d48aa..05fe030f05 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -30,6 +30,7 @@ #include "qemu-common.h" #include "block/block_int.h" #include "qemu/module.h" +#include "qemu/bitmap.h" /**************************************************************/ @@ -66,6 +67,9 @@ typedef struct BDRVParallelsState { uint32_t header_size; bool header_unclean; + unsigned long *bat_dirty_bmap; + unsigned int bat_dirty_block; + uint32_t *bat_bitmap; unsigned int bat_size; @@ -165,15 +169,43 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) } s->bat_bitmap[idx] = cpu_to_le32(pos / s->off_multiplier); - ret = bdrv_pwrite(bs->file, bat_entry_off(idx), s->bat_bitmap + idx, - sizeof(s->bat_bitmap[idx])); - if (ret < 0) { - s->bat_bitmap[idx] = 0; - return ret; - } + + bitmap_set(s->bat_dirty_bmap, bat_entry_off(idx) / s->bat_dirty_block, 1); return bat2sect(s, idx) + offset; } + +static coroutine_fn int parallels_co_flush_to_os(BlockDriverState *bs) +{ + BDRVParallelsState *s = bs->opaque; + unsigned long size = DIV_ROUND_UP(s->header_size, s->bat_dirty_block); + unsigned long bit; + + qemu_co_mutex_lock(&s->lock); + + bit = find_first_bit(s->bat_dirty_bmap, size); + while (bit < size) { + uint32_t off = bit * s->bat_dirty_block; + uint32_t to_write = s->bat_dirty_block; + int ret; + + if (off + to_write > s->header_size) { + to_write = s->header_size - off; + } + ret = bdrv_pwrite(bs->file, off, (uint8_t *)s->header + off, to_write); + if (ret < 0) { + qemu_co_mutex_unlock(&s->lock); + return ret; + } + bit = find_next_bit(s->bat_dirty_bmap, size, bit + 1); + } + bitmap_zero(s->bat_dirty_bmap, size); + + qemu_co_mutex_unlock(&s->lock); + return 0; +} + + static int64_t coroutine_fn parallels_co_get_block_status(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum) { @@ -557,6 +589,10 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } } + s->bat_dirty_block = 4 * getpagesize(); + s->bat_dirty_bmap = + bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block)); + qemu_co_mutex_init(&s->lock); return 0; @@ -578,6 +614,7 @@ static void parallels_close(BlockDriverState *bs) parallels_update_header(bs); } + g_free(s->bat_dirty_bmap); qemu_vfree(s->header); } @@ -608,6 +645,7 @@ static BlockDriver bdrv_parallels = { .bdrv_close = parallels_close, .bdrv_co_get_block_status = parallels_co_get_block_status, .bdrv_has_zero_init = bdrv_has_zero_init_1, + .bdrv_co_flush_to_os = parallels_co_flush_to_os, .bdrv_co_readv = parallels_co_readv, .bdrv_co_writev = parallels_co_writev, From d61790112fa861fbbbb02b53f9c3beb9ca7f8419 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:58 +0300 Subject: [PATCH 25/38] block/parallels: add prealloc-mode and prealloc-size open paramemets This is preparational commit for tweaks in Parallels image expansion. The idea is that enlarge via truncate by one data block is slow. It would be much better to use fallocate via bdrv_write_zeroes and expand by some significant amount at once. Original idea with sequential file writing to the end of the file without fallocate/truncate would be slower than this approach if the image is expanded with several operations: - each image expanding means file metadata update, i.e. filesystem journal write. Truncate/write to newly truncated space update file metadata twice thus truncate removal helps. With fallocate call inside bdrv_write_zeroes file metadata is updated only once and this should happen infrequently thus this approach is the best one for the image expansion - tail writes are ordered, i.e. the guest IO queue could not be sent immediately to the host introducing additional IO delays This patch just adds proper parameters into BDRVParallelsState and performs options parsing in parallels_open. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Signed-off-by: Roman Kagan Message-id: 1430207220-24458-26-git-send-email-den@openvz.org CC: Roman Kagan CC: Kevin Wolf CC: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 83 +++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 77 insertions(+), 6 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 05fe030f05..440938e18a 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -31,6 +31,7 @@ #include "block/block_int.h" #include "qemu/module.h" #include "qemu/bitmap.h" +#include "qapi/util.h" /**************************************************************/ @@ -56,6 +57,20 @@ typedef struct ParallelsHeader { char padding[12]; } QEMU_PACKED ParallelsHeader; + +typedef enum ParallelsPreallocMode { + PRL_PREALLOC_MODE_FALLOCATE = 0, + PRL_PREALLOC_MODE_TRUNCATE = 1, + PRL_PREALLOC_MODE_MAX = 2, +} ParallelsPreallocMode; + +static const char *prealloc_mode_lookup[] = { + "falloc", + "truncate", + NULL, +}; + + typedef struct BDRVParallelsState { /** Locking is conservative, the lock protects * - image file extending (truncate, fallocate) @@ -73,14 +88,40 @@ typedef struct BDRVParallelsState { uint32_t *bat_bitmap; unsigned int bat_size; + uint64_t prealloc_size; + ParallelsPreallocMode prealloc_mode; + unsigned int tracks; unsigned int off_multiplier; - - bool has_truncate; } BDRVParallelsState; +#define PARALLELS_OPT_PREALLOC_MODE "prealloc-mode" +#define PARALLELS_OPT_PREALLOC_SIZE "prealloc-size" + +static QemuOptsList parallels_runtime_opts = { + .name = "parallels", + .head = QTAILQ_HEAD_INITIALIZER(parallels_runtime_opts.head), + .desc = { + { + .name = PARALLELS_OPT_PREALLOC_SIZE, + .type = QEMU_OPT_SIZE, + .help = "Preallocation size on image expansion", + .def_value_str = "128MiB", + }, + { + .name = PARALLELS_OPT_PREALLOC_MODE, + .type = QEMU_OPT_STRING, + .help = "Preallocation mode on image expansion " + "(allowed values: falloc, truncate)", + .def_value_str = "falloc", + }, + { /* end of list */ }, + }, +}; + + static int64_t bat2sect(BDRVParallelsState *s, uint32_t idx) { return (uint64_t)le32_to_cpu(s->bat_bitmap[idx]) * s->off_multiplier; @@ -159,7 +200,7 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) } pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS; - if (s->has_truncate) { + if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) { ret = bdrv_truncate(bs->file, (pos + s->tracks) << BDRV_SECTOR_BITS); } else { ret = bdrv_write_zeroes(bs->file, pos, s->tracks, 0); @@ -509,6 +550,9 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, BDRVParallelsState *s = bs->opaque; ParallelsHeader ph; int ret, size; + QemuOpts *opts = NULL; + Error *local_err = NULL; + char *buf; ret = bdrv_pread(bs->file, 0, &ph, sizeof(ph)); if (ret < 0) { @@ -567,9 +611,6 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->bat_bitmap = (uint32_t *)(s->header + 1); - s->has_truncate = bdrv_has_zero_init(bs->file) && - bdrv_truncate(bs->file, bdrv_getlength(bs->file)) == 0; - if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { /* Image was not closed correctly. The check is mandatory */ s->header_unclean = true; @@ -581,6 +622,31 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } } + opts = qemu_opts_create(¶llels_runtime_opts, NULL, 0, &local_err); + if (local_err != NULL) { + goto fail_options; + } + + qemu_opts_absorb_qdict(opts, options, &local_err); + if (local_err != NULL) { + goto fail_options; + } + + s->prealloc_size = + qemu_opt_get_size_del(opts, PARALLELS_OPT_PREALLOC_SIZE, 0); + s->prealloc_size = MAX(s->tracks, s->prealloc_size >> BDRV_SECTOR_BITS); + buf = qemu_opt_get_del(opts, PARALLELS_OPT_PREALLOC_MODE); + s->prealloc_mode = qapi_enum_parse(prealloc_mode_lookup, buf, + PRL_PREALLOC_MODE_MAX, PRL_PREALLOC_MODE_FALLOCATE, &local_err); + g_free(buf); + if (local_err != NULL) { + goto fail_options; + } + if (!bdrv_has_zero_init(bs->file) || + bdrv_truncate(bs->file, bdrv_getlength(bs->file)) != 0) { + s->prealloc_mode = PRL_PREALLOC_MODE_FALLOCATE; + } + if (flags & BDRV_O_RDWR) { s->header->inuse = cpu_to_le32(HEADER_INUSE_MAGIC); ret = parallels_update_header(bs); @@ -602,6 +668,11 @@ fail_format: fail: qemu_vfree(s->header); return ret; + +fail_options: + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; } From 19f5dc15912dfb6af06c97e4975023e545e85c72 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:46:59 +0300 Subject: [PATCH 26/38] block/parallels: optimize linear image expansion Plain image expansion spends a lot of time to update image file size. This seriously affects the performance. The following simple test qemu_img create -f parallels -o cluster_size=64k ./1.hds 64G qemu_io -n -c "write -P 0x11 0 1024M" ./1.hds could be improved if the format driver will pre-allocate some space in the image file with a reasonable chunk. This patch preallocates 128 Mb using bdrv_write_zeroes, which should normally use fallocate() call inside. Fallback to older truncate() could be used as a fallback using image open options thanks to the previous patch. The benefit is around 15%. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Karan Signed-off-by: Roman Kagan Message-id: 1430207220-24458-27-git-send-email-den@openvz.org CC: Kevin Wolf CC: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 42 ++++++++++++++++++++++++++++++++---------- 1 file changed, 32 insertions(+), 10 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index 440938e18a..e7124d97e7 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -88,6 +88,7 @@ typedef struct BDRVParallelsState { uint32_t *bat_bitmap; unsigned int bat_size; + int64_t data_end; uint64_t prealloc_size; ParallelsPreallocMode prealloc_mode; @@ -187,7 +188,6 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) BDRVParallelsState *s = bs->opaque; uint32_t idx, offset; int64_t pos; - int ret; idx = sector_num / s->tracks; offset = sector_num % s->tracks; @@ -200,14 +200,21 @@ static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) } pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS; - if (s->prealloc_mode == PRL_PREALLOC_MODE_TRUNCATE) { - ret = bdrv_truncate(bs->file, (pos + s->tracks) << BDRV_SECTOR_BITS); - } else { - ret = bdrv_write_zeroes(bs->file, pos, s->tracks, 0); - } - if (ret < 0) { - return ret; + if (s->data_end + s->tracks > pos) { + int ret; + if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { + ret = bdrv_write_zeroes(bs->file, s->data_end, + s->prealloc_size, 0); + } else { + ret = bdrv_truncate(bs->file, + (s->data_end + s->prealloc_size) << BDRV_SECTOR_BITS); + } + if (ret < 0) { + return ret; + } } + pos = s->data_end; + s->data_end += s->tracks; s->bat_bitmap[idx] = cpu_to_le32(pos / s->off_multiplier); @@ -549,7 +556,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, { BDRVParallelsState *s = bs->opaque; ParallelsHeader ph; - int ret, size; + int ret, size, i; QemuOpts *opts = NULL; Error *local_err = NULL; char *buf; @@ -599,7 +606,11 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, ret = -ENOMEM; goto fail; } - if (le32_to_cpu(ph.data_off) < s->header_size) { + s->data_end = le32_to_cpu(ph.data_off); + if (s->data_end == 0) { + s->data_end = ROUND_UP(bat_entry_off(s->bat_size), BDRV_SECTOR_SIZE); + } + if (s->data_end < s->header_size) { /* there is not enough unused space to fit to block align between BAT and actual data. We can't avoid read-modify-write... */ s->header_size = size; @@ -611,6 +622,13 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags, } s->bat_bitmap = (uint32_t *)(s->header + 1); + for (i = 0; i < s->bat_size; i++) { + int64_t off = bat2sect(s, i); + if (off >= s->data_end) { + s->data_end = off + s->tracks; + } + } + if (le32_to_cpu(ph.inuse) == HEADER_INUSE_MAGIC) { /* Image was not closed correctly. The check is mandatory */ s->header_unclean = true; @@ -685,6 +703,10 @@ static void parallels_close(BlockDriverState *bs) parallels_update_header(bs); } + if (bs->open_flags & BDRV_O_RDWR) { + bdrv_truncate(bs->file, s->data_end << BDRV_SECTOR_BITS); + } + g_free(s->bat_dirty_bmap); qemu_vfree(s->header); } From ddd2ef2ce8d693b6e2635a0c20f65744641ff8df Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 28 Apr 2015 10:47:00 +0300 Subject: [PATCH 27/38] block/parallels: improve image writing performance further Try to perform IO for the biggest continuous block possible. All blocks abscent in the image are accounted in the same type and preallocation is made for all of them at once. The performance for sequential write is increased from 200 Mb/sec to 235 Mb/sec on my SSD HDD. Signed-off-by: Denis V. Lunev Reviewed-by: Roman Kagan Signed-off-by: Roman Kagan Message-id: 1430207220-24458-28-git-send-email-den@openvz.org CC: Kevin Wolf CC: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block/parallels.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/block/parallels.c b/block/parallels.c index e7124d97e7..046b56844c 100644 --- a/block/parallels.c +++ b/block/parallels.c @@ -183,43 +183,47 @@ static int64_t block_status(BDRVParallelsState *s, int64_t sector_num, return start_off; } -static int64_t allocate_cluster(BlockDriverState *bs, int64_t sector_num) +static int64_t allocate_clusters(BlockDriverState *bs, int64_t sector_num, + int nb_sectors, int *pnum) { BDRVParallelsState *s = bs->opaque; - uint32_t idx, offset; - int64_t pos; + uint32_t idx, to_allocate, i; + int64_t pos, space; + + pos = block_status(s, sector_num, nb_sectors, pnum); + if (pos > 0) { + return pos; + } idx = sector_num / s->tracks; - offset = sector_num % s->tracks; - if (idx >= s->bat_size) { return -EINVAL; } - if (s->bat_bitmap[idx] != 0) { - return bat2sect(s, idx) + offset; - } - pos = bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS; - if (s->data_end + s->tracks > pos) { + to_allocate = (sector_num + *pnum + s->tracks - 1) / s->tracks - idx; + space = to_allocate * s->tracks; + if (s->data_end + space > bdrv_getlength(bs->file) >> BDRV_SECTOR_BITS) { int ret; + space += s->prealloc_size; if (s->prealloc_mode == PRL_PREALLOC_MODE_FALLOCATE) { - ret = bdrv_write_zeroes(bs->file, s->data_end, - s->prealloc_size, 0); + ret = bdrv_write_zeroes(bs->file, s->data_end, space, 0); } else { ret = bdrv_truncate(bs->file, - (s->data_end + s->prealloc_size) << BDRV_SECTOR_BITS); + (s->data_end + space) << BDRV_SECTOR_BITS); } if (ret < 0) { return ret; } } - pos = s->data_end; - s->data_end += s->tracks; - s->bat_bitmap[idx] = cpu_to_le32(pos / s->off_multiplier); + for (i = 0; i < to_allocate; i++) { + s->bat_bitmap[idx + i] = cpu_to_le32(s->data_end / s->off_multiplier); + s->data_end += s->tracks; + bitmap_set(s->bat_dirty_bmap, + bat_entry_off(idx) / s->bat_dirty_block, 1); + } - bitmap_set(s->bat_dirty_bmap, bat_entry_off(idx) / s->bat_dirty_block, 1); - return bat2sect(s, idx) + offset; + return bat2sect(s, idx) + sector_num % s->tracks; } @@ -287,14 +291,13 @@ static coroutine_fn int parallels_co_writev(BlockDriverState *bs, int n, nbytes; qemu_co_mutex_lock(&s->lock); - position = allocate_cluster(bs, sector_num); + position = allocate_clusters(bs, sector_num, nb_sectors, &n); qemu_co_mutex_unlock(&s->lock); if (position < 0) { ret = (int)position; break; } - n = cluster_remainder(s, sector_num, nb_sectors); nbytes = n << BDRV_SECTOR_BITS; qemu_iovec_reset(&hd_qiov); From e4a7b344df40b1f4b2e732ddb0d68079ce658d89 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Wed, 25 Mar 2015 18:57:36 -0400 Subject: [PATCH 28/38] configure: handle clang -nopie argument warning MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit gcc 4.9.2 treats -nopie as an error: cc: error: unrecognized command line option ‘-nopie’ clang 3.5.0 treats -nopie as a warning: clang: warning: argument unused during compilation: '-nopie' The causes ./configure to fail with clang: ERROR: configure test passed without -Werror but failed with -Werror. Make the -nopie test use -Werror so that compile_prog works for both gcc and clang. Signed-off-by: Stefan Hajnoczi Signed-off-by: John Snow Reviewed-by: Stefan Hajnoczi Message-id: 1427324259-1481-2-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- configure | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/configure b/configure index 1f0f485768..770f4c6526 100755 --- a/configure +++ b/configure @@ -1607,7 +1607,7 @@ EOF fi fi - if compile_prog "-fno-pie" "-nopie"; then + if compile_prog "-Werror -fno-pie" "-nopie"; then CFLAGS_NOPIE="-fno-pie" LDFLAGS_NOPIE="-nopie" fi From 93b25869228a3c0c632a6aa66624cc4e549ba14a Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 25 Mar 2015 18:57:37 -0400 Subject: [PATCH 29/38] configure: factor out supported flag check Factor out the function that checks if a compiler flag is supported or not. Signed-off-by: John Snow Reviewed-by: Stefan Hajnoczi Message-id: 1427324259-1481-3-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- configure | 33 +++++++++++++++++++-------------- 1 file changed, 19 insertions(+), 14 deletions(-) diff --git a/configure b/configure index 770f4c6526..f744266681 100755 --- a/configure +++ b/configure @@ -436,6 +436,12 @@ EOF compile_object } +write_c_skeleton() { + cat > $TMPC < $TMPC << EOF -int main(void) { return 0; } -EOF + write_c_skeleton; if compile_prog "" "-liberty" ; then LIBS="-liberty $LIBS" fi @@ -1445,10 +1449,7 @@ if test -z "$werror" ; then fi # check that the C compiler works. -cat > $TMPC < $TMPC << EOF -int main(void) { return 0; } -EOF -for flag in $gcc_flags; do + +cc_has_warning_flag() { + write_c_skeleton; + # Use the positive sense of the flag when testing for -Wno-wombat # support (gcc will happily accept the -Wno- form of unknown # warning options). - optflag="$(echo $flag | sed -e 's/^-Wno-/-W/')" - if compile_prog "-Werror $optflag" "" ; then - QEMU_CFLAGS="$QEMU_CFLAGS $flag" + optflag="$(echo $1 | sed -e 's/^-Wno-/-W/')" + compile_prog "-Werror $optflag" "" +} + +for flag in $gcc_flags; do + if cc_has_warning_flag $flag ; then + QEMU_CFLAGS="$QEMU_CFLAGS $flag" fi done From bbbf2e04e5ea347d877c7fa8ee02e4bb647a48fc Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 25 Mar 2015 18:57:38 -0400 Subject: [PATCH 30/38] configure: silence glib unknown attribute __alloc_size__ The glib headers use GCC attributes. Unfortunately the __GNUC__ and __GNUC_MINOR__ version macros are also defined by clang, but clang doesn't support the same attributes as GCC. clang 3.5.0 does not support the __alloc_size__ attribute: https://github.com/llvm-mirror/clang/commit/c047507a9a79e89fc8339e074fa72822a7e7ea73 The following warning is produced: gstrfuncs.h:257:44: warning: unknown attribute '__alloc_size__' ignored [-Wunknown-attributes] G_GNUC_MALLOC G_GNUC_ALLOC_SIZE(2); gmacros.h:67:45: note: expanded from macro 'G_GNUC_ALLOC_SIZE' #define G_GNUC_ALLOC_SIZE(x) __attribute__((__alloc_size__(x))) This patch checks whether glib headers cause warnings and disables -Wunknown-attributes if it is able to. Signed-off-by: Stefan Hajnoczi Signed-off-by: John Snow Reviewed-by: Stefan Hajnoczi Message-id: 1427324259-1481-4-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- configure | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/configure b/configure index f744266681..24d7ecc26e 100755 --- a/configure +++ b/configure @@ -2807,6 +2807,18 @@ if ! $pkg_config --atleast-version=2.38 glib-2.0; then glib_subprocess=no fi +# Silence clang 3.5.0 warnings about glib attribute __alloc_size__ usage +cat > $TMPC << EOF +#include +int main(void) { return 0; } +EOF +if ! compile_prog "$glib_cflags -Werror" "$glib_libs" ; then + if cc_has_warning_flag "-Wno-unknown-attributes"; then + glib_cflags="-Wno-unknown-attributes $glib_cflags" + CFLAGS="-Wno-unknown-attributes $CFLAGS" + fi +fi + ########################################## # SHA command probe for modules if test "$modules" = yes; then From fd0e60530f10078f488fa3e9591cc7db5732989c Mon Sep 17 00:00:00 2001 From: John Snow Date: Wed, 25 Mar 2015 18:57:39 -0400 Subject: [PATCH 31/38] configure: Add workaround for ccache and clang Test if ccache is interfering with semantic analysis of macros, disable its habit of trying to compile already pre-processed versions of code if so. ccache attempts to save time by compiling pre-processed versions of code, but this disturbs clang's static analysis enough to produce false positives. ccache allows us to disable this feature, opting instead to compile the original version instead of its preprocessed version. This makes ccache much slower for cache misses, but at least it becomes usable with QEMU/clang. This workaround only activates for users using ccache AND clang, and only if their configuration is observed to be producing warnings. You may need to clear your ccache for builds started without -Werror, as those may continue to produce warnings from the cache. Thanks to Peter Eisentraut for his writeup on the issue: http://peter.eisentraut.org/blog/2014/12/01/ccache-and-clang-part-3/ Signed-off-by: John Snow Reviewed-by: Stefan Hajnoczi Message-id: 1427324259-1481-5-git-send-email-jsnow@redhat.com Signed-off-by: Stefan Hajnoczi --- configure | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 24d7ecc26e..f758f32061 100755 --- a/configure +++ b/configure @@ -103,7 +103,8 @@ update_cxxflags() { } compile_object() { - do_cc $QEMU_CFLAGS -c -o $TMPO $TMPC + local_cflags="$1" + do_cc $QEMU_CFLAGS $local_cflags -c -o $TMPO $TMPC } compile_prog() { @@ -4209,6 +4210,33 @@ if compile_prog "" "" ; then getauxval=yes fi +######################################## +# check if ccache is interfering with +# semantic analysis of macros + +ccache_cpp2=no +cat > $TMPC << EOF +static const int Z = 1; +#define fn() ({ Z; }) +#define TAUT(X) ((X) == Z) +#define PAREN(X, Y) (X == Y) +#define ID(X) (X) +int main(int argc, char *argv[]) +{ + int x = 0, y = 0; + x = ID(x); + x = fn(); + fn(); + if (PAREN(x, y)) return 0; + if (TAUT(Z)) return 0; + return 0; +} +EOF + +if ! compile_object "-Werror"; then + ccache_cpp2=yes +fi + ########################################## # End of CC checks # After here, no more $cc or $ld runs @@ -5502,6 +5530,10 @@ if test "$numa" = "yes"; then echo "CONFIG_NUMA=y" >> $config_host_mak fi +if test "$ccache_cpp2" = "yes"; then + echo "export CCACHE_CPP2=y" >> $config_host_mak +fi + # build tree in object directory in case the source is not in the current directory DIRS="tests tests/tcg tests/tcg/cris tests/tcg/lm32 tests/libqos tests/qapi-schema tests/tcg/xtensa tests/qemu-iotests" DIRS="$DIRS fsdev" From eaf5fe2dd4ec001d645ff3b343f466457badaa64 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 7 May 2015 17:45:48 +0200 Subject: [PATCH 32/38] block: return EPERM on writes or discards to read-only devices This is the behavior in the operating system, for example Linux's blkdev_write_iter has the following: if (bdev_read_only(I_BDEV(bd_inode))) return -EPERM; This does not apply to opening a device for read/write, when the device only supports read-only operation. In this case any of EACCES, EPERM or EROFS is acceptable depending on why writing is not possible. Signed-off-by: Paolo Bonzini Message-id: 1431013548-22492-1-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi --- block/io.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/block/io.c b/block/io.c index 1ce62c4fbc..a05ad677d3 100644 --- a/block/io.c +++ b/block/io.c @@ -1205,7 +1205,7 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, return -ENOMEDIUM; } if (bs->read_only) { - return -EACCES; + return -EPERM; } ret = bdrv_check_byte_request(bs, offset, bytes); @@ -2340,7 +2340,7 @@ int coroutine_fn bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, if (ret < 0) { return ret; } else if (bs->read_only) { - return -EROFS; + return -EPERM; } bdrv_reset_dirty(bs, sector_num, nb_sectors); From 4196d2f0308cb1ae13ed450424ab7dfe154acda9 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 12 May 2015 17:30:55 +0300 Subject: [PATCH 33/38] block: minimal bounce buffer alignment The patch introduces new concept: minimal memory alignment for bounce buffers. Original so called "optimal" value is actually minimal required value for aligment. It should be used for validation that the IOVec is properly aligned and bounce buffer is not required. Though, from the performance point of view, it would be better if bounce buffer or IOVec allocated by QEMU will be aligned stricter. The patch does not change any alignment value yet. Signed-off-by: Denis V. Lunev Reviewed-by: Kevin Wolf Message-id: 1431441056-26198-2-git-send-email-den@openvz.org CC: Paolo Bonzini Reviewed-by: Kevin Wolf CC: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block.c | 11 +++++++++++ block/io.c | 7 ++++++- block/raw-posix.c | 1 + include/block/block.h | 2 ++ include/block/block_int.h | 3 +++ 5 files changed, 23 insertions(+), 1 deletion(-) diff --git a/block.c b/block.c index 7904098c64..e293907c2b 100644 --- a/block.c +++ b/block.c @@ -113,6 +113,16 @@ size_t bdrv_opt_mem_align(BlockDriverState *bs) return bs->bl.opt_mem_alignment; } +size_t bdrv_min_mem_align(BlockDriverState *bs) +{ + if (!bs || !bs->drv) { + /* 4k should be on the safe side */ + return 4096; + } + + return bs->bl.min_mem_alignment; +} + /* check if the path starts with ":" */ int path_has_protocol(const char *path) { @@ -890,6 +900,7 @@ static int bdrv_open_common(BlockDriverState *bs, BlockDriverState *file, } assert(bdrv_opt_mem_align(bs) != 0); + assert(bdrv_min_mem_align(bs) != 0); assert((bs->request_alignment != 0) || bs->sg); return 0; diff --git a/block/io.c b/block/io.c index a05ad677d3..e6c363921d 100644 --- a/block/io.c +++ b/block/io.c @@ -201,8 +201,10 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) } bs->bl.opt_transfer_length = bs->file->bl.opt_transfer_length; bs->bl.max_transfer_length = bs->file->bl.max_transfer_length; + bs->bl.min_mem_alignment = bs->file->bl.min_mem_alignment; bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; } else { + bs->bl.min_mem_alignment = 512; bs->bl.opt_mem_alignment = 512; } @@ -221,6 +223,9 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.opt_mem_alignment = MAX(bs->bl.opt_mem_alignment, bs->backing_hd->bl.opt_mem_alignment); + bs->bl.min_mem_alignment = + MAX(bs->bl.min_mem_alignment, + bs->backing_hd->bl.min_mem_alignment); } /* Then let the driver override it */ @@ -2489,7 +2494,7 @@ void *qemu_try_blockalign0(BlockDriverState *bs, size_t size) bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov) { int i; - size_t alignment = bdrv_opt_mem_align(bs); + size_t alignment = bdrv_min_mem_align(bs); for (i = 0; i < qiov->niov; i++) { if ((uintptr_t) qiov->iov[i].iov_base % alignment) { diff --git a/block/raw-posix.c b/block/raw-posix.c index 24d85826c4..70839245fd 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -725,6 +725,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) BDRVRawState *s = bs->opaque; raw_probe_alignment(bs, s->fd, errp); + bs->bl.min_mem_alignment = s->buf_align; bs->bl.opt_mem_alignment = s->buf_align; } diff --git a/include/block/block.h b/include/block/block.h index 7d1a7174f6..c1c963eb5c 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -440,6 +440,8 @@ void bdrv_img_create(const char *filename, const char *fmt, /* Returns the alignment in bytes that is required so that no bounce buffer * is required throughout the stack */ +size_t bdrv_min_mem_align(BlockDriverState *bs); +/* Returns optimal alignment in bytes for bounce buffer */ size_t bdrv_opt_mem_align(BlockDriverState *bs); void bdrv_set_guest_block_size(BlockDriverState *bs, int align); void *qemu_blockalign(BlockDriverState *bs, size_t size); diff --git a/include/block/block_int.h b/include/block/block_int.h index db29b7424e..f004378d58 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -313,6 +313,9 @@ typedef struct BlockLimits { int max_transfer_length; /* memory alignment so that no bounce buffer is needed */ + size_t min_mem_alignment; + + /* memory alignment for bounce buffer */ size_t opt_mem_alignment; } BlockLimits; From 459b4e66129d091a11e9886ecc15a8bf9f7f3d92 Mon Sep 17 00:00:00 2001 From: "Denis V. Lunev" Date: Tue, 12 May 2015 17:30:56 +0300 Subject: [PATCH 34/38] block: align bounce buffers to page The following sequence int fd = open(argv[1], O_RDWR | O_CREAT | O_DIRECT, 0644); for (i = 0; i < 100000; i++) write(fd, buf, 4096); performs 5% better if buf is aligned to 4096 bytes. The difference is quite reliable. On the other hand we do not want at the moment to enforce bounce buffering if guest request is aligned to 512 bytes. The patch changes default bounce buffer optimal alignment to MAX(page size, 4k). 4k is chosen as maximal known sector size on real HDD. The justification of the performance improve is quite interesting. From the kernel point of view each request to the disk was split by two. This could be seen by blktrace like this: 9,0 11 1 0.000000000 11151 Q WS 312737792 + 1023 [qemu-img] 9,0 11 2 0.000007938 11151 Q WS 312738815 + 8 [qemu-img] 9,0 11 3 0.000030735 11151 Q WS 312738823 + 1016 [qemu-img] 9,0 11 4 0.000032482 11151 Q WS 312739839 + 8 [qemu-img] 9,0 11 5 0.000041379 11151 Q WS 312739847 + 1016 [qemu-img] 9,0 11 6 0.000042818 11151 Q WS 312740863 + 8 [qemu-img] 9,0 11 7 0.000051236 11151 Q WS 312740871 + 1017 [qemu-img] 9,0 5 1 0.169071519 11151 Q WS 312741888 + 1023 [qemu-img] After the patch the pattern becomes normal: 9,0 6 1 0.000000000 12422 Q WS 314834944 + 1024 [qemu-img] 9,0 6 2 0.000038527 12422 Q WS 314835968 + 1024 [qemu-img] 9,0 6 3 0.000072849 12422 Q WS 314836992 + 1024 [qemu-img] 9,0 6 4 0.000106276 12422 Q WS 314838016 + 1024 [qemu-img] and the amount of requests sent to disk (could be calculated counting number of lines in the output of blktrace) is reduced about 2 times. Both qemu-img and qemu-io are affected while qemu-kvm is not. The guest does his job well and real requests comes properly aligned (to page). Signed-off-by: Denis V. Lunev Reviewed-by: Kevin Wolf Message-id: 1431441056-26198-3-git-send-email-den@openvz.org CC: Paolo Bonzini CC: Kevin Wolf CC: Stefan Hajnoczi Signed-off-by: Stefan Hajnoczi --- block.c | 8 ++++---- block/io.c | 2 +- block/raw-posix.c | 13 +++++++------ 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/block.c b/block.c index e293907c2b..325f7272fe 100644 --- a/block.c +++ b/block.c @@ -106,8 +106,8 @@ int is_windows_drive(const char *filename) size_t bdrv_opt_mem_align(BlockDriverState *bs) { if (!bs || !bs->drv) { - /* 4k should be on the safe side */ - return 4096; + /* page size or 4k (hdd sector size) should be on the safe side */ + return MAX(4096, getpagesize()); } return bs->bl.opt_mem_alignment; @@ -116,8 +116,8 @@ size_t bdrv_opt_mem_align(BlockDriverState *bs) size_t bdrv_min_mem_align(BlockDriverState *bs) { if (!bs || !bs->drv) { - /* 4k should be on the safe side */ - return 4096; + /* page size or 4k (hdd sector size) should be on the safe side */ + return MAX(4096, getpagesize()); } return bs->bl.min_mem_alignment; diff --git a/block/io.c b/block/io.c index e6c363921d..b3ea95c74f 100644 --- a/block/io.c +++ b/block/io.c @@ -205,7 +205,7 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) bs->bl.opt_mem_alignment = bs->file->bl.opt_mem_alignment; } else { bs->bl.min_mem_alignment = 512; - bs->bl.opt_mem_alignment = 512; + bs->bl.opt_mem_alignment = getpagesize(); } if (bs->backing_hd) { diff --git a/block/raw-posix.c b/block/raw-posix.c index 70839245fd..2990e954ae 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -301,6 +301,7 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) { BDRVRawState *s = bs->opaque; char *buf; + size_t max_align = MAX(MAX_BLOCKSIZE, getpagesize()); /* For /dev/sg devices the alignment is not really used. With buffered I/O, we don't have any restrictions. */ @@ -330,9 +331,9 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) /* If we could not get the sizes so far, we can only guess them */ if (!s->buf_align) { size_t align; - buf = qemu_memalign(MAX_BLOCKSIZE, 2 * MAX_BLOCKSIZE); - for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) { - if (raw_is_io_aligned(fd, buf + align, MAX_BLOCKSIZE)) { + buf = qemu_memalign(max_align, 2 * max_align); + for (align = 512; align <= max_align; align <<= 1) { + if (raw_is_io_aligned(fd, buf + align, max_align)) { s->buf_align = align; break; } @@ -342,8 +343,8 @@ static void raw_probe_alignment(BlockDriverState *bs, int fd, Error **errp) if (!bs->request_alignment) { size_t align; - buf = qemu_memalign(s->buf_align, MAX_BLOCKSIZE); - for (align = 512; align <= MAX_BLOCKSIZE; align <<= 1) { + buf = qemu_memalign(s->buf_align, max_align); + for (align = 512; align <= max_align; align <<= 1) { if (raw_is_io_aligned(fd, buf, align)) { bs->request_alignment = align; break; @@ -726,7 +727,7 @@ static void raw_refresh_limits(BlockDriverState *bs, Error **errp) raw_probe_alignment(bs, s->fd, errp); bs->bl.min_mem_alignment = s->buf_align; - bs->bl.opt_mem_alignment = s->buf_align; + bs->bl.opt_mem_alignment = MAX(s->buf_align, getpagesize()); } static int check_for_dasd(int fd) From d01c07f2221ca39ab2dd9e55932d99db98103b30 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 May 2015 13:11:59 +0000 Subject: [PATCH 35/38] Revert "block: Fix unaligned zero write" This reverts commit fc3959e4669a1c2149b91ccb05101cfc7ae1fc05. The core write code already handles the case, so remove this duplication. Because commit 61007b316 moved the touched code from block.c to block/io.c, the change is manually reverted. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Kevin Wolf Message-id: 1431522721-3266-2-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/io.c | 45 ++++++--------------------------------------- 1 file changed, 6 insertions(+), 39 deletions(-) diff --git a/block/io.c b/block/io.c index b3ea95c74f..ad877b0aeb 100644 --- a/block/io.c +++ b/block/io.c @@ -934,19 +934,6 @@ out: return ret; } -static inline uint64_t bdrv_get_align(BlockDriverState *bs) -{ - /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ - return MAX(BDRV_SECTOR_SIZE, bs->request_alignment); -} - -static inline bool bdrv_req_is_aligned(BlockDriverState *bs, - int64_t offset, size_t bytes) -{ - int64_t align = bdrv_get_align(bs); - return !(offset & (align - 1) || (bytes & (align - 1))); -} - /* * Handle a read request in coroutine context */ @@ -957,7 +944,8 @@ static int coroutine_fn bdrv_co_do_preadv(BlockDriverState *bs, BlockDriver *drv = bs->drv; BdrvTrackedRequest req; - uint64_t align = bdrv_get_align(bs); + /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); uint8_t *head_buf = NULL; uint8_t *tail_buf = NULL; QEMUIOVector local_qiov; @@ -1199,7 +1187,8 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, BdrvRequestFlags flags) { BdrvTrackedRequest req; - uint64_t align = bdrv_get_align(bs); + /* TODO Lift BDRV_SECTOR_SIZE restriction in BlockDriver interface */ + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); uint8_t *head_buf = NULL; uint8_t *tail_buf = NULL; QEMUIOVector local_qiov; @@ -1298,10 +1287,6 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, bytes = ROUND_UP(bytes, align); } - if (use_local_qiov) { - /* Local buffer may have non-zero data. */ - flags &= ~BDRV_REQ_ZERO_WRITE; - } ret = bdrv_aligned_pwritev(bs, &req, offset, bytes, use_local_qiov ? &local_qiov : qiov, flags); @@ -1342,32 +1327,14 @@ int coroutine_fn bdrv_co_write_zeroes(BlockDriverState *bs, int64_t sector_num, int nb_sectors, BdrvRequestFlags flags) { - int ret; - trace_bdrv_co_write_zeroes(bs, sector_num, nb_sectors, flags); if (!(bs->open_flags & BDRV_O_UNMAP)) { flags &= ~BDRV_REQ_MAY_UNMAP; } - if (bdrv_req_is_aligned(bs, sector_num << BDRV_SECTOR_BITS, - nb_sectors << BDRV_SECTOR_BITS)) { - ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, - BDRV_REQ_ZERO_WRITE | flags); - } else { - uint8_t *buf; - QEMUIOVector local_qiov; - size_t bytes = nb_sectors << BDRV_SECTOR_BITS; - buf = qemu_memalign(bdrv_opt_mem_align(bs), bytes); - memset(buf, 0, bytes); - qemu_iovec_init(&local_qiov, 1); - qemu_iovec_add(&local_qiov, buf, bytes); - - ret = bdrv_co_do_writev(bs, sector_num, nb_sectors, &local_qiov, - BDRV_REQ_ZERO_WRITE | flags); - qemu_vfree(buf); - } - return ret; + return bdrv_co_do_writev(bs, sector_num, nb_sectors, NULL, + BDRV_REQ_ZERO_WRITE | flags); } int bdrv_flush_all(void) From 9eeb6dd1b27bd57eb4e3869290e87feac8e8b226 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 May 2015 13:12:00 +0000 Subject: [PATCH 36/38] block: Fix NULL deference for unaligned write if qiov is NULL For zero write, callers pass in NULL qiov (qemu-io "write -z" or scsi-disk "write same"). Commit fc3959e466 fixed bdrv_co_write_zeroes which is the common case for this bug, but it still exists in bdrv_aio_write_zeroes. A simpler fix would be in bdrv_co_do_pwritev which is the NULL dereference point and covers both cases. So don't access it in bdrv_co_do_pwritev in this case, use three aligned writes. [Initialize ret to 0 in bdrv_co_do_zero_pwritev() to avoid uninitialized variable warning with gcc 4.9.2. --Stefan] Signed-off-by: Fam Zheng Message-id: 1431522721-3266-3-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi --- block/io.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 95 insertions(+), 2 deletions(-) diff --git a/block/io.c b/block/io.c index ad877b0aeb..284784eea1 100644 --- a/block/io.c +++ b/block/io.c @@ -1179,6 +1179,94 @@ static int coroutine_fn bdrv_aligned_pwritev(BlockDriverState *bs, return ret; } +static int coroutine_fn bdrv_co_do_zero_pwritev(BlockDriverState *bs, + int64_t offset, + unsigned int bytes, + BdrvRequestFlags flags, + BdrvTrackedRequest *req) +{ + uint8_t *buf = NULL; + QEMUIOVector local_qiov; + struct iovec iov; + uint64_t align = MAX(BDRV_SECTOR_SIZE, bs->request_alignment); + unsigned int head_padding_bytes, tail_padding_bytes; + int ret = 0; + + head_padding_bytes = offset & (align - 1); + tail_padding_bytes = align - ((offset + bytes) & (align - 1)); + + + assert(flags & BDRV_REQ_ZERO_WRITE); + if (head_padding_bytes || tail_padding_bytes) { + buf = qemu_blockalign(bs, align); + iov = (struct iovec) { + .iov_base = buf, + .iov_len = align, + }; + qemu_iovec_init_external(&local_qiov, &iov, 1); + } + if (head_padding_bytes) { + uint64_t zero_bytes = MIN(bytes, align - head_padding_bytes); + + /* RMW the unaligned part before head. */ + mark_request_serialising(req, align); + wait_serialising_requests(req); + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_HEAD); + ret = bdrv_aligned_preadv(bs, req, offset & ~(align - 1), align, + align, &local_qiov, 0); + if (ret < 0) { + goto fail; + } + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_HEAD); + + memset(buf + head_padding_bytes, 0, zero_bytes); + ret = bdrv_aligned_pwritev(bs, req, offset & ~(align - 1), align, + &local_qiov, + flags & ~BDRV_REQ_ZERO_WRITE); + if (ret < 0) { + goto fail; + } + offset += zero_bytes; + bytes -= zero_bytes; + } + + assert(!bytes || (offset & (align - 1)) == 0); + if (bytes >= align) { + /* Write the aligned part in the middle. */ + uint64_t aligned_bytes = bytes & ~(align - 1); + ret = bdrv_aligned_pwritev(bs, req, offset, aligned_bytes, + NULL, flags); + if (ret < 0) { + goto fail; + } + bytes -= aligned_bytes; + offset += aligned_bytes; + } + + assert(!bytes || (offset & (align - 1)) == 0); + if (bytes) { + assert(align == tail_padding_bytes + bytes); + /* RMW the unaligned part after tail. */ + mark_request_serialising(req, align); + wait_serialising_requests(req); + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_TAIL); + ret = bdrv_aligned_preadv(bs, req, offset, align, + align, &local_qiov, 0); + if (ret < 0) { + goto fail; + } + BLKDBG_EVENT(bs, BLKDBG_PWRITEV_RMW_AFTER_TAIL); + + memset(buf, 0, bytes); + ret = bdrv_aligned_pwritev(bs, req, offset, align, + &local_qiov, flags & ~BDRV_REQ_ZERO_WRITE); + } +fail: + qemu_vfree(buf); + return ret; + +} + /* * Handle a write request in coroutine context */ @@ -1219,6 +1307,11 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, */ tracked_request_begin(&req, bs, offset, bytes, true); + if (!qiov) { + ret = bdrv_co_do_zero_pwritev(bs, offset, bytes, flags, &req); + goto out; + } + if (offset & (align - 1)) { QEMUIOVector head_qiov; struct iovec head_iov; @@ -1292,14 +1385,14 @@ static int coroutine_fn bdrv_co_do_pwritev(BlockDriverState *bs, flags); fail: - tracked_request_end(&req); if (use_local_qiov) { qemu_iovec_destroy(&local_qiov); } qemu_vfree(head_buf); qemu_vfree(tail_buf); - +out: + tracked_request_end(&req); return ret; } From ab53c44718305d3fde3d9d2251889f1cab694be2 Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 13 May 2015 13:12:01 +0000 Subject: [PATCH 37/38] qemu-iotests: Test unaligned sub-block zero write Test zero write in byte range 512~1024 for 4k alignment. Signed-off-by: Fam Zheng Reviewed-by: Stefan Hajnoczi Reviewed-by: Kevin Wolf Message-id: 1431522721-3266-4-git-send-email-famz@redhat.com Signed-off-by: Stefan Hajnoczi --- tests/qemu-iotests/033 | 13 +++++++++++++ tests/qemu-iotests/033.out | 30 ++++++++++++++++++++++++++++++ 2 files changed, 43 insertions(+) diff --git a/tests/qemu-iotests/033 b/tests/qemu-iotests/033 index 4008f103db..a61d8ced1c 100755 --- a/tests/qemu-iotests/033 +++ b/tests/qemu-iotests/033 @@ -78,6 +78,19 @@ for align in 512 4k; do echo echo "== verifying patterns (2) ==" do_test $align "read -P 0x0 0x400 0x20000" "$TEST_IMG" | _filter_qemu_io + + echo + echo "== rewriting unaligned zeroes ==" + do_test $align "write -P 0xb 0x0 0x1000" "$TEST_IMG" | _filter_qemu_io + do_test $align "write -z 0x200 0x200" "$TEST_IMG" | _filter_qemu_io + + echo + echo "== verifying patterns (3) ==" + do_test $align "read -P 0xb 0x0 0x200" "$TEST_IMG" | _filter_qemu_io + do_test $align "read -P 0x0 0x200 0x200" "$TEST_IMG" | _filter_qemu_io + do_test $align "read -P 0xb 0x400 0xc00" "$TEST_IMG" | _filter_qemu_io + + echo done # success, all done diff --git a/tests/qemu-iotests/033.out b/tests/qemu-iotests/033.out index 305949fa43..c3d18aa450 100644 --- a/tests/qemu-iotests/033.out +++ b/tests/qemu-iotests/033.out @@ -27,6 +27,21 @@ wrote 65536/65536 bytes at offset 65536 read 131072/131072 bytes at offset 1024 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +== rewriting unaligned zeroes == +wrote 4096/4096 bytes at offset 0 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verifying patterns (3) == +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 3072/3072 bytes at offset 1024 +3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + + == preparing image == wrote 1024/1024 bytes at offset 512 1 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) @@ -52,4 +67,19 @@ wrote 65536/65536 bytes at offset 65536 == verifying patterns (2) == read 131072/131072 bytes at offset 1024 128 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== rewriting unaligned zeroes == +wrote 4096/4096 bytes at offset 0 +4 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +wrote 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + +== verifying patterns (3) == +read 512/512 bytes at offset 0 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 512/512 bytes at offset 512 +512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) +read 3072/3072 bytes at offset 1024 +3 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) + *** done From a53f1a95f9605f300fbafbc8b60b8a8c67e9c4b4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 14 May 2015 12:35:02 +0200 Subject: [PATCH 38/38] block: get_block_status: use "else" when testing the opposite condition A bit of Boolean algebra (and common sense) tells us that the second "if" here is looking for blocks that are not allocated. This is the opposite of the "if" that sets BDRV_BLOCK_ALLOCATED, and thus it can use an "else". Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Reviewed-by: Fam Zheng Message-id: 1431599702-10431-1-git-send-email-pbonzini@redhat.com Signed-off-by: Stefan Hajnoczi --- block/io.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/block/io.c b/block/io.c index 284784eea1..e394d92626 100644 --- a/block/io.c +++ b/block/io.c @@ -1521,9 +1521,7 @@ static int64_t coroutine_fn bdrv_co_get_block_status(BlockDriverState *bs, if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { ret |= BDRV_BLOCK_ALLOCATED; - } - - if (!(ret & BDRV_BLOCK_DATA) && !(ret & BDRV_BLOCK_ZERO)) { + } else { if (bdrv_unallocated_blocks_are_zero(bs)) { ret |= BDRV_BLOCK_ZERO; } else if (bs->backing_hd) {