From a66c4b83c99c06626bed19ec0bf609bfe87a8cf3 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Wed, 20 Mar 2019 14:59:37 +0300 Subject: [PATCH 1/7] iotests: add 248: test resume mirror after auto pause on ENOSPC Test that mirror job actually resume on resume command after being automatically paused on ENOSPC error. It's a follow-up test for 8d9648cbf3e "blockjob: fix user pause in block_job_error_action" Signed-off-by: Vladimir Sementsov-Ogievskiy Tested-by: John Snow Reviewed-by: John Snow Signed-off-by: Kevin Wolf --- tests/qemu-iotests/248 | 71 ++++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/248.out | 8 +++++ tests/qemu-iotests/group | 1 + 3 files changed, 80 insertions(+) create mode 100755 tests/qemu-iotests/248 create mode 100644 tests/qemu-iotests/248.out diff --git a/tests/qemu-iotests/248 b/tests/qemu-iotests/248 new file mode 100755 index 0000000000..f26b4bb2aa --- /dev/null +++ b/tests/qemu-iotests/248 @@ -0,0 +1,71 @@ +#!/usr/bin/env python +# +# Test resume mirror after auto pause on ENOSPC +# +# Copyright (c) 2019 Virtuozzo International GmbH. All rights reserved. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import iotests +from iotests import qemu_img_create, qemu_io, file_path, filter_qmp_testfiles + +iotests.verify_image_format(supported_fmts=['qcow2']) + +source, target = file_path('source', 'target') +size = 5 * 1024 * 1024 +limit = 2 * 1024 * 1024 + +qemu_img_create('-f', iotests.imgfmt, source, str(size)) +qemu_img_create('-f', iotests.imgfmt, target, str(size)) +qemu_io('-c', 'write 0 {}'.format(size), source) + +# raw format don't like empty files +qemu_io('-c', 'write 0 {}'.format(size), target) + +vm = iotests.VM().add_drive(source) +vm.launch() + +blockdev_opts = { + 'driver': iotests.imgfmt, + 'node-name': 'target', + 'file': { + 'driver': 'raw', + 'size': limit, + 'file': { + 'driver': 'file', + 'filename': target + } + } +} +vm.qmp_log('blockdev-add', filters=[filter_qmp_testfiles], **blockdev_opts) + +vm.qmp_log('blockdev-mirror', device='drive0', sync='full', target='target', + on_target_error='enospc') + +vm.event_wait('JOB_STATUS_CHANGE', timeout=3.0, + match={'data': {'status': 'paused'}}) + +# drop other cached events, to not interfere with further wait for 'running' +vm.get_qmp_events() + +del blockdev_opts['file']['size'] +vm.qmp_log('x-blockdev-reopen', filters=[filter_qmp_testfiles], + **blockdev_opts) + +vm.qmp_log('block-job-resume', device='drive0') +vm.event_wait('JOB_STATUS_CHANGE', timeout=1.0, + match={'data': {'status': 'running'}}) + +vm.shutdown() diff --git a/tests/qemu-iotests/248.out b/tests/qemu-iotests/248.out new file mode 100644 index 0000000000..369b25bf26 --- /dev/null +++ b/tests/qemu-iotests/248.out @@ -0,0 +1,8 @@ +{"execute": "blockdev-add", "arguments": {"driver": "qcow2", "file": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-target"}, "size": 2097152}, "node-name": "target"}} +{"return": {}} +{"execute": "blockdev-mirror", "arguments": {"device": "drive0", "on-target-error": "enospc", "sync": "full", "target": "target"}} +{"return": {}} +{"execute": "x-blockdev-reopen", "arguments": {"driver": "qcow2", "file": {"driver": "raw", "file": {"driver": "file", "filename": "TEST_DIR/PID-target"}}, "node-name": "target"}} +{"return": {}} +{"execute": "block-job-resume", "arguments": {"device": "drive0"}} +{"return": {}} diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index d192abaecf..41da10c6cf 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -246,3 +246,4 @@ 245 rw auto 246 rw auto quick 247 rw auto quick +248 rw auto quick From 48ce986096bb70354b12f0becb253a06bcf9c434 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 22 Mar 2019 13:53:46 +0100 Subject: [PATCH 2/7] block: Remove error messages in bdrv_make_zero() There is only a single caller of bdrv_make_zero(), which is qemu-img convert. If the function fails, we just fall back to a different method of zeroing out blocks on the target image. There is no good reason to print error messages on stderr when the higher level operation will actually succeed. Signed-off-by: Kevin Wolf Acked-by: Eric Blake --- block/io.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/block/io.c b/block/io.c index 2ba603c7bc..952372c2bb 100644 --- a/block/io.c +++ b/block/io.c @@ -909,8 +909,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) } ret = bdrv_block_status(bs, offset, bytes, &bytes, NULL, NULL); if (ret < 0) { - error_report("error getting block status at offset %" PRId64 ": %s", - offset, strerror(-ret)); return ret; } if (ret & BDRV_BLOCK_ZERO) { @@ -919,8 +917,6 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags) } ret = bdrv_pwrite_zeroes(child, offset, bytes, flags); if (ret < 0) { - error_report("error writing zeroes at offset %" PRId64 ": %s", - offset, strerror(-ret)); return ret; } offset += bytes; From fe0480d6294270ff0d6fb60e66bb725a6aad2043 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 22 Mar 2019 13:38:43 +0100 Subject: [PATCH 3/7] block: Add BDRV_REQ_NO_FALLBACK For qemu-img convert, we want an operation that zeroes out the whole image if this can be done efficiently, but that returns an error otherwise so we don't write explicit zeroes and immediately overwrite them with the real data, potentially doubling the amount of data to be written. Signed-off-by: Kevin Wolf Acked-by: Eric Blake --- block/io.c | 12 +++++++++++- include/block/block.h | 7 ++++++- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/block/io.c b/block/io.c index 952372c2bb..dfc153b8d8 100644 --- a/block/io.c +++ b/block/io.c @@ -1015,6 +1015,7 @@ static int coroutine_fn bdrv_driver_preadv(BlockDriverState *bs, unsigned int nb_sectors; assert(!(flags & ~BDRV_REQ_MASK)); + assert(!(flags & BDRV_REQ_NO_FALLBACK)); if (!drv) { return -ENOMEDIUM; @@ -1061,6 +1062,7 @@ static int coroutine_fn bdrv_driver_pwritev(BlockDriverState *bs, int ret; assert(!(flags & ~BDRV_REQ_MASK)); + assert(!(flags & BDRV_REQ_NO_FALLBACK)); if (!drv) { return -ENOMEDIUM; @@ -1467,6 +1469,10 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, return -ENOMEDIUM; } + if ((flags & ~bs->supported_zero_flags) & BDRV_REQ_NO_FALLBACK) { + return -ENOTSUP; + } + assert(alignment % bs->bl.request_alignment == 0); head = offset % alignment; tail = (offset + bytes) % alignment; @@ -1510,7 +1516,7 @@ static int coroutine_fn bdrv_co_do_pwrite_zeroes(BlockDriverState *bs, assert(!bs->supported_zero_flags); } - if (ret == -ENOTSUP) { + if (ret == -ENOTSUP && !(flags & BDRV_REQ_NO_FALLBACK)) { /* Fall back to bounce buffer if write zeroes is unsupported */ BdrvRequestFlags write_flags = flags & ~BDRV_REQ_ZERO_WRITE; @@ -2949,6 +2955,10 @@ static int coroutine_fn bdrv_co_copy_range_internal( BdrvTrackedRequest req; int ret; + /* TODO We can support BDRV_REQ_NO_FALLBACK here */ + assert(!(read_flags & BDRV_REQ_NO_FALLBACK)); + assert(!(write_flags & BDRV_REQ_NO_FALLBACK)); + if (!dst || !dst->bs) { return -ENOMEDIUM; } diff --git a/include/block/block.h b/include/block/block.h index e452988b66..c7a26199aa 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -83,8 +83,13 @@ typedef enum { */ BDRV_REQ_SERIALISING = 0x80, + /* Execute the request only if the operation can be offloaded or otherwise + * be executed efficiently, but return an error instead of using a slow + * fallback. */ + BDRV_REQ_NO_FALLBACK = 0x100, + /* Mask of valid flags */ - BDRV_REQ_MASK = 0xff, + BDRV_REQ_MASK = 0x1ff, } BdrvRequestFlags; typedef struct BlockSizes { From 80f5c33ff31eb9333f5036ee278fb1483fb4ff41 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 22 Mar 2019 13:42:39 +0100 Subject: [PATCH 4/7] block: Advertise BDRV_REQ_NO_FALLBACK in filter drivers Filter drivers that support .bdrv_co_pwrite_zeroes can safely advertise BDRV_REQ_NO_FALLBACK because they just forward the request flags to their child node. Signed-off-by: Kevin Wolf Acked-by: Eric Blake --- block/blkdebug.c | 2 +- block/copy-on-read.c | 7 +++---- block/mirror.c | 3 ++- block/raw-format.c | 2 +- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/block/blkdebug.c b/block/blkdebug.c index 1ea835c2b9..efd9441625 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -401,7 +401,7 @@ static int blkdebug_open(BlockDriverState *bs, QDict *options, int flags, bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | - ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & bs->file->bs->supported_zero_flags); ret = -EINVAL; diff --git a/block/copy-on-read.c b/block/copy-on-read.c index d670fec42b..53972b1da3 100644 --- a/block/copy-on-read.c +++ b/block/copy-on-read.c @@ -34,12 +34,11 @@ static int cor_open(BlockDriverState *bs, QDict *options, int flags, } bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | - (BDRV_REQ_FUA & - bs->file->bs->supported_write_flags); + (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | - ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & - bs->file->bs->supported_zero_flags); + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & + bs->file->bs->supported_zero_flags); return 0; } diff --git a/block/mirror.c b/block/mirror.c index eb9a4cdf56..ff15cfb197 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -1548,7 +1548,8 @@ static void mirror_start_job(const char *job_id, BlockDriverState *bs, } mirror_top_bs->total_sectors = bs->total_sectors; mirror_top_bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED; - mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED; + mirror_top_bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | + BDRV_REQ_NO_FALLBACK; bs_opaque = g_new0(MirrorBDSOpaque, 1); mirror_top_bs->opaque = bs_opaque; bdrv_set_aio_context(mirror_top_bs, bdrv_get_aio_context(bs)); diff --git a/block/raw-format.c b/block/raw-format.c index cec29986cc..385cdc2490 100644 --- a/block/raw-format.c +++ b/block/raw-format.c @@ -434,7 +434,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, bs->supported_write_flags = BDRV_REQ_WRITE_UNCHANGED | (BDRV_REQ_FUA & bs->file->bs->supported_write_flags); bs->supported_zero_flags = BDRV_REQ_WRITE_UNCHANGED | - ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP) & + ((BDRV_REQ_FUA | BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK) & bs->file->bs->supported_zero_flags); if (bs->probed && !bdrv_is_read_only(bs)) { From 738301e11758171defaa5a5237d584f8226af89f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 22 Mar 2019 13:45:23 +0100 Subject: [PATCH 5/7] file-posix: Support BDRV_REQ_NO_FALLBACK for zero writes We know that the kernel implements a slow fallback code path for BLKZEROOUT, so if BDRV_REQ_NO_FALLBACK is given, we shouldn't call it. The other operations we call in the context of .bdrv_co_pwrite_zeroes should usually be quick, so no modification should be needed for them. If we ever notice that there are additional problematic cases, we can still make these conditional as well. Signed-off-by: Kevin Wolf Acked-by: Eric Blake --- block/file-posix.c | 24 ++++++++++++++++-------- include/block/raw-aio.h | 1 + 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/block/file-posix.c b/block/file-posix.c index d102f3b222..db4cccbe51 100644 --- a/block/file-posix.c +++ b/block/file-posix.c @@ -652,7 +652,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, } #endif - bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP; + bs->supported_zero_flags = BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK; ret = 0; fail: if (filename && (bdrv_flags & BDRV_O_TEMPORARY)) { @@ -1500,14 +1500,19 @@ static ssize_t handle_aiocb_write_zeroes_block(RawPosixAIOData *aiocb) } #ifdef BLKZEROOUT - do { - uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; - if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { - return 0; - } - } while (errno == EINTR); + /* The BLKZEROOUT implementation in the kernel doesn't set + * BLKDEV_ZERO_NOFALLBACK, so we can't call this if we have to avoid slow + * fallbacks. */ + if (!(aiocb->aio_type & QEMU_AIO_NO_FALLBACK)) { + do { + uint64_t range[2] = { aiocb->aio_offset, aiocb->aio_nbytes }; + if (ioctl(aiocb->aio_fildes, BLKZEROOUT, range) == 0) { + return 0; + } + } while (errno == EINTR); - ret = translate_err(-errno); + ret = translate_err(-errno); + } #endif if (ret == -ENOTSUP) { @@ -2659,6 +2664,9 @@ raw_do_pwrite_zeroes(BlockDriverState *bs, int64_t offset, int bytes, if (blkdev) { acb.aio_type |= QEMU_AIO_BLKDEV; } + if (flags & BDRV_REQ_NO_FALLBACK) { + acb.aio_type |= QEMU_AIO_NO_FALLBACK; + } if (flags & BDRV_REQ_MAY_UNMAP) { acb.aio_type |= QEMU_AIO_DISCARD; diff --git a/include/block/raw-aio.h b/include/block/raw-aio.h index 6799614e56..ba223dd1f1 100644 --- a/include/block/raw-aio.h +++ b/include/block/raw-aio.h @@ -40,6 +40,7 @@ /* AIO flags */ #define QEMU_AIO_MISALIGNED 0x1000 #define QEMU_AIO_BLKDEV 0x2000 +#define QEMU_AIO_NO_FALLBACK 0x4000 /* linux-aio.c - Linux native implementation */ From c9fdcf202f19fc2acdcb1ee0522ff5d61bf8c906 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 22 Mar 2019 13:49:28 +0100 Subject: [PATCH 6/7] qemu-img: Use BDRV_REQ_NO_FALLBACK for pre-zeroing If qemu-img convert sees that the target image isn't zero-initialised yet, it tries to do an efficient zero write for the whole image first to save the overhead of repeated explicit zero writes during the conversion. Obviously, this provides only an advantage if the pre-zeroing is actually efficient. Otherwise, we can end up writing zeroes slowly while zeroing out the whole image, and then overwrite the same blocks again with real data, potentially doubling the written data. Pass BDRV_REQ_NO_FALLBACK to blk_make_zero() to avoid this case. If we can't efficiently zero out, we'll instead write explicit zeroes only if there is no data to be written to a block. Signed-off-by: Kevin Wolf Acked-by: Eric Blake --- qemu-img.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/qemu-img.c b/qemu-img.c index 5fac840742..8ee63daeae 100644 --- a/qemu-img.c +++ b/qemu-img.c @@ -1932,7 +1932,7 @@ static int convert_do_copy(ImgConvertState *s) if (!s->has_zero_init && !s->target_has_backing && bdrv_can_write_zeroes_with_unmap(blk_bs(s->target))) { - ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP); + ret = blk_make_zero(s->target, BDRV_REQ_MAY_UNMAP | BDRV_REQ_NO_FALLBACK); if (ret == 0) { s->has_zero_init = true; } From c6e3f520c802c5cb2de80576aba7f9f1fe985d8b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 22 Mar 2019 13:53:03 +0100 Subject: [PATCH 7/7] qemu-io: Add write -n for BDRV_REQ_NO_FALLBACK This makes the new BDRV_REQ_NO_FALLBACK flag available in the qemu-io write command. Signed-off-by: Kevin Wolf Acked-by: Eric Blake --- qemu-io-cmds.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/qemu-io-cmds.c b/qemu-io-cmds.c index 35dcdcf413..09750a23ce 100644 --- a/qemu-io-cmds.c +++ b/qemu-io-cmds.c @@ -946,6 +946,7 @@ static void write_help(void) " -b, -- write to the VM state rather than the virtual disk\n" " -c, -- write compressed data with blk_write_compressed\n" " -f, -- use Force Unit Access semantics\n" +" -n, -- with -z, don't allow slow fallback\n" " -p, -- ignored for backwards compatibility\n" " -P, -- use different pattern to fill file\n" " -C, -- report statistics in a machine parsable format\n" @@ -964,7 +965,7 @@ static const cmdinfo_t write_cmd = { .perm = BLK_PERM_WRITE, .argmin = 2, .argmax = -1, - .args = "[-bcCfquz] [-P pattern] off len", + .args = "[-bcCfnquz] [-P pattern] off len", .oneline = "writes a number of bytes at a specified offset", .help = write_help, }; @@ -983,7 +984,7 @@ static int write_f(BlockBackend *blk, int argc, char **argv) int64_t total = 0; int pattern = 0xcd; - while ((c = getopt(argc, argv, "bcCfpP:quz")) != -1) { + while ((c = getopt(argc, argv, "bcCfnpP:quz")) != -1) { switch (c) { case 'b': bflag = true; @@ -997,6 +998,9 @@ static int write_f(BlockBackend *blk, int argc, char **argv) case 'f': flags |= BDRV_REQ_FUA; break; + case 'n': + flags |= BDRV_REQ_NO_FALLBACK; + break; case 'p': /* Ignored for backwards compatibility */ break; @@ -1037,6 +1041,11 @@ static int write_f(BlockBackend *blk, int argc, char **argv) return -EINVAL; } + if ((flags & BDRV_REQ_NO_FALLBACK) && !zflag) { + printf("-n requires -z to be specified\n"); + return -EINVAL; + } + if ((flags & BDRV_REQ_MAY_UNMAP) && !zflag) { printf("-u requires -z to be specified\n"); return -EINVAL;