From 4085f5c7a239567a292876f46cb59d9b19bcf6ac Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 22 Sep 2016 21:45:50 -0400 Subject: [PATCH 01/17] block: reintroduce bdrv_flush_all Commit fe1a9cbc moved the flush_all routine from the bdrv layer to the block-backend layer. In doing so, however, the semantics of the routine changed slightly such that flush_all now used blk_flush instead of bdrv_flush. blk_flush can fail if the attached device model reports that it is not "available," (i.e. the tray is open.) This changed the semantics of flush_all such that it can now fail for e.g. open CDROM drives. Reintroduce bdrv_flush_all to regain the old semantics without having to alter the behavior of blk_flush or blk_flush_all, which are already 'doing the right thing.' Signed-off-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/io.c | 25 +++++++++++++++++++++++++ include/block/block.h | 1 + 2 files changed, 26 insertions(+) diff --git a/block/io.c b/block/io.c index fdf70807b0..57a2eeb512 100644 --- a/block/io.c +++ b/block/io.c @@ -1619,6 +1619,31 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset, BDRV_REQ_ZERO_WRITE | flags); } +/* + * Flush ALL BDSes regardless of if they are reachable via a BlkBackend or not. + */ +int bdrv_flush_all(void) +{ + BdrvNextIterator it; + BlockDriverState *bs = NULL; + int result = 0; + + for (bs = bdrv_first(&it); bs; bs = bdrv_next(&it)) { + AioContext *aio_context = bdrv_get_aio_context(bs); + int ret; + + aio_context_acquire(aio_context); + ret = bdrv_flush(bs); + if (ret < 0 && !result) { + result = ret; + } + aio_context_release(aio_context); + } + + return result; +} + + typedef struct BdrvCoGetBlockStatusData { BlockDriverState *bs; BlockDriverState *base; diff --git a/include/block/block.h b/include/block/block.h index e18233afe0..811b060f41 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -333,6 +333,7 @@ int bdrv_inactivate_all(void); /* Ensure contents are flushed to disk. */ int bdrv_flush(BlockDriverState *bs); int coroutine_fn bdrv_co_flush(BlockDriverState *bs); +int bdrv_flush_all(void); void bdrv_close_all(void); void bdrv_drain(BlockDriverState *bs); void coroutine_fn bdrv_co_drain(BlockDriverState *bs); From 22af08eacf6b5aa0e6c0581e547380b3eb4f95e9 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 22 Sep 2016 21:45:51 -0400 Subject: [PATCH 02/17] qemu: use bdrv_flush_all for vm_stop et al Reimplement bdrv_flush_all for vm_stop. In contrast to blk_flush_all, bdrv_flush_all does not have device model restrictions. This allows us to flush and halt unconditionally without error. This allows us to do things like migrate when we have a device with an open tray, but has a node that may need to be flushed, or nodes that aren't currently attached to any device and need to be flushed. Specifically, this allows us to migrate when we have a CDROM with an open tray. Signed-off-by: John Snow Reviewed-by: Kevin Wolf Reviewed-by: Max Reitz Acked-by: Fam Zheng Signed-off-by: Kevin Wolf --- cpus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpus.c b/cpus.c index b2fbe33304..31204bb4b3 100644 --- a/cpus.c +++ b/cpus.c @@ -751,7 +751,7 @@ static int do_vm_stop(RunState state) bdrv_drain_all(); replay_disable_events(); - ret = blk_flush_all(); + ret = bdrv_flush_all(); return ret; } @@ -1408,7 +1408,7 @@ int vm_stop_force_state(RunState state) bdrv_drain_all(); /* Make sure to return an error if the flush in a previous vm_stop() * failed. */ - return blk_flush_all(); + return bdrv_flush_all(); } } From 49137bf6845eaecad51a047fc06dd11c56118460 Mon Sep 17 00:00:00 2001 From: John Snow Date: Thu, 22 Sep 2016 21:45:52 -0400 Subject: [PATCH 03/17] block-backend: remove blk_flush_all We can teach Xen to drain and flush each device as it needs to, instead of trying to flush ALL devices. This removes the last user of blk_flush_all. The function is therefore removed under the premise that any new uses of blk_flush_all would be the wrong paradigm: either flush the single device that requires flushing, or use an appropriate flush_all mechanism from outside of the BlkBackend layer. Signed-off-by: John Snow Reviewed-by: Max Reitz Acked-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/block-backend.c | 22 ---------------------- hw/i386/xen/xen_platform.c | 2 -- hw/ide/piix.c | 4 ++++ include/sysemu/block-backend.h | 1 - 4 files changed, 4 insertions(+), 25 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index 0bd19abdfb..f34bad5840 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1640,28 +1640,6 @@ int blk_commit_all(void) return 0; } -int blk_flush_all(void) -{ - BlockBackend *blk = NULL; - int result = 0; - - while ((blk = blk_all_next(blk)) != NULL) { - AioContext *aio_context = blk_get_aio_context(blk); - int ret; - - aio_context_acquire(aio_context); - if (blk_is_inserted(blk)) { - ret = blk_flush(blk); - if (ret < 0 && !result) { - result = ret; - } - } - aio_context_release(aio_context); - } - - return result; -} - /* throttling disk I/O limits */ void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg) diff --git a/hw/i386/xen/xen_platform.c b/hw/i386/xen/xen_platform.c index aa7839324c..f85635cc9a 100644 --- a/hw/i386/xen/xen_platform.c +++ b/hw/i386/xen/xen_platform.c @@ -134,8 +134,6 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v devices, and bit 2 the non-primary-master IDE devices. */ if (val & UNPLUG_ALL_IDE_DISKS) { DPRINTF("unplug disks\n"); - blk_drain_all(); - blk_flush_all(); pci_unplug_disks(pci_dev->bus); } if (val & UNPLUG_ALL_NICS) { diff --git a/hw/ide/piix.c b/hw/ide/piix.c index c190fcaa3c..d5777fd0b3 100644 --- a/hw/ide/piix.c +++ b/hw/ide/piix.c @@ -179,6 +179,10 @@ int pci_piix3_xen_ide_unplug(DeviceState *dev) if (di != NULL && !di->media_cd) { BlockBackend *blk = blk_by_legacy_dinfo(di); DeviceState *ds = blk_get_attached_dev(blk); + + blk_drain(blk); + blk_flush(blk); + if (ds) { blk_detach_dev(blk, ds); } diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 3b29317349..24d1d85399 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -152,7 +152,6 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf, int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count); int blk_co_flush(BlockBackend *blk); int blk_flush(BlockBackend *blk); -int blk_flush_all(void); int blk_commit_all(void); void blk_drain(BlockBackend *blk); void blk_drain_all(void); From 24df38b00e23f76558ac12d7055c2df8d4b05150 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 26 Sep 2016 15:03:00 +0200 Subject: [PATCH 04/17] block: Fix error path in qmp_blockdev_change_medium() Commit 00949bab incorrectly changed one instance of &err into errp while touching the line. Change it back. Signed-off-by: Kevin Wolf Reviewed-by: Max Reitz --- blockdev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/blockdev.c b/blockdev.c index 29c6561fd8..62d0dd016f 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2614,7 +2614,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, error_free(err); err = NULL; - qmp_x_blockdev_remove_medium(has_device, device, has_id, id, errp); + qmp_x_blockdev_remove_medium(has_device, device, has_id, id, &err); if (err) { error_propagate(errp, err); goto fail; From 0ffcdd9c06343a3fe0b2b3e1ca93ce8aa5366f98 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 7 Sep 2016 15:26:42 +0200 Subject: [PATCH 05/17] block: Drop aio/cache consistency check from qmp_blockdev_add() The TODO comment has been addressed a while ago and this is now checked in raw-posix, so we don't have to special case this in blockdev-add any more. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- blockdev.c | 15 --------------- tests/qemu-iotests/087.out | 2 +- 2 files changed, 1 insertion(+), 16 deletions(-) diff --git a/blockdev.c b/blockdev.c index 62d0dd016f..7820f42210 100644 --- a/blockdev.c +++ b/blockdev.c @@ -3832,21 +3832,6 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp) QDict *qdict; Error *local_err = NULL; - /* TODO Sort it out in raw-posix and drive_new(): Reject aio=native with - * cache.direct=false instead of silently switching to aio=threads, except - * when called from drive_new(). - * - * For now, simply forbidding the combination for all drivers will do. */ - if (options->has_aio && options->aio == BLOCKDEV_AIO_OPTIONS_NATIVE) { - bool direct = options->has_cache && - options->cache->has_direct && - options->cache->direct; - if (!direct) { - error_setg(errp, "aio=native requires cache.direct=true"); - goto fail; - } - } - visit_type_BlockdevOptions(v, NULL, &options, &local_err); if (local_err) { error_propagate(errp, local_err); diff --git a/tests/qemu-iotests/087.out b/tests/qemu-iotests/087.out index bef68626c8..cd02eaed4c 100644 --- a/tests/qemu-iotests/087.out +++ b/tests/qemu-iotests/087.out @@ -27,7 +27,7 @@ QMP_VERSION Testing: QMP_VERSION {"return": {}} -{"error": {"class": "GenericError", "desc": "aio=native requires cache.direct=true"}} +{"error": {"class": "GenericError", "desc": "aio=native was specified, but it requires cache.direct=on, which was not specified."}} {"return": {}} {"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"} From 685552850bf29ef1cd4bef0e2629cbc3ebf0a23b Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 8 Sep 2016 13:08:20 +0200 Subject: [PATCH 06/17] block/qapi: Use separate options type for curl driver We're going to add an option to the file drivers which doesn't apply to the curl drivers, so give them a separate option type. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- qapi/block-core.json | 25 ++++++++++++++++++------- 1 file changed, 18 insertions(+), 7 deletions(-) diff --git a/qapi/block-core.json b/qapi/block-core.json index 92193ab0a1..b5fdd426c9 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1721,8 +1721,7 @@ ## # @BlockdevOptionsFile # -# Driver specific block device options for the file backend and similar -# protocols. +# Driver specific block device options for the file backend. # # @filename: path to the image file # @@ -2210,6 +2209,18 @@ 'data': { 'mode': 'ReplicationMode', '*top-id': 'str' } } +## +# @BlockdevOptionsCurl +# +# Driver specific block device options for the curl backend. +# +# @filename: path to the image file +# +# Since: 1.7 +## +{ 'struct': 'BlockdevOptionsCurl', + 'data': { 'filename': 'str' } } + ## # @BlockdevOptions # @@ -2248,13 +2259,13 @@ 'cloop': 'BlockdevOptionsGenericFormat', 'dmg': 'BlockdevOptionsGenericFormat', 'file': 'BlockdevOptionsFile', - 'ftp': 'BlockdevOptionsFile', - 'ftps': 'BlockdevOptionsFile', + 'ftp': 'BlockdevOptionsCurl', + 'ftps': 'BlockdevOptionsCurl', 'gluster': 'BlockdevOptionsGluster', 'host_cdrom': 'BlockdevOptionsFile', 'host_device':'BlockdevOptionsFile', - 'http': 'BlockdevOptionsFile', - 'https': 'BlockdevOptionsFile', + 'http': 'BlockdevOptionsCurl', + 'https': 'BlockdevOptionsCurl', # TODO iscsi: Wait for structured options 'luks': 'BlockdevOptionsLUKS', # TODO nbd: Should take InetSocketAddress for 'host'? @@ -2271,7 +2282,7 @@ 'replication':'BlockdevOptionsReplication', # TODO sheepdog: Wait for structured options # TODO ssh: Should take InetSocketAddress for 'host'? - 'tftp': 'BlockdevOptionsFile', + 'tftp': 'BlockdevOptionsCurl', 'vdi': 'BlockdevOptionsGenericFormat', 'vhdx': 'BlockdevOptionsGenericFormat', 'vmdk': 'BlockdevOptionsGenericCOWFormat', From 0a4279d97c501254f6164b8abcd89055a11e2dc5 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Thu, 8 Sep 2016 15:09:01 +0200 Subject: [PATCH 07/17] block/qapi: Move 'aio' option to file driver The option whether or not to use a native AIO interface really isn't a generic option for all drivers, but only applies to the native file protocols. This patch moves the option in blockdev-add to the appropriate places (raw-posix and raw-win32). We still have to keep the flag BDRV_O_NATIVE_AIO for compatibility because so far the AIO option was usually specified on the wrong layer (the top-level format driver, which didn't even look at it) and then inherited by the protocol driver (where it was actually used). We can't forbid this use except in new interfaces. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block/raw-posix.c | 44 ++++++++++++++++++++------------- block/raw-win32.c | 56 ++++++++++++++++++++++++++++++++++++++---- qapi/block-core.json | 6 ++--- tests/qemu-iotests/087 | 4 +-- 4 files changed, 83 insertions(+), 27 deletions(-) diff --git a/block/raw-posix.c b/block/raw-posix.c index 6ed7547392..166e9d1ad5 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -143,6 +143,7 @@ typedef struct BDRVRawState { bool has_discard:1; bool has_write_zeroes:1; bool discard_zeroes:1; + bool use_linux_aio:1; bool has_fallocate; bool needs_alignment; } BDRVRawState; @@ -367,18 +368,6 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags) } } -#ifdef CONFIG_LINUX_AIO -static bool raw_use_aio(int bdrv_flags) -{ - /* - * Currently Linux do AIO only for files opened with O_DIRECT - * specified so check NOCACHE flag too - */ - return (bdrv_flags & (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO)) == - (BDRV_O_NOCACHE|BDRV_O_NATIVE_AIO); -} -#endif - static void raw_parse_filename(const char *filename, QDict *options, Error **errp) { @@ -399,6 +388,11 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "File name of the image", }, + { + .name = "aio", + .type = QEMU_OPT_STRING, + .help = "host AIO implementation (threads, native)", + }, { /* end of list */ } }, }; @@ -410,6 +404,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, QemuOpts *opts; Error *local_err = NULL; const char *filename = NULL; + BlockdevAioOptions aio, aio_default; int fd, ret; struct stat st; @@ -429,6 +424,18 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, goto fail; } + aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO) + ? BLOCKDEV_AIO_OPTIONS_NATIVE + : BLOCKDEV_AIO_OPTIONS_THREADS; + aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"), + BLOCKDEV_AIO_OPTIONS__MAX, aio_default, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE); + s->open_flags = open_flags; raw_parse_flags(bdrv_flags, &s->open_flags); @@ -444,14 +451,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options, s->fd = fd; #ifdef CONFIG_LINUX_AIO - if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) { + /* Currently Linux does AIO only for files opened with O_DIRECT */ + if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) { error_setg(errp, "aio=native was specified, but it requires " "cache.direct=on, which was not specified."); ret = -EINVAL; goto fail; } #else - if (bdrv_flags & BDRV_O_NATIVE_AIO) { + if (s->use_linux_aio) { error_setg(errp, "aio=native was specified, but is not supported " "in this build."); ret = -EINVAL; @@ -1256,7 +1264,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset, if (!bdrv_qiov_is_aligned(bs, qiov)) { type |= QEMU_AIO_MISALIGNED; #ifdef CONFIG_LINUX_AIO - } else if (bs->open_flags & BDRV_O_NATIVE_AIO) { + } else if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); assert(qiov->size == bytes); return laio_co_submit(bs, aio, s->fd, offset, qiov, type); @@ -1285,7 +1293,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset, static void raw_aio_plug(BlockDriverState *bs) { #ifdef CONFIG_LINUX_AIO - if (bs->open_flags & BDRV_O_NATIVE_AIO) { + BDRVRawState *s = bs->opaque; + if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); laio_io_plug(bs, aio); } @@ -1295,7 +1304,8 @@ static void raw_aio_plug(BlockDriverState *bs) static void raw_aio_unplug(BlockDriverState *bs) { #ifdef CONFIG_LINUX_AIO - if (bs->open_flags & BDRV_O_NATIVE_AIO) { + BDRVRawState *s = bs->opaque; + if (s->use_linux_aio) { LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs)); laio_io_unplug(bs, aio); } diff --git a/block/raw-win32.c b/block/raw-win32.c index 56f45fea9e..734bb105bd 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -32,6 +32,7 @@ #include "block/thread-pool.h" #include "qemu/iov.h" #include "qapi/qmp/qstring.h" +#include "qapi/util.h" #include #include @@ -252,7 +253,8 @@ static void raw_probe_alignment(BlockDriverState *bs, Error **errp) } } -static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) +static void raw_parse_flags(int flags, bool use_aio, int *access_flags, + DWORD *overlapped) { assert(access_flags != NULL); assert(overlapped != NULL); @@ -264,7 +266,7 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped) } *overlapped = FILE_ATTRIBUTE_NORMAL; - if (flags & BDRV_O_NATIVE_AIO) { + if (use_aio) { *overlapped |= FILE_FLAG_OVERLAPPED; } if (flags & BDRV_O_NOCACHE) { @@ -292,10 +294,35 @@ static QemuOptsList raw_runtime_opts = { .type = QEMU_OPT_STRING, .help = "File name of the image", }, + { + .name = "aio", + .type = QEMU_OPT_STRING, + .help = "host AIO implementation (threads, native)", + }, { /* end of list */ } }, }; +static bool get_aio_option(QemuOpts *opts, int flags, Error **errp) +{ + BlockdevAioOptions aio, aio_default; + + aio_default = (flags & BDRV_O_NATIVE_AIO) ? BLOCKDEV_AIO_OPTIONS_NATIVE + : BLOCKDEV_AIO_OPTIONS_THREADS; + aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"), + BLOCKDEV_AIO_OPTIONS__MAX, aio_default, errp); + + switch (aio) { + case BLOCKDEV_AIO_OPTIONS_NATIVE: + return true; + case BLOCKDEV_AIO_OPTIONS_THREADS: + return false; + default: + error_setg(errp, "Invalid AIO option"); + } + return false; +} + static int raw_open(BlockDriverState *bs, QDict *options, int flags, Error **errp) { @@ -305,6 +332,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, QemuOpts *opts; Error *local_err = NULL; const char *filename; + bool use_aio; int ret; s->type = FTYPE_FILE; @@ -319,7 +347,14 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, filename = qemu_opt_get(opts, "filename"); - raw_parse_flags(flags, &access_flags, &overlapped); + use_aio = get_aio_option(opts, flags, &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail; + } + + raw_parse_flags(flags, use_aio, &access_flags, &overlapped); if (filename[0] && filename[1] == ':') { snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]); @@ -346,7 +381,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags, goto fail; } - if (flags & BDRV_O_NATIVE_AIO) { + if (use_aio) { s->aio = win32_aio_init(); if (s->aio == NULL) { CloseHandle(s->hfile); @@ -647,6 +682,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, Error *local_err = NULL; const char *filename; + bool use_aio; QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0, &error_abort); @@ -659,6 +695,16 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, filename = qemu_opt_get(opts, "filename"); + use_aio = get_aio_option(opts, flags, &local_err); + if (!local_err && use_aio) { + error_setg(&local_err, "AIO is not supported on Windows host devices"); + } + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto done; + } + if (strstart(filename, "/dev/cdrom", NULL)) { if (find_cdrom(device_name, sizeof(device_name)) < 0) { error_setg(errp, "Could not open CD-ROM drive"); @@ -677,7 +723,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags, } s->type = find_device_type(bs, filename); - raw_parse_flags(flags, &access_flags, &overlapped); + raw_parse_flags(flags, use_aio, &access_flags, &overlapped); create_flags = OPEN_EXISTING; diff --git a/qapi/block-core.json b/qapi/block-core.json index b5fdd426c9..9d797b8fe0 100644 --- a/qapi/block-core.json +++ b/qapi/block-core.json @@ -1724,11 +1724,13 @@ # Driver specific block device options for the file backend. # # @filename: path to the image file +# @aio: #optional AIO backend (default: threads) (since: 2.8) # # Since: 1.7 ## { 'struct': 'BlockdevOptionsFile', - 'data': { 'filename': 'str' } } + 'data': { 'filename': 'str', + '*aio': 'BlockdevAioOptions' } } ## # @BlockdevOptionsNull @@ -2232,7 +2234,6 @@ # This option is required on the top level of blockdev-add. # @discard: #optional discard-related options (default: ignore) # @cache: #optional cache-related options -# @aio: #optional AIO backend (default: threads) # @read-only: #optional whether the block device should be read-only # (default: false) # @detect-zeroes: #optional detect and optimize zero writes (Since 2.1) @@ -2247,7 +2248,6 @@ '*node-name': 'str', '*discard': 'BlockdevDiscardOptions', '*cache': 'BlockdevCacheOptions', - '*aio': 'BlockdevAioOptions', '*read-only': 'bool', '*detect-zeroes': 'BlockdevDetectZeroesOptions' }, 'discriminator': 'driver', diff --git a/tests/qemu-iotests/087 b/tests/qemu-iotests/087 index 5c04577b36..b1ac71f2b8 100755 --- a/tests/qemu-iotests/087 +++ b/tests/qemu-iotests/087 @@ -117,10 +117,10 @@ run_qemu < Date: Mon, 12 Sep 2016 21:00:41 +0200 Subject: [PATCH 08/17] block: Parse 'detect-zeroes' in bdrv_open_common() Amongst others, this means that you can now use the 'detect-zeroes' option for non-top-level nodes in blockdev-add, like the QAPI schema promises. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block.c | 33 +++++++++++++++++++++++++++++++++ blockdev.c | 9 +-------- 2 files changed, 34 insertions(+), 8 deletions(-) diff --git a/block.c b/block.c index 493ecf3137..1f1045738d 100644 --- a/block.c +++ b/block.c @@ -42,6 +42,7 @@ #include "qapi-event.h" #include "qemu/cutils.h" #include "qemu/id.h" +#include "qapi/util.h" #ifdef CONFIG_BSD #include @@ -954,6 +955,11 @@ static QemuOptsList bdrv_runtime_opts = { .type = QEMU_OPT_BOOL, .help = "Node is opened in read-only mode", }, + { + .name = "detect-zeroes", + .type = QEMU_OPT_STRING, + .help = "try to optimize zero writes (off, on, unmap)", + }, { /* end of list */ } }, }; @@ -970,6 +976,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, const char *filename; const char *driver_name = NULL; const char *node_name = NULL; + const char *detect_zeroes; QemuOpts *opts; BlockDriver *drv; Error *local_err = NULL; @@ -1038,6 +1045,32 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, } } + detect_zeroes = qemu_opt_get(opts, "detect-zeroes"); + if (detect_zeroes) { + BlockdevDetectZeroesOptions value = + qapi_enum_parse(BlockdevDetectZeroesOptions_lookup, + detect_zeroes, + BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX, + BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF, + &local_err); + if (local_err) { + error_propagate(errp, local_err); + ret = -EINVAL; + goto fail_opts; + } + + if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && + !(bs->open_flags & BDRV_O_UNMAP)) + { + error_setg(errp, "setting detect-zeroes to unmap is not allowed " + "without setting discard operation to unmap"); + ret = -EINVAL; + goto fail_opts; + } + + bs->detect_zeroes = value; + } + if (filename != NULL) { pstrcpy(bs->filename, sizeof(bs->filename), filename); } else { diff --git a/blockdev.c b/blockdev.c index 7820f42210..511260ce93 100644 --- a/blockdev.c +++ b/blockdev.c @@ -658,7 +658,6 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) BlockDriverState *bs; QemuOpts *opts; Error *local_error = NULL; - BlockdevDetectZeroesOptions detect_zeroes; int bdrv_flags = 0; opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp); @@ -673,7 +672,7 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) } extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL, - &detect_zeroes, &local_error); + NULL, &local_error); if (local_error) { error_propagate(errp, local_error); goto fail; @@ -695,8 +694,6 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) goto fail_no_bs_opts; } - bs->detect_zeroes = detect_zeroes; - fail_no_bs_opts: qemu_opts_del(opts); return bs; @@ -4136,10 +4133,6 @@ static QemuOptsList qemu_root_bds_opts = { .name = "copy-on-read", .type = QEMU_OPT_BOOL, .help = "copy read data from backing file into image file", - },{ - .name = "detect-zeroes", - .type = QEMU_OPT_STRING, - .help = "try to optimize zero writes (off, on, unmap)", }, { /* end of list */ } }, From b85114f8cfbede8b153db68875973ef0790bf296 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 12 Sep 2016 19:08:31 +0200 Subject: [PATCH 09/17] block: Use 'detect-zeroes' option for 'blockdev-change-medium' Instead of modifying the new BDS after it has been opened, use the newly supported 'detect-zeroes' option in bdrv_open_common() so that all requirements are checked (detect-zeroes=unmap requires discard=unmap). Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block/block-backend.c | 9 ++++----- blockdev.c | 9 ++++++--- include/sysemu/block-backend.h | 2 +- 3 files changed, 11 insertions(+), 9 deletions(-) diff --git a/block/block-backend.c b/block/block-backend.c index f34bad5840..639294b8e6 100644 --- a/block/block-backend.c +++ b/block/block-backend.c @@ -1592,13 +1592,12 @@ void blk_update_root_state(BlockBackend *blk) } /* - * Applies the information in the root state to the given BlockDriverState. This - * does not include the flags which have to be specified for bdrv_open(), use - * blk_get_open_flags_from_root_state() to inquire them. + * Returns the detect-zeroes setting to be used for bdrv_open() of a + * BlockDriverState which is supposed to inherit the root state. */ -void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs) +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk) { - bs->detect_zeroes = blk->root_state.detect_zeroes; + return blk->root_state.detect_zeroes; } /* diff --git a/blockdev.c b/blockdev.c index 511260ce93..7b87bd8a71 100644 --- a/blockdev.c +++ b/blockdev.c @@ -2546,6 +2546,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, BlockBackend *blk; BlockDriverState *medium_bs = NULL; int bdrv_flags; + bool detect_zeroes; int rc; QDict *options = NULL; Error *err = NULL; @@ -2585,8 +2586,12 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, abort(); } + options = qdict_new(); + detect_zeroes = blk_get_detect_zeroes_from_root_state(blk); + qdict_put(options, "detect-zeroes", + qstring_from_str(detect_zeroes ? "on" : "off")); + if (has_format) { - options = qdict_new(); qdict_put(options, "driver", qstring_from_str(format)); } @@ -2623,8 +2628,6 @@ void qmp_blockdev_change_medium(bool has_device, const char *device, goto fail; } - blk_apply_root_state(blk, medium_bs); - qmp_blockdev_close_tray(has_device, device, has_id, id, errp); fail: diff --git a/include/sysemu/block-backend.h b/include/sysemu/block-backend.h index 24d1d85399..a7993afcda 100644 --- a/include/sysemu/block-backend.h +++ b/include/sysemu/block-backend.h @@ -198,7 +198,7 @@ void blk_io_unplug(BlockBackend *blk); BlockAcctStats *blk_get_stats(BlockBackend *blk); BlockBackendRootState *blk_get_root_state(BlockBackend *blk); void blk_update_root_state(BlockBackend *blk); -void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs); +bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk); int blk_get_open_flags_from_root_state(BlockBackend *blk); void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk, From 818584a43ab0ef52c131865128ef110f867726cd Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Mon, 12 Sep 2016 18:03:18 +0200 Subject: [PATCH 10/17] block: Move 'discard' option to bdrv_open_common() This enables its use for nested child nodes. The compatibility between the 'discard' and 'detect-zeroes' setting is checked in bdrv_open_common() now as the former setting isn't available before calling bdrv_open() any more. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- block.c | 17 ++++++++++++++++- blockdev.c | 25 ------------------------- include/block/block.h | 1 + 3 files changed, 17 insertions(+), 26 deletions(-) diff --git a/block.c b/block.c index 1f1045738d..bb1f1ec957 100644 --- a/block.c +++ b/block.c @@ -765,7 +765,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options, /* Our block drivers take care to send flushes and respect unmap policy, * so we can default to enable both on lower layers regardless of the * corresponding parent options. */ - flags |= BDRV_O_UNMAP; + qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap"); /* Clear flags that only apply to the top layer */ flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ | @@ -960,6 +960,11 @@ static QemuOptsList bdrv_runtime_opts = { .type = QEMU_OPT_STRING, .help = "try to optimize zero writes (off, on, unmap)", }, + { + .name = "discard", + .type = QEMU_OPT_STRING, + .help = "discard operation (ignore/off, unmap/on)", + }, { /* end of list */ } }, }; @@ -976,6 +981,7 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, const char *filename; const char *driver_name = NULL; const char *node_name = NULL; + const char *discard; const char *detect_zeroes; QemuOpts *opts; BlockDriver *drv; @@ -1045,6 +1051,15 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file, } } + discard = qemu_opt_get(opts, "discard"); + if (discard != NULL) { + if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) { + error_setg(errp, "Invalid discard option"); + ret = -EINVAL; + goto fail_opts; + } + } + detect_zeroes = qemu_opt_get(opts, "detect-zeroes"); if (detect_zeroes) { BlockdevDetectZeroesOptions value = diff --git a/blockdev.c b/blockdev.c index 7b87bd8a71..e2ace04346 100644 --- a/blockdev.c +++ b/blockdev.c @@ -356,7 +356,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, const char **throttling_group, ThrottleConfig *throttle_cfg, BlockdevDetectZeroesOptions *detect_zeroes, Error **errp) { - const char *discard; Error *local_error = NULL; const char *aio; @@ -365,13 +364,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, *bdrv_flags |= BDRV_O_COPY_ON_READ; } - if ((discard = qemu_opt_get(opts, "discard")) != NULL) { - if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) { - error_setg(errp, "Invalid discard option"); - return; - } - } - if ((aio = qemu_opt_get(opts, "aio")) != NULL) { if (!strcmp(aio, "native")) { *bdrv_flags |= BDRV_O_NATIVE_AIO; @@ -449,15 +441,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags, error_propagate(errp, local_error); return; } - - if (bdrv_flags && - *detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP && - !(*bdrv_flags & BDRV_O_UNMAP)) - { - error_setg(errp, "setting detect-zeroes to unmap is not allowed " - "without setting discard operation to unmap"); - return; - } } } @@ -3989,10 +3972,6 @@ QemuOptsList qemu_common_drive_opts = { .name = "snapshot", .type = QEMU_OPT_BOOL, .help = "enable/disable snapshot mode", - },{ - .name = "discard", - .type = QEMU_OPT_STRING, - .help = "discard operation (ignore/off, unmap/on)", },{ .name = "aio", .type = QEMU_OPT_STRING, @@ -4125,10 +4104,6 @@ static QemuOptsList qemu_root_bds_opts = { .head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head), .desc = { { - .name = "discard", - .type = QEMU_OPT_STRING, - .help = "discard operation (ignore/off, unmap/on)", - },{ .name = "aio", .type = QEMU_OPT_STRING, .help = "host AIO implementation (threads, native)", diff --git a/include/block/block.h b/include/block/block.h index 811b060f41..107c603605 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -108,6 +108,7 @@ typedef struct HDGeometry { #define BDRV_OPT_CACHE_DIRECT "cache.direct" #define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush" #define BDRV_OPT_READ_ONLY "read-only" +#define BDRV_OPT_DISCARD "discard" #define BDRV_SECTOR_BITS 9 From 74e1ae7c0b66d85d2b04c153ffdf6294e6a40798 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 20 Sep 2016 20:48:52 +0200 Subject: [PATCH 11/17] block: Remove qemu_root_bds_opts The remaining options in qemu_root_bds_opts (aio and copy-on-read) aren't used any more, the QAPI schema doesn't contain them. Therefore all the code processing qemu_root_bds_opts options is dead and can be removed. Signed-off-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Max Reitz --- blockdev.c | 54 +----------------------------------------------------- 1 file changed, 1 insertion(+), 53 deletions(-) diff --git a/blockdev.c b/blockdev.c index e2ace04346..07ec733905 100644 --- a/blockdev.c +++ b/blockdev.c @@ -633,34 +633,11 @@ err_no_opts: return NULL; } -static QemuOptsList qemu_root_bds_opts; - /* Takes the ownership of bs_opts */ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) { - BlockDriverState *bs; - QemuOpts *opts; - Error *local_error = NULL; int bdrv_flags = 0; - opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp); - if (!opts) { - goto fail; - } - - qemu_opts_absorb_qdict(opts, bs_opts, &local_error); - if (local_error) { - error_propagate(errp, local_error); - goto fail; - } - - extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL, - NULL, &local_error); - if (local_error) { - error_propagate(errp, local_error); - goto fail; - } - /* bdrv_open() defaults to the values in bdrv_flags (for compatibility * with other callers) rather than what we want as the real defaults. * Apply the defaults here instead. */ @@ -672,19 +649,7 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp) bdrv_flags |= BDRV_O_INACTIVE; } - bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); - if (!bs) { - goto fail_no_bs_opts; - } - -fail_no_bs_opts: - qemu_opts_del(opts); - return bs; - -fail: - qemu_opts_del(opts); - QDECREF(bs_opts); - return NULL; + return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp); } void blockdev_close_all_bdrv_states(void) @@ -4099,23 +4064,6 @@ QemuOptsList qemu_common_drive_opts = { }, }; -static QemuOptsList qemu_root_bds_opts = { - .name = "root-bds", - .head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head), - .desc = { - { - .name = "aio", - .type = QEMU_OPT_STRING, - .help = "host AIO implementation (threads, native)", - },{ - .name = "copy-on-read", - .type = QEMU_OPT_BOOL, - .help = "copy read data from backing file into image file", - }, - { /* end of list */ } - }, -}; - QemuOptsList qemu_drive_opts = { .name = "drive", .head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head), From 8737d9e0c4017aaa5ab1fcf1356c8ee4f7caf1df Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 27 Sep 2016 11:58:40 +0200 Subject: [PATCH 12/17] oslib-posix: add helpers for stack alloc and free the allocated stack will be adjusted to the minimum supported stack size by the OS and rounded up to be a multiple of the system pagesize. Additionally an architecture dependent guard page is added to the stack to catch stack overflows. Signed-off-by: Peter Lieven Signed-off-by: Kevin Wolf --- include/sysemu/os-posix.h | 27 +++++++++++++++++++++++++ util/oslib-posix.c | 42 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/include/sysemu/os-posix.h b/include/sysemu/os-posix.h index 9c7dfdfbec..3cfedbc28b 100644 --- a/include/sysemu/os-posix.h +++ b/include/sysemu/os-posix.h @@ -60,4 +60,31 @@ int qemu_utimens(const char *path, const qemu_timespec *times); bool is_daemonized(void); +/** + * qemu_alloc_stack: + * @sz: pointer to a size_t holding the requested usable stack size + * + * Allocate memory that can be used as a stack, for instance for + * coroutines. If the memory cannot be allocated, this function + * will abort (like g_malloc()). This function also inserts an + * additional guard page to catch a potential stack overflow. + * Note that the memory required for the guard page and alignment + * and minimal stack size restrictions will increase the value of sz. + * + * The allocated stack must be freed with qemu_free_stack(). + * + * Returns: pointer to (the lowest address of) the stack memory. + */ +void *qemu_alloc_stack(size_t *sz); + +/** + * qemu_free_stack: + * @stack: stack to free + * @sz: size of stack in bytes + * + * Free a stack allocated via qemu_alloc_stack(). Note that sz must + * be exactly the adjusted stack size returned by qemu_alloc_stack. + */ +void qemu_free_stack(void *stack, size_t sz); + #endif diff --git a/util/oslib-posix.c b/util/oslib-posix.c index f2d4e9e592..d950c347e2 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -499,3 +499,45 @@ pid_t qemu_fork(Error **errp) } return pid; } + +void *qemu_alloc_stack(size_t *sz) +{ + void *ptr, *guardpage; + size_t pagesz = getpagesize(); +#ifdef _SC_THREAD_STACK_MIN + /* avoid stacks smaller than _SC_THREAD_STACK_MIN */ + long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN); + *sz = MAX(MAX(min_stack_sz, 0), *sz); +#endif + /* adjust stack size to a multiple of the page size */ + *sz = ROUND_UP(*sz, pagesz); + /* allocate one extra page for the guard page */ + *sz += pagesz; + + ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (ptr == MAP_FAILED) { + abort(); + } + +#if defined(HOST_IA64) + /* separate register stack */ + guardpage = ptr + (((*sz - pagesz) / 2) & ~pagesz); +#elif defined(HOST_HPPA) + /* stack grows up */ + guardpage = ptr + *sz - pagesz; +#else + /* stack grows down */ + guardpage = ptr; +#endif + if (mprotect(guardpage, pagesz, PROT_NONE) != 0) { + abort(); + } + + return ptr; +} + +void qemu_free_stack(void *stack, size_t sz) +{ + munmap(stack, sz); +} From be87a393f9aed6f9406a728d4d55199aee0ebbb4 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 27 Sep 2016 11:58:41 +0200 Subject: [PATCH 13/17] coroutine-sigaltstack: rename coroutine struct appropriately The name of the sigaltstack coroutine struct was misleading. Signed-off-by: Peter Lieven Signed-off-by: Kevin Wolf --- util/coroutine-sigaltstack.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c index a7c3366553..171cd44b7f 100644 --- a/util/coroutine-sigaltstack.c +++ b/util/coroutine-sigaltstack.c @@ -34,7 +34,7 @@ typedef struct { Coroutine base; void *stack; sigjmp_buf env; -} CoroutineUContext; +} CoroutineSigAltStack; /** * Per-thread coroutine bookkeeping @@ -44,7 +44,7 @@ typedef struct { Coroutine *current; /** The default coroutine */ - CoroutineUContext leader; + CoroutineSigAltStack leader; /** Information for the signal handler (trampoline) */ sigjmp_buf tr_reenter; @@ -89,7 +89,7 @@ static void __attribute__((constructor)) coroutine_init(void) * (from the signal handler when it is not signal handling, read ahead * for more information). */ -static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) +static void coroutine_bootstrap(CoroutineSigAltStack *self, Coroutine *co) { /* Initialize longjmp environment and switch back the caller */ if (!sigsetjmp(self->env, 0)) { @@ -109,7 +109,7 @@ static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co) */ static void coroutine_trampoline(int signal) { - CoroutineUContext *self; + CoroutineSigAltStack *self; Coroutine *co; CoroutineThreadState *coTS; @@ -144,7 +144,7 @@ static void coroutine_trampoline(int signal) Coroutine *qemu_coroutine_new(void) { const size_t stack_size = 1 << 20; - CoroutineUContext *co; + CoroutineSigAltStack *co; CoroutineThreadState *coTS; struct sigaction sa; struct sigaction osa; @@ -251,7 +251,7 @@ Coroutine *qemu_coroutine_new(void) void qemu_coroutine_delete(Coroutine *co_) { - CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_); + CoroutineSigAltStack *co = DO_UPCAST(CoroutineSigAltStack, base, co_); g_free(co->stack); g_free(co); @@ -260,8 +260,8 @@ void qemu_coroutine_delete(Coroutine *co_) CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_, CoroutineAction action) { - CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_); - CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_); + CoroutineSigAltStack *from = DO_UPCAST(CoroutineSigAltStack, base, from_); + CoroutineSigAltStack *to = DO_UPCAST(CoroutineSigAltStack, base, to_); CoroutineThreadState *s = coroutine_get_thread_state(); int ret; From 8adcd6fb6d14ff7fbb47179384dcddbd6dfd95a3 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 27 Sep 2016 11:58:42 +0200 Subject: [PATCH 14/17] coroutine: add a macro for the coroutine stack size Signed-off-by: Peter Lieven Reviewed-by: Paolo Bonzini Reviewed-by: Richard Henderson Signed-off-by: Kevin Wolf --- include/qemu/coroutine_int.h | 2 ++ util/coroutine-sigaltstack.c | 2 +- util/coroutine-ucontext.c | 2 +- util/coroutine-win32.c | 2 +- 4 files changed, 5 insertions(+), 3 deletions(-) diff --git a/include/qemu/coroutine_int.h b/include/qemu/coroutine_int.h index 6df9d33352..14d4f1d1f2 100644 --- a/include/qemu/coroutine_int.h +++ b/include/qemu/coroutine_int.h @@ -28,6 +28,8 @@ #include "qemu/queue.h" #include "qemu/coroutine.h" +#define COROUTINE_STACK_SIZE (1 << 20) + typedef enum { COROUTINE_YIELD = 1, COROUTINE_TERMINATE = 2, diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c index 171cd44b7f..a5bcb7e19e 100644 --- a/util/coroutine-sigaltstack.c +++ b/util/coroutine-sigaltstack.c @@ -143,7 +143,7 @@ static void coroutine_trampoline(int signal) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; + const size_t stack_size = COROUTINE_STACK_SIZE; CoroutineSigAltStack *co; CoroutineThreadState *coTS; struct sigaction sa; diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c index 2bb7e10d4b..31254abd4c 100644 --- a/util/coroutine-ucontext.c +++ b/util/coroutine-ucontext.c @@ -82,7 +82,7 @@ static void coroutine_trampoline(int i0, int i1) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; + const size_t stack_size = COROUTINE_STACK_SIZE; CoroutineUContext *co; ucontext_t old_uc, uc; sigjmp_buf old_env; diff --git a/util/coroutine-win32.c b/util/coroutine-win32.c index 02e28e825f..de6bd4fd3e 100644 --- a/util/coroutine-win32.c +++ b/util/coroutine-win32.c @@ -71,7 +71,7 @@ static void CALLBACK coroutine_trampoline(void *co_) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = 1 << 20; + const size_t stack_size = COROUTINE_STACK_SIZE; CoroutineWin32 *co; co = g_malloc0(sizeof(*co)); From ddba15919bd51b45d0d1425cc2de0c0c0bae3877 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 27 Sep 2016 11:58:43 +0200 Subject: [PATCH 15/17] coroutine-ucontext: use helper for allocating stack memory Signed-off-by: Peter Lieven Signed-off-by: Kevin Wolf --- util/coroutine-ucontext.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/util/coroutine-ucontext.c b/util/coroutine-ucontext.c index 31254abd4c..6621f3f692 100644 --- a/util/coroutine-ucontext.c +++ b/util/coroutine-ucontext.c @@ -34,6 +34,7 @@ typedef struct { Coroutine base; void *stack; + size_t stack_size; sigjmp_buf env; #ifdef CONFIG_VALGRIND_H @@ -82,7 +83,6 @@ static void coroutine_trampoline(int i0, int i1) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = COROUTINE_STACK_SIZE; CoroutineUContext *co; ucontext_t old_uc, uc; sigjmp_buf old_env; @@ -101,17 +101,18 @@ Coroutine *qemu_coroutine_new(void) } co = g_malloc0(sizeof(*co)); - co->stack = g_malloc(stack_size); + co->stack_size = COROUTINE_STACK_SIZE; + co->stack = qemu_alloc_stack(&co->stack_size); co->base.entry_arg = &old_env; /* stash away our jmp_buf */ uc.uc_link = &old_uc; uc.uc_stack.ss_sp = co->stack; - uc.uc_stack.ss_size = stack_size; + uc.uc_stack.ss_size = co->stack_size; uc.uc_stack.ss_flags = 0; #ifdef CONFIG_VALGRIND_H co->valgrind_stack_id = - VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size); + VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size); #endif arg.p = co; @@ -149,7 +150,7 @@ void qemu_coroutine_delete(Coroutine *co_) valgrind_stack_deregister(co); #endif - g_free(co->stack); + qemu_free_stack(co->stack, co->stack_size); g_free(co); } From 2f4aa2329945c741e10fd5b6631e99a4be25ea60 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 27 Sep 2016 11:58:44 +0200 Subject: [PATCH 16/17] coroutine-sigaltstack: use helper for allocating stack memory Signed-off-by: Peter Lieven Signed-off-by: Kevin Wolf --- util/coroutine-sigaltstack.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/util/coroutine-sigaltstack.c b/util/coroutine-sigaltstack.c index a5bcb7e19e..f6fc49a0e5 100644 --- a/util/coroutine-sigaltstack.c +++ b/util/coroutine-sigaltstack.c @@ -33,6 +33,7 @@ typedef struct { Coroutine base; void *stack; + size_t stack_size; sigjmp_buf env; } CoroutineSigAltStack; @@ -143,7 +144,6 @@ static void coroutine_trampoline(int signal) Coroutine *qemu_coroutine_new(void) { - const size_t stack_size = COROUTINE_STACK_SIZE; CoroutineSigAltStack *co; CoroutineThreadState *coTS; struct sigaction sa; @@ -164,7 +164,8 @@ Coroutine *qemu_coroutine_new(void) */ co = g_malloc0(sizeof(*co)); - co->stack = g_malloc(stack_size); + co->stack_size = COROUTINE_STACK_SIZE; + co->stack = qemu_alloc_stack(&co->stack_size); co->base.entry_arg = &old_env; /* stash away our jmp_buf */ coTS = coroutine_get_thread_state(); @@ -189,7 +190,7 @@ Coroutine *qemu_coroutine_new(void) * Set the new stack. */ ss.ss_sp = co->stack; - ss.ss_size = stack_size; + ss.ss_size = co->stack_size; ss.ss_flags = 0; if (sigaltstack(&ss, &oss) < 0) { abort(); @@ -253,7 +254,7 @@ void qemu_coroutine_delete(Coroutine *co_) { CoroutineSigAltStack *co = DO_UPCAST(CoroutineSigAltStack, base, co_); - g_free(co->stack); + qemu_free_stack(co->stack, co->stack_size); g_free(co); } From 7d992e4d5a95d0b21c6c33bd32cef8671805e39b Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Tue, 27 Sep 2016 11:58:45 +0200 Subject: [PATCH 17/17] oslib-posix: add a configure switch to debug stack usage this adds a knob to track the maximum stack usage of stacks created by qemu_alloc_stack. Signed-off-by: Peter Lieven Signed-off-by: Kevin Wolf --- configure | 19 +++++++++++++++++++ util/oslib-posix.c | 35 +++++++++++++++++++++++++++++++++++ 2 files changed, 54 insertions(+) diff --git a/configure b/configure index df4a247fc2..5751d8ecaa 100755 --- a/configure +++ b/configure @@ -296,6 +296,7 @@ libiscsi="" libnfs="" coroutine="" coroutine_pool="" +debug_stack_usage="no" seccomp="" glusterfs="" glusterfs_xlator_opt="no" @@ -1004,6 +1005,8 @@ for opt do ;; --enable-coroutine-pool) coroutine_pool="yes" ;; + --enable-debug-stack-usage) debug_stack_usage="yes" + ;; --disable-docs) docs="no" ;; --enable-docs) docs="yes" @@ -4331,6 +4334,17 @@ if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)" fi +if test "$debug_stack_usage" = "yes"; then + if test "$cpu" = "ia64" -o "$cpu" = "hppa"; then + error_exit "stack usage debugging is not supported for $cpu" + fi + if test "$coroutine_pool" = "yes"; then + echo "WARN: disabling coroutine pool for stack usage debugging" + coroutine_pool=no + fi +fi + + ########################################## # check if we have open_by_handle_at @@ -4916,6 +4930,7 @@ echo "QGA MSI support $guest_agent_msi" echo "seccomp support $seccomp" echo "coroutine backend $coroutine" echo "coroutine pool $coroutine_pool" +echo "debug stack usage $debug_stack_usage" echo "GlusterFS support $glusterfs" echo "Archipelago support $archipelago" echo "gcov $gcov_tool" @@ -5384,6 +5399,10 @@ else echo "CONFIG_COROUTINE_POOL=0" >> $config_host_mak fi +if test "$debug_stack_usage" = "yes" ; then + echo "CONFIG_DEBUG_STACK_USAGE=y" >> $config_host_mak +fi + if test "$open_by_handle_at" = "yes" ; then echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak fi diff --git a/util/oslib-posix.c b/util/oslib-posix.c index d950c347e2..aaec1891f5 100644 --- a/util/oslib-posix.c +++ b/util/oslib-posix.c @@ -50,6 +50,10 @@ #include "qemu/mmap-alloc.h" +#ifdef CONFIG_DEBUG_STACK_USAGE +#include "qemu/error-report.h" +#endif + int qemu_get_thread_id(void) { #if defined(__linux__) @@ -503,6 +507,9 @@ pid_t qemu_fork(Error **errp) void *qemu_alloc_stack(size_t *sz) { void *ptr, *guardpage; +#ifdef CONFIG_DEBUG_STACK_USAGE + void *ptr2; +#endif size_t pagesz = getpagesize(); #ifdef _SC_THREAD_STACK_MIN /* avoid stacks smaller than _SC_THREAD_STACK_MIN */ @@ -534,10 +541,38 @@ void *qemu_alloc_stack(size_t *sz) abort(); } +#ifdef CONFIG_DEBUG_STACK_USAGE + for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) { + *(uint32_t *)ptr2 = 0xdeadbeaf; + } +#endif + return ptr; } +#ifdef CONFIG_DEBUG_STACK_USAGE +static __thread unsigned int max_stack_usage; +#endif + void qemu_free_stack(void *stack, size_t sz) { +#ifdef CONFIG_DEBUG_STACK_USAGE + unsigned int usage; + void *ptr; + + for (ptr = stack + getpagesize(); ptr < stack + sz; + ptr += sizeof(uint32_t)) { + if (*(uint32_t *)ptr != 0xdeadbeaf) { + break; + } + } + usage = sz - (uintptr_t) (ptr - stack); + if (usage > max_stack_usage) { + error_report("thread %d max stack usage increased from %u to %u", + qemu_get_thread_id(), max_stack_usage, usage); + max_stack_usage = usage; + } +#endif + munmap(stack, sz); }