Block layer patches

-----BEGIN PGP SIGNATURE-----
 Version: GnuPG v2.0.22 (GNU/Linux)
 
 iQIcBAABAgAGBQJX7RMCAAoJEH8JsnLIjy/Wa3YQAIZkhb0XpWSbGjS0N3VsqI+P
 IqkwXDkYf7H02sptgGJtb0uHxsIdeTUv7B5NKld7FhAC2UNqrP6IZLiZUM3/yvCB
 oWITyfM3Y35OJi+hR4I4s+8lPaOjXyonBrBBOhipMqBkkgQ7Ck+ZhSWr4lDvo9oe
 BLijZLEVRZQdimAz82VDtBFW4jwjPzj8RBLfFDI8HThrQ2DMmt0DTHWywTVi/Lkj
 4qa0lUzVXRRaroyRFzu2T2joakzgaj9TH7/o5RZS2hcEsiaj4Sd7o6+UmKlSk/7v
 JjjgFpNXDW0Lm7Hm14ldp6zQSJ4yCraFBv/edzJ7KvpD04ESWNQQm6ABHaAlfdpL
 I6M8x/8v20tPMaAA0l3KTAuWzyq9Ihu7SK4pifhPazbpl/bEUzovHm4vjV/RIdHO
 AIojs7w9vBGxOaxlQyPchTj8bqOkZbdNTClj6HEEOc6V4SIIzg4PgC4gFu8yX88u
 NxOaLQvXUSVONWGKQ3SC7PSY+vxTGRerQZ8a7TGitSZFvPD0w7QR+DYeqUPBoQtO
 m6P9+QfZ6Lzqm2UD2e2oY0GYQE08jjE5J6V4rF6lNaYO2TL2elw8Cg1lKImtggST
 K1rFfg4Fhraqk2g7MrJoa9LBkul/0G1OfcCgbWwfWyv/FA9kNyvwHCLcPLqrCd8S
 1OBjN2A6WlIS6gfCRQUt
 =6HYD
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches

# gpg: Signature made Thu 29 Sep 2016 14:11:30 BST
# gpg:                using RSA key 0x7F09B272C88F2FD6
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>"
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream:
  oslib-posix: add a configure switch to debug stack usage
  coroutine-sigaltstack: use helper for allocating stack memory
  coroutine-ucontext: use helper for allocating stack memory
  coroutine: add a macro for the coroutine stack size
  coroutine-sigaltstack: rename coroutine struct appropriately
  oslib-posix: add helpers for stack alloc and free
  block: Remove qemu_root_bds_opts
  block: Move 'discard' option to bdrv_open_common()
  block: Use 'detect-zeroes' option for 'blockdev-change-medium'
  block: Parse 'detect-zeroes' in bdrv_open_common()
  block/qapi: Move 'aio' option to file driver
  block/qapi: Use separate options type for curl driver
  block: Drop aio/cache consistency check from qmp_blockdev_add()
  block: Fix error path in qmp_blockdev_change_medium()
  block-backend: remove blk_flush_all
  qemu: use bdrv_flush_all for vm_stop et al
  block: reintroduce bdrv_flush_all

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2016-10-04 14:25:08 +01:00
commit bbc4c3f4f3
21 changed files with 342 additions and 191 deletions

50
block.c
View File

@ -42,6 +42,7 @@
#include "qapi-event.h"
#include "qemu/cutils.h"
#include "qemu/id.h"
#include "qapi/util.h"
#ifdef CONFIG_BSD
#include <sys/ioctl.h>
@ -764,7 +765,7 @@ static void bdrv_inherited_options(int *child_flags, QDict *child_options,
/* Our block drivers take care to send flushes and respect unmap policy,
* so we can default to enable both on lower layers regardless of the
* corresponding parent options. */
flags |= BDRV_O_UNMAP;
qdict_set_default_str(child_options, BDRV_OPT_DISCARD, "unmap");
/* Clear flags that only apply to the top layer */
flags &= ~(BDRV_O_SNAPSHOT | BDRV_O_NO_BACKING | BDRV_O_COPY_ON_READ |
@ -954,6 +955,16 @@ static QemuOptsList bdrv_runtime_opts = {
.type = QEMU_OPT_BOOL,
.help = "Node is opened in read-only mode",
},
{
.name = "detect-zeroes",
.type = QEMU_OPT_STRING,
.help = "try to optimize zero writes (off, on, unmap)",
},
{
.name = "discard",
.type = QEMU_OPT_STRING,
.help = "discard operation (ignore/off, unmap/on)",
},
{ /* end of list */ }
},
};
@ -970,6 +981,8 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
const char *filename;
const char *driver_name = NULL;
const char *node_name = NULL;
const char *discard;
const char *detect_zeroes;
QemuOpts *opts;
BlockDriver *drv;
Error *local_err = NULL;
@ -1038,6 +1051,41 @@ static int bdrv_open_common(BlockDriverState *bs, BdrvChild *file,
}
}
discard = qemu_opt_get(opts, "discard");
if (discard != NULL) {
if (bdrv_parse_discard_flags(discard, &bs->open_flags) != 0) {
error_setg(errp, "Invalid discard option");
ret = -EINVAL;
goto fail_opts;
}
}
detect_zeroes = qemu_opt_get(opts, "detect-zeroes");
if (detect_zeroes) {
BlockdevDetectZeroesOptions value =
qapi_enum_parse(BlockdevDetectZeroesOptions_lookup,
detect_zeroes,
BLOCKDEV_DETECT_ZEROES_OPTIONS__MAX,
BLOCKDEV_DETECT_ZEROES_OPTIONS_OFF,
&local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail_opts;
}
if (value == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
!(bs->open_flags & BDRV_O_UNMAP))
{
error_setg(errp, "setting detect-zeroes to unmap is not allowed "
"without setting discard operation to unmap");
ret = -EINVAL;
goto fail_opts;
}
bs->detect_zeroes = value;
}
if (filename != NULL) {
pstrcpy(bs->filename, sizeof(bs->filename), filename);
} else {

View File

@ -1592,13 +1592,12 @@ void blk_update_root_state(BlockBackend *blk)
}
/*
* Applies the information in the root state to the given BlockDriverState. This
* does not include the flags which have to be specified for bdrv_open(), use
* blk_get_open_flags_from_root_state() to inquire them.
* Returns the detect-zeroes setting to be used for bdrv_open() of a
* BlockDriverState which is supposed to inherit the root state.
*/
void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs)
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk)
{
bs->detect_zeroes = blk->root_state.detect_zeroes;
return blk->root_state.detect_zeroes;
}
/*
@ -1640,28 +1639,6 @@ int blk_commit_all(void)
return 0;
}
/*
 * Flush every BlockBackend that currently has a medium inserted.
 *
 * Each backend is flushed while its AioContext is held. All backends are
 * visited even if one of them fails; the return value is 0 on success or
 * the negative result of the first blk_flush() call that failed.
 */
int blk_flush_all(void)
{
    BlockBackend *blk = NULL;
    int first_error = 0;

    while ((blk = blk_all_next(blk)) != NULL) {
        AioContext *ctx = blk_get_aio_context(blk);

        aio_context_acquire(ctx);
        if (blk_is_inserted(blk)) {
            int rc = blk_flush(blk);

            /* Remember only the first failure, but keep flushing the rest */
            if (rc < 0 && first_error == 0) {
                first_error = rc;
            }
        }
        aio_context_release(ctx);
    }

    return first_error;
}
/* throttling disk I/O limits */
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg)

View File

@ -1619,6 +1619,31 @@ int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
BDRV_REQ_ZERO_WRITE | flags);
}
/*
 * Flush every BlockDriverState, whether or not it is reachable through a
 * BlockBackend.
 *
 * All nodes are visited even if one of them fails. Returns 0 if every flush
 * succeeded, otherwise the negative result of the first bdrv_flush() call
 * that failed.
 */
int bdrv_flush_all(void)
{
    BdrvNextIterator it;
    BlockDriverState *bs;
    int first_error = 0;

    bs = bdrv_first(&it);
    while (bs != NULL) {
        AioContext *ctx = bdrv_get_aio_context(bs);
        int rc;

        /* The flush itself runs with the node's AioContext held */
        aio_context_acquire(ctx);
        rc = bdrv_flush(bs);
        aio_context_release(ctx);

        if (rc < 0 && first_error == 0) {
            first_error = rc;
        }

        bs = bdrv_next(&it);
    }

    return first_error;
}
typedef struct BdrvCoGetBlockStatusData {
BlockDriverState *bs;
BlockDriverState *base;

View File

@ -143,6 +143,7 @@ typedef struct BDRVRawState {
bool has_discard:1;
bool has_write_zeroes:1;
bool discard_zeroes:1;
bool use_linux_aio:1;
bool has_fallocate;
bool needs_alignment;
} BDRVRawState;
@ -367,18 +368,6 @@ static void raw_parse_flags(int bdrv_flags, int *open_flags)
}
}
#ifdef CONFIG_LINUX_AIO
/*
 * Returns true only when both BDRV_O_NATIVE_AIO and BDRV_O_NOCACHE are set
 * in @bdrv_flags. The NOCACHE flag is checked as well because Linux AIO is
 * currently only done for files opened with O_DIRECT.
 */
static bool raw_use_aio(int bdrv_flags)
{
    const int required = BDRV_O_NOCACHE | BDRV_O_NATIVE_AIO;

    return (bdrv_flags & required) == required;
}
#endif
static void raw_parse_filename(const char *filename, QDict *options,
Error **errp)
{
@ -399,6 +388,11 @@ static QemuOptsList raw_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "File name of the image",
},
{
.name = "aio",
.type = QEMU_OPT_STRING,
.help = "host AIO implementation (threads, native)",
},
{ /* end of list */ }
},
};
@ -410,6 +404,7 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
QemuOpts *opts;
Error *local_err = NULL;
const char *filename = NULL;
BlockdevAioOptions aio, aio_default;
int fd, ret;
struct stat st;
@ -429,6 +424,18 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
goto fail;
}
aio_default = (bdrv_flags & BDRV_O_NATIVE_AIO)
? BLOCKDEV_AIO_OPTIONS_NATIVE
: BLOCKDEV_AIO_OPTIONS_THREADS;
aio = qapi_enum_parse(BlockdevAioOptions_lookup, qemu_opt_get(opts, "aio"),
BLOCKDEV_AIO_OPTIONS__MAX, aio_default, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
s->use_linux_aio = (aio == BLOCKDEV_AIO_OPTIONS_NATIVE);
s->open_flags = open_flags;
raw_parse_flags(bdrv_flags, &s->open_flags);
@ -444,14 +451,15 @@ static int raw_open_common(BlockDriverState *bs, QDict *options,
s->fd = fd;
#ifdef CONFIG_LINUX_AIO
if (!raw_use_aio(bdrv_flags) && (bdrv_flags & BDRV_O_NATIVE_AIO)) {
/* Currently Linux does AIO only for files opened with O_DIRECT */
if (s->use_linux_aio && !(s->open_flags & O_DIRECT)) {
error_setg(errp, "aio=native was specified, but it requires "
"cache.direct=on, which was not specified.");
ret = -EINVAL;
goto fail;
}
#else
if (bdrv_flags & BDRV_O_NATIVE_AIO) {
if (s->use_linux_aio) {
error_setg(errp, "aio=native was specified, but is not supported "
"in this build.");
ret = -EINVAL;
@ -1256,7 +1264,7 @@ static int coroutine_fn raw_co_prw(BlockDriverState *bs, uint64_t offset,
if (!bdrv_qiov_is_aligned(bs, qiov)) {
type |= QEMU_AIO_MISALIGNED;
#ifdef CONFIG_LINUX_AIO
} else if (bs->open_flags & BDRV_O_NATIVE_AIO) {
} else if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
assert(qiov->size == bytes);
return laio_co_submit(bs, aio, s->fd, offset, qiov, type);
@ -1285,7 +1293,8 @@ static int coroutine_fn raw_co_pwritev(BlockDriverState *bs, uint64_t offset,
static void raw_aio_plug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
if (bs->open_flags & BDRV_O_NATIVE_AIO) {
BDRVRawState *s = bs->opaque;
if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_plug(bs, aio);
}
@ -1295,7 +1304,8 @@ static void raw_aio_plug(BlockDriverState *bs)
static void raw_aio_unplug(BlockDriverState *bs)
{
#ifdef CONFIG_LINUX_AIO
if (bs->open_flags & BDRV_O_NATIVE_AIO) {
BDRVRawState *s = bs->opaque;
if (s->use_linux_aio) {
LinuxAioState *aio = aio_get_linux_aio(bdrv_get_aio_context(bs));
laio_io_unplug(bs, aio);
}

View File

@ -32,6 +32,7 @@
#include "block/thread-pool.h"
#include "qemu/iov.h"
#include "qapi/qmp/qstring.h"
#include "qapi/util.h"
#include <windows.h>
#include <winioctl.h>
@ -252,7 +253,8 @@ static void raw_probe_alignment(BlockDriverState *bs, Error **errp)
}
}
static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
static void raw_parse_flags(int flags, bool use_aio, int *access_flags,
DWORD *overlapped)
{
assert(access_flags != NULL);
assert(overlapped != NULL);
@ -264,7 +266,7 @@ static void raw_parse_flags(int flags, int *access_flags, DWORD *overlapped)
}
*overlapped = FILE_ATTRIBUTE_NORMAL;
if (flags & BDRV_O_NATIVE_AIO) {
if (use_aio) {
*overlapped |= FILE_FLAG_OVERLAPPED;
}
if (flags & BDRV_O_NOCACHE) {
@ -292,10 +294,35 @@ static QemuOptsList raw_runtime_opts = {
.type = QEMU_OPT_STRING,
.help = "File name of the image",
},
{
.name = "aio",
.type = QEMU_OPT_STRING,
.help = "host AIO implementation (threads, native)",
},
{ /* end of list */ }
},
};
/*
 * Determine whether native AIO was requested for this open.
 *
 * @opts:  runtime options; the "aio" string option is looked up here
 * @flags: BDRV_O_* open flags; BDRV_O_NATIVE_AIO selects the value handed to
 *         qapi_enum_parse() as its default
 * @errp:  passed to qapi_enum_parse(); also set if the parsed value is
 *         neither "native" nor "threads"
 *
 * Returns true for BLOCKDEV_AIO_OPTIONS_NATIVE, false otherwise. Callers
 * must inspect @errp to tell "threads" apart from a failure.
 */
static bool get_aio_option(QemuOpts *opts, int flags, Error **errp)
{
    BlockdevAioOptions fallback;
    BlockdevAioOptions chosen;

    if (flags & BDRV_O_NATIVE_AIO) {
        fallback = BLOCKDEV_AIO_OPTIONS_NATIVE;
    } else {
        fallback = BLOCKDEV_AIO_OPTIONS_THREADS;
    }

    chosen = qapi_enum_parse(BlockdevAioOptions_lookup,
                             qemu_opt_get(opts, "aio"),
                             BLOCKDEV_AIO_OPTIONS__MAX, fallback, errp);

    if (chosen == BLOCKDEV_AIO_OPTIONS_NATIVE) {
        return true;
    }
    if (chosen != BLOCKDEV_AIO_OPTIONS_THREADS) {
        /* Any other enum value is not a usable AIO backend here */
        error_setg(errp, "Invalid AIO option");
    }
    return false;
}
static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error **errp)
{
@ -305,6 +332,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
QemuOpts *opts;
Error *local_err = NULL;
const char *filename;
bool use_aio;
int ret;
s->type = FTYPE_FILE;
@ -319,7 +347,14 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
filename = qemu_opt_get(opts, "filename");
raw_parse_flags(flags, &access_flags, &overlapped);
use_aio = get_aio_option(opts, flags, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
raw_parse_flags(flags, use_aio, &access_flags, &overlapped);
if (filename[0] && filename[1] == ':') {
snprintf(s->drive_path, sizeof(s->drive_path), "%c:\\", filename[0]);
@ -346,7 +381,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
goto fail;
}
if (flags & BDRV_O_NATIVE_AIO) {
if (use_aio) {
s->aio = win32_aio_init();
if (s->aio == NULL) {
CloseHandle(s->hfile);
@ -647,6 +682,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL;
const char *filename;
bool use_aio;
QemuOpts *opts = qemu_opts_create(&raw_runtime_opts, NULL, 0,
&error_abort);
@ -659,6 +695,16 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
filename = qemu_opt_get(opts, "filename");
use_aio = get_aio_option(opts, flags, &local_err);
if (!local_err && use_aio) {
error_setg(&local_err, "AIO is not supported on Windows host devices");
}
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto done;
}
if (strstart(filename, "/dev/cdrom", NULL)) {
if (find_cdrom(device_name, sizeof(device_name)) < 0) {
error_setg(errp, "Could not open CD-ROM drive");
@ -677,7 +723,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags,
}
s->type = find_device_type(bs, filename);
raw_parse_flags(flags, &access_flags, &overlapped);
raw_parse_flags(flags, use_aio, &access_flags, &overlapped);
create_flags = OPEN_EXISTING;

View File

@ -356,7 +356,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
const char **throttling_group, ThrottleConfig *throttle_cfg,
BlockdevDetectZeroesOptions *detect_zeroes, Error **errp)
{
const char *discard;
Error *local_error = NULL;
const char *aio;
@ -365,13 +364,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
*bdrv_flags |= BDRV_O_COPY_ON_READ;
}
if ((discard = qemu_opt_get(opts, "discard")) != NULL) {
if (bdrv_parse_discard_flags(discard, bdrv_flags) != 0) {
error_setg(errp, "Invalid discard option");
return;
}
}
if ((aio = qemu_opt_get(opts, "aio")) != NULL) {
if (!strcmp(aio, "native")) {
*bdrv_flags |= BDRV_O_NATIVE_AIO;
@ -449,15 +441,6 @@ static void extract_common_blockdev_options(QemuOpts *opts, int *bdrv_flags,
error_propagate(errp, local_error);
return;
}
if (bdrv_flags &&
*detect_zeroes == BLOCKDEV_DETECT_ZEROES_OPTIONS_UNMAP &&
!(*bdrv_flags & BDRV_O_UNMAP))
{
error_setg(errp, "setting detect-zeroes to unmap is not allowed "
"without setting discard operation to unmap");
return;
}
}
}
@ -650,35 +633,11 @@ err_no_opts:
return NULL;
}
static QemuOptsList qemu_root_bds_opts;
/* Takes the ownership of bs_opts */
static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
{
BlockDriverState *bs;
QemuOpts *opts;
Error *local_error = NULL;
BlockdevDetectZeroesOptions detect_zeroes;
int bdrv_flags = 0;
opts = qemu_opts_create(&qemu_root_bds_opts, NULL, 1, errp);
if (!opts) {
goto fail;
}
qemu_opts_absorb_qdict(opts, bs_opts, &local_error);
if (local_error) {
error_propagate(errp, local_error);
goto fail;
}
extract_common_blockdev_options(opts, &bdrv_flags, NULL, NULL,
&detect_zeroes, &local_error);
if (local_error) {
error_propagate(errp, local_error);
goto fail;
}
/* bdrv_open() defaults to the values in bdrv_flags (for compatibility
* with other callers) rather than what we want as the real defaults.
* Apply the defaults here instead. */
@ -690,21 +649,7 @@ static BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
bdrv_flags |= BDRV_O_INACTIVE;
}
bs = bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
if (!bs) {
goto fail_no_bs_opts;
}
bs->detect_zeroes = detect_zeroes;
fail_no_bs_opts:
qemu_opts_del(opts);
return bs;
fail:
qemu_opts_del(opts);
QDECREF(bs_opts);
return NULL;
return bdrv_open(NULL, NULL, bs_opts, bdrv_flags, errp);
}
void blockdev_close_all_bdrv_states(void)
@ -2549,6 +2494,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
BlockBackend *blk;
BlockDriverState *medium_bs = NULL;
int bdrv_flags;
bool detect_zeroes;
int rc;
QDict *options = NULL;
Error *err = NULL;
@ -2588,8 +2534,12 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
abort();
}
options = qdict_new();
detect_zeroes = blk_get_detect_zeroes_from_root_state(blk);
qdict_put(options, "detect-zeroes",
qstring_from_str(detect_zeroes ? "on" : "off"));
if (has_format) {
options = qdict_new();
qdict_put(options, "driver", qstring_from_str(format));
}
@ -2614,7 +2564,7 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
error_free(err);
err = NULL;
qmp_x_blockdev_remove_medium(has_device, device, has_id, id, errp);
qmp_x_blockdev_remove_medium(has_device, device, has_id, id, &err);
if (err) {
error_propagate(errp, err);
goto fail;
@ -2626,8 +2576,6 @@ void qmp_blockdev_change_medium(bool has_device, const char *device,
goto fail;
}
blk_apply_root_state(blk, medium_bs);
qmp_blockdev_close_tray(has_device, device, has_id, id, errp);
fail:
@ -3832,21 +3780,6 @@ void qmp_blockdev_add(BlockdevOptions *options, Error **errp)
QDict *qdict;
Error *local_err = NULL;
/* TODO Sort it out in raw-posix and drive_new(): Reject aio=native with
* cache.direct=false instead of silently switching to aio=threads, except
* when called from drive_new().
*
* For now, simply forbidding the combination for all drivers will do. */
if (options->has_aio && options->aio == BLOCKDEV_AIO_OPTIONS_NATIVE) {
bool direct = options->has_cache &&
options->cache->has_direct &&
options->cache->direct;
if (!direct) {
error_setg(errp, "aio=native requires cache.direct=true");
goto fail;
}
}
visit_type_BlockdevOptions(v, NULL, &options, &local_err);
if (local_err) {
error_propagate(errp, local_err);
@ -4004,10 +3937,6 @@ QemuOptsList qemu_common_drive_opts = {
.name = "snapshot",
.type = QEMU_OPT_BOOL,
.help = "enable/disable snapshot mode",
},{
.name = "discard",
.type = QEMU_OPT_STRING,
.help = "discard operation (ignore/off, unmap/on)",
},{
.name = "aio",
.type = QEMU_OPT_STRING,
@ -4135,31 +4064,6 @@ QemuOptsList qemu_common_drive_opts = {
},
};
static QemuOptsList qemu_root_bds_opts = {
.name = "root-bds",
.head = QTAILQ_HEAD_INITIALIZER(qemu_root_bds_opts.head),
.desc = {
{
.name = "discard",
.type = QEMU_OPT_STRING,
.help = "discard operation (ignore/off, unmap/on)",
},{
.name = "aio",
.type = QEMU_OPT_STRING,
.help = "host AIO implementation (threads, native)",
},{
.name = "copy-on-read",
.type = QEMU_OPT_BOOL,
.help = "copy read data from backing file into image file",
},{
.name = "detect-zeroes",
.type = QEMU_OPT_STRING,
.help = "try to optimize zero writes (off, on, unmap)",
},
{ /* end of list */ }
},
};
QemuOptsList qemu_drive_opts = {
.name = "drive",
.head = QTAILQ_HEAD_INITIALIZER(qemu_drive_opts.head),

19
configure vendored
View File

@ -296,6 +296,7 @@ libiscsi=""
libnfs=""
coroutine=""
coroutine_pool=""
debug_stack_usage="no"
seccomp=""
glusterfs=""
glusterfs_xlator_opt="no"
@ -1004,6 +1005,8 @@ for opt do
;;
--enable-coroutine-pool) coroutine_pool="yes"
;;
--enable-debug-stack-usage) debug_stack_usage="yes"
;;
--disable-docs) docs="no"
;;
--enable-docs) docs="yes"
@ -4331,6 +4334,17 @@ if test "$coroutine" = "gthread" -a "$coroutine_pool" = "yes"; then
error_exit "'gthread' coroutine backend does not support pool (use --disable-coroutine-pool)"
fi
if test "$debug_stack_usage" = "yes"; then
if test "$cpu" = "ia64" -o "$cpu" = "hppa"; then
error_exit "stack usage debugging is not supported for $cpu"
fi
if test "$coroutine_pool" = "yes"; then
echo "WARN: disabling coroutine pool for stack usage debugging"
coroutine_pool=no
fi
fi
##########################################
# check if we have open_by_handle_at
@ -4916,6 +4930,7 @@ echo "QGA MSI support $guest_agent_msi"
echo "seccomp support $seccomp"
echo "coroutine backend $coroutine"
echo "coroutine pool $coroutine_pool"
echo "debug stack usage $debug_stack_usage"
echo "GlusterFS support $glusterfs"
echo "Archipelago support $archipelago"
echo "gcov $gcov_tool"
@ -5384,6 +5399,10 @@ else
echo "CONFIG_COROUTINE_POOL=0" >> $config_host_mak
fi
if test "$debug_stack_usage" = "yes" ; then
echo "CONFIG_DEBUG_STACK_USAGE=y" >> $config_host_mak
fi
if test "$open_by_handle_at" = "yes" ; then
echo "CONFIG_OPEN_BY_HANDLE=y" >> $config_host_mak
fi

4
cpus.c
View File

@ -751,7 +751,7 @@ static int do_vm_stop(RunState state)
bdrv_drain_all();
replay_disable_events();
ret = blk_flush_all();
ret = bdrv_flush_all();
return ret;
}
@ -1408,7 +1408,7 @@ int vm_stop_force_state(RunState state)
bdrv_drain_all();
/* Make sure to return an error if the flush in a previous vm_stop()
* failed. */
return blk_flush_all();
return bdrv_flush_all();
}
}

View File

@ -134,8 +134,6 @@ static void platform_fixed_ioport_writew(void *opaque, uint32_t addr, uint32_t v
devices, and bit 2 the non-primary-master IDE devices. */
if (val & UNPLUG_ALL_IDE_DISKS) {
DPRINTF("unplug disks\n");
blk_drain_all();
blk_flush_all();
pci_unplug_disks(pci_dev->bus);
}
if (val & UNPLUG_ALL_NICS) {

View File

@ -179,6 +179,10 @@ int pci_piix3_xen_ide_unplug(DeviceState *dev)
if (di != NULL && !di->media_cd) {
BlockBackend *blk = blk_by_legacy_dinfo(di);
DeviceState *ds = blk_get_attached_dev(blk);
blk_drain(blk);
blk_flush(blk);
if (ds) {
blk_detach_dev(blk, ds);
}

View File

@ -108,6 +108,7 @@ typedef struct HDGeometry {
#define BDRV_OPT_CACHE_DIRECT "cache.direct"
#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
#define BDRV_OPT_READ_ONLY "read-only"
#define BDRV_OPT_DISCARD "discard"
#define BDRV_SECTOR_BITS 9
@ -333,6 +334,7 @@ int bdrv_inactivate_all(void);
/* Ensure contents are flushed to disk. */
int bdrv_flush(BlockDriverState *bs);
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
int bdrv_flush_all(void);
void bdrv_close_all(void);
void bdrv_drain(BlockDriverState *bs);
void coroutine_fn bdrv_co_drain(BlockDriverState *bs);

View File

@ -28,6 +28,8 @@
#include "qemu/queue.h"
#include "qemu/coroutine.h"
#define COROUTINE_STACK_SIZE (1 << 20)
typedef enum {
COROUTINE_YIELD = 1,
COROUTINE_TERMINATE = 2,

View File

@ -152,7 +152,6 @@ BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
int blk_co_pdiscard(BlockBackend *blk, int64_t offset, int count);
int blk_co_flush(BlockBackend *blk);
int blk_flush(BlockBackend *blk);
int blk_flush_all(void);
int blk_commit_all(void);
void blk_drain(BlockBackend *blk);
void blk_drain_all(void);
@ -199,7 +198,7 @@ void blk_io_unplug(BlockBackend *blk);
BlockAcctStats *blk_get_stats(BlockBackend *blk);
BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
void blk_update_root_state(BlockBackend *blk);
void blk_apply_root_state(BlockBackend *blk, BlockDriverState *bs);
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
int blk_get_open_flags_from_root_state(BlockBackend *blk);
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,

View File

@ -60,4 +60,31 @@ int qemu_utimens(const char *path, const qemu_timespec *times);
bool is_daemonized(void);
/**
* qemu_alloc_stack:
* @sz: pointer to a size_t holding the requested usable stack size
*
* Allocate memory that can be used as a stack, for instance for
* coroutines. If the memory cannot be allocated, this function
* will abort (like g_malloc()). This function also inserts an
* additional guard page to catch a potential stack overflow.
* Note that the memory required for the guard page and alignment
* and minimal stack size restrictions will increase the value of sz.
*
* The allocated stack must be freed with qemu_free_stack().
*
* Returns: pointer to (the lowest address of) the stack memory.
*/
void *qemu_alloc_stack(size_t *sz);
/**
* qemu_free_stack:
* @stack: stack to free
* @sz: size of stack in bytes
*
* Free a stack allocated via qemu_alloc_stack(). Note that sz must
* be exactly the adjusted stack size returned by qemu_alloc_stack.
*/
void qemu_free_stack(void *stack, size_t sz);
#endif

View File

@ -1721,15 +1721,16 @@
##
# @BlockdevOptionsFile
#
# Driver specific block device options for the file backend and similar
# protocols.
# Driver specific block device options for the file backend.
#
# @filename: path to the image file
# @aio: #optional AIO backend (default: threads) (since: 2.8)
#
# Since: 1.7
##
{ 'struct': 'BlockdevOptionsFile',
'data': { 'filename': 'str' } }
'data': { 'filename': 'str',
'*aio': 'BlockdevAioOptions' } }
##
# @BlockdevOptionsNull
@ -2210,6 +2211,18 @@
'data': { 'mode': 'ReplicationMode',
'*top-id': 'str' } }
##
# @BlockdevOptionsCurl
#
# Driver specific block device options for the curl backend.
#
# @filename: path to the image file
#
# Since: 1.7
##
{ 'struct': 'BlockdevOptionsCurl',
'data': { 'filename': 'str' } }
##
# @BlockdevOptions
#
@ -2221,7 +2234,6 @@
# This option is required on the top level of blockdev-add.
# @discard: #optional discard-related options (default: ignore)
# @cache: #optional cache-related options
# @aio: #optional AIO backend (default: threads)
# @read-only: #optional whether the block device should be read-only
# (default: false)
# @detect-zeroes: #optional detect and optimize zero writes (Since 2.1)
@ -2236,7 +2248,6 @@
'*node-name': 'str',
'*discard': 'BlockdevDiscardOptions',
'*cache': 'BlockdevCacheOptions',
'*aio': 'BlockdevAioOptions',
'*read-only': 'bool',
'*detect-zeroes': 'BlockdevDetectZeroesOptions' },
'discriminator': 'driver',
@ -2248,13 +2259,13 @@
'cloop': 'BlockdevOptionsGenericFormat',
'dmg': 'BlockdevOptionsGenericFormat',
'file': 'BlockdevOptionsFile',
'ftp': 'BlockdevOptionsFile',
'ftps': 'BlockdevOptionsFile',
'ftp': 'BlockdevOptionsCurl',
'ftps': 'BlockdevOptionsCurl',
'gluster': 'BlockdevOptionsGluster',
'host_cdrom': 'BlockdevOptionsFile',
'host_device':'BlockdevOptionsFile',
'http': 'BlockdevOptionsFile',
'https': 'BlockdevOptionsFile',
'http': 'BlockdevOptionsCurl',
'https': 'BlockdevOptionsCurl',
# TODO iscsi: Wait for structured options
'luks': 'BlockdevOptionsLUKS',
# TODO nbd: Should take InetSocketAddress for 'host'?
@ -2271,7 +2282,7 @@
'replication':'BlockdevOptionsReplication',
# TODO sheepdog: Wait for structured options
# TODO ssh: Should take InetSocketAddress for 'host'?
'tftp': 'BlockdevOptionsFile',
'tftp': 'BlockdevOptionsCurl',
'vdi': 'BlockdevOptionsGenericFormat',
'vhdx': 'BlockdevOptionsGenericFormat',
'vmdk': 'BlockdevOptionsGenericCOWFormat',

View File

@ -117,10 +117,10 @@ run_qemu <<EOF
"options": {
"driver": "$IMGFMT",
"node-name": "disk",
"aio": "native",
"file": {
"driver": "file",
"filename": "$TEST_IMG"
"filename": "$TEST_IMG",
"aio": "native"
}
}
}

View File

@ -27,7 +27,7 @@ QMP_VERSION
Testing:
QMP_VERSION
{"return": {}}
{"error": {"class": "GenericError", "desc": "aio=native requires cache.direct=true"}}
{"error": {"class": "GenericError", "desc": "aio=native was specified, but it requires cache.direct=on, which was not specified."}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN"}

View File

@ -33,8 +33,9 @@
typedef struct {
Coroutine base;
void *stack;
size_t stack_size;
sigjmp_buf env;
} CoroutineUContext;
} CoroutineSigAltStack;
/**
* Per-thread coroutine bookkeeping
@ -44,7 +45,7 @@ typedef struct {
Coroutine *current;
/** The default coroutine */
CoroutineUContext leader;
CoroutineSigAltStack leader;
/** Information for the signal handler (trampoline) */
sigjmp_buf tr_reenter;
@ -89,7 +90,7 @@ static void __attribute__((constructor)) coroutine_init(void)
* (from the signal handler when it is not signal handling, read ahead
* for more information).
*/
static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
static void coroutine_bootstrap(CoroutineSigAltStack *self, Coroutine *co)
{
/* Initialize longjmp environment and switch back the caller */
if (!sigsetjmp(self->env, 0)) {
@ -109,7 +110,7 @@ static void coroutine_bootstrap(CoroutineUContext *self, Coroutine *co)
*/
static void coroutine_trampoline(int signal)
{
CoroutineUContext *self;
CoroutineSigAltStack *self;
Coroutine *co;
CoroutineThreadState *coTS;
@ -143,8 +144,7 @@ static void coroutine_trampoline(int signal)
Coroutine *qemu_coroutine_new(void)
{
const size_t stack_size = 1 << 20;
CoroutineUContext *co;
CoroutineSigAltStack *co;
CoroutineThreadState *coTS;
struct sigaction sa;
struct sigaction osa;
@ -164,7 +164,8 @@ Coroutine *qemu_coroutine_new(void)
*/
co = g_malloc0(sizeof(*co));
co->stack = g_malloc(stack_size);
co->stack_size = COROUTINE_STACK_SIZE;
co->stack = qemu_alloc_stack(&co->stack_size);
co->base.entry_arg = &old_env; /* stash away our jmp_buf */
coTS = coroutine_get_thread_state();
@ -189,7 +190,7 @@ Coroutine *qemu_coroutine_new(void)
* Set the new stack.
*/
ss.ss_sp = co->stack;
ss.ss_size = stack_size;
ss.ss_size = co->stack_size;
ss.ss_flags = 0;
if (sigaltstack(&ss, &oss) < 0) {
abort();
@ -251,17 +252,17 @@ Coroutine *qemu_coroutine_new(void)
void qemu_coroutine_delete(Coroutine *co_)
{
CoroutineUContext *co = DO_UPCAST(CoroutineUContext, base, co_);
CoroutineSigAltStack *co = DO_UPCAST(CoroutineSigAltStack, base, co_);
g_free(co->stack);
qemu_free_stack(co->stack, co->stack_size);
g_free(co);
}
CoroutineAction qemu_coroutine_switch(Coroutine *from_, Coroutine *to_,
CoroutineAction action)
{
CoroutineUContext *from = DO_UPCAST(CoroutineUContext, base, from_);
CoroutineUContext *to = DO_UPCAST(CoroutineUContext, base, to_);
CoroutineSigAltStack *from = DO_UPCAST(CoroutineSigAltStack, base, from_);
CoroutineSigAltStack *to = DO_UPCAST(CoroutineSigAltStack, base, to_);
CoroutineThreadState *s = coroutine_get_thread_state();
int ret;

View File

@ -34,6 +34,7 @@
typedef struct {
Coroutine base;
void *stack;
size_t stack_size;
sigjmp_buf env;
#ifdef CONFIG_VALGRIND_H
@ -82,7 +83,6 @@ static void coroutine_trampoline(int i0, int i1)
Coroutine *qemu_coroutine_new(void)
{
const size_t stack_size = 1 << 20;
CoroutineUContext *co;
ucontext_t old_uc, uc;
sigjmp_buf old_env;
@ -101,17 +101,18 @@ Coroutine *qemu_coroutine_new(void)
}
co = g_malloc0(sizeof(*co));
co->stack = g_malloc(stack_size);
co->stack_size = COROUTINE_STACK_SIZE;
co->stack = qemu_alloc_stack(&co->stack_size);
co->base.entry_arg = &old_env; /* stash away our jmp_buf */
uc.uc_link = &old_uc;
uc.uc_stack.ss_sp = co->stack;
uc.uc_stack.ss_size = stack_size;
uc.uc_stack.ss_size = co->stack_size;
uc.uc_stack.ss_flags = 0;
#ifdef CONFIG_VALGRIND_H
co->valgrind_stack_id =
VALGRIND_STACK_REGISTER(co->stack, co->stack + stack_size);
VALGRIND_STACK_REGISTER(co->stack, co->stack + co->stack_size);
#endif
arg.p = co;
@ -149,7 +150,7 @@ void qemu_coroutine_delete(Coroutine *co_)
valgrind_stack_deregister(co);
#endif
g_free(co->stack);
qemu_free_stack(co->stack, co->stack_size);
g_free(co);
}

View File

@ -71,7 +71,7 @@ static void CALLBACK coroutine_trampoline(void *co_)
Coroutine *qemu_coroutine_new(void)
{
const size_t stack_size = 1 << 20;
const size_t stack_size = COROUTINE_STACK_SIZE;
CoroutineWin32 *co;
co = g_malloc0(sizeof(*co));

View File

@ -50,6 +50,10 @@
#include "qemu/mmap-alloc.h"
#ifdef CONFIG_DEBUG_STACK_USAGE
#include "qemu/error-report.h"
#endif
int qemu_get_thread_id(void)
{
#if defined(__linux__)
@ -499,3 +503,76 @@ pid_t qemu_fork(Error **errp)
}
return pid;
}
/*
 * Allocate an anonymous private mapping to be used as a stack.
 *
 * @sz: in: requested usable stack size; out: total size of the mapping.
 *      The request is raised to sysconf(_SC_THREAD_STACK_MIN) where that is
 *      available (a negative sysconf() result is clamped to 0), rounded up
 *      to a multiple of the page size, and then grown by one page for the
 *      guard page.
 *
 * One page of the mapping is made inaccessible with mprotect(PROT_NONE).
 * Its position depends on the host: the lowest page by default, the highest
 * page on HOST_HPPA, and a page near the middle on HOST_IA64.
 *
 * With CONFIG_DEBUG_STACK_USAGE every 32-bit word above the first page is
 * filled with 0xdeadbeaf so that qemu_free_stack() can measure usage.
 *
 * Aborts if mmap() or mprotect() fails. Returns the lowest address of the
 * mapping.
 */
void *qemu_alloc_stack(size_t *sz)
{
    void *ptr, *guardpage;
#ifdef CONFIG_DEBUG_STACK_USAGE
    void *ptr2;
#endif
    size_t pagesz = getpagesize();
#ifdef _SC_THREAD_STACK_MIN
    /* avoid stacks smaller than _SC_THREAD_STACK_MIN */
    long min_stack_sz = sysconf(_SC_THREAD_STACK_MIN);
    *sz = MAX(MAX(min_stack_sz, 0), *sz);
#endif
    /* adjust stack size to a multiple of the page size */
    *sz = ROUND_UP(*sz, pagesz);
    /* allocate one extra page for the guard page */
    *sz += pagesz;

    ptr = mmap(NULL, *sz, PROT_READ | PROT_WRITE,
               MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
    if (ptr == MAP_FAILED) {
        abort();
    }

#if defined(HOST_IA64)
    /*
     * separate register stack: place the guard page in the middle.
     * mprotect() needs a page-aligned address, so round the midpoint down
     * to a page boundary; masking with ~pagesz would only clear one bit and
     * leave the address misaligned for an odd number of usable pages.
     */
    guardpage = ptr + (((*sz - pagesz) / 2) & ~(pagesz - 1));
#elif defined(HOST_HPPA)
    /* stack grows up */
    guardpage = ptr + *sz - pagesz;
#else
    /* stack grows down */
    guardpage = ptr;
#endif
    if (mprotect(guardpage, pagesz, PROT_NONE) != 0) {
        abort();
    }

#ifdef CONFIG_DEBUG_STACK_USAGE
    /* pre-fill everything above the first page with a recognisable pattern */
    for (ptr2 = ptr + pagesz; ptr2 < ptr + *sz; ptr2 += sizeof(uint32_t)) {
        *(uint32_t *)ptr2 = 0xdeadbeaf;
    }
#endif

    return ptr;
}
#ifdef CONFIG_DEBUG_STACK_USAGE
/* Largest usage value reported so far by qemu_free_stack() on this thread */
static __thread unsigned int max_stack_usage;
#endif

/*
 * Unmap a stack of @sz bytes starting at @stack.
 *
 * With CONFIG_DEBUG_STACK_USAGE the region above the first page is scanned
 * upwards for the first 32-bit word that no longer holds 0xdeadbeaf. The
 * distance from that word to the end of the mapping is taken as the stack
 * usage, and a message is reported whenever it exceeds the previous
 * per-thread maximum.
 */
void qemu_free_stack(void *stack, size_t sz)
{
#ifdef CONFIG_DEBUG_STACK_USAGE
    void *probe = stack + getpagesize();
    void *limit = stack + sz;
    unsigned int usage;

    /* Skip over the words that still carry the fill pattern */
    while (probe < limit && *(uint32_t *)probe == 0xdeadbeaf) {
        probe += sizeof(uint32_t);
    }

    usage = sz - (uintptr_t) (probe - stack);
    if (usage > max_stack_usage) {
        error_report("thread %d max stack usage increased from %u to %u",
                     qemu_get_thread_id(), max_stack_usage, usage);
        max_stack_usage = usage;
    }
#endif

    munmap(stack, sz);
}