Block layer patches

- qemu-storage-daemon: Add --daemonize
 - Fix x-blockdev-amend and block node activation code which incorrectly
   executed code in the iothread that must run in the main thread.
 - Add macros for coroutine-safe TLS variables (required for correctness
   with LTO)
 - Fix crashes with concurrent I/O and bdrv_refresh_limits()
 - Split block APIs in global state and I/O
 - iotests: Don't refuse to run at all without GNU sed, just skip tests
   that need it
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAmIiSecRHGt3b2xmQHJl
 ZGhhdC5jb20ACgkQfwmycsiPL9aSMBAAhS1FLwiUPJ5zsRlYkFiJ76M5AEJPNgYT
 F3QqBxJa4d/rR8Hibx0p6bFU21QKIat2OIkepcaVGh8oOM8/8DKx1dUlhQt3IOQq
 yTJ5klBTxQtnBYapEsZC1bcRgRhLXbhjsXtJluzJrfvIYO0BPdVmpetTY4vJ7v79
 U2lYImHkUYZ3xH84qXj3ymfURyBc8LpjmMwWrCaEkjxcwfgb1fOeZuGEy7B387aL
 zpYE2oKjSSI20TTbJ+VsPgf2CglmTRl2kILnWP0tFjh5clpozkXAJ/0WW/TwgQgJ
 20Blvxk4inSfkMxHPdW0ttoBfW+WqftFFh1t0xqeUn6AfQFJkpQ93RmWk4rpKc8k
 rVcXIO54sYNEcJfkofs0m7N6rDk5HBq1WA7wt5veWBeNeoKWALcqjFSlr52FofJr
 bcCFnf/DRrGJ9XSi0XDqAqJeuqcGARVViqJZL3jUm+7VuLYcdA7d1wVUzuPUdv+0
 KdANzzoLaGR8xNbB+NqRBuzOcxoXYRZWbKH5i2XDk+FCwl5qcg/XalsAcM0bwXPL
 moRkH7csqrnD4cBZDSToZoi/iNdlynSIZmI8pL5Tr9btPODBF8lQEiPtJziSHReo
 v7S1nR0Q6NNOpuZUMzLJJoPcm+uy7n672SAoWhpbvh0NTdW9msxtqY2KGCKjJH8l
 f5zp/zljV0Y=
 =Jdal
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kwolf-gitlab/tags/for-upstream' into staging

Block layer patches

- qemu-storage-daemon: Add --daemonize
- Fix x-blockdev-amend and block node activation code which incorrectly
  executed code in the iothread that must run in the main thread.
- Add macros for coroutine-safe TLS variables (required for correctness
  with LTO)
- Fix crashes with concurrent I/O and bdrv_refresh_limits()
- Split block APIs in global state and I/O
- iotests: Don't refuse to run at all without GNU sed, just skip tests
  that need it

# gpg: Signature made Fri 04 Mar 2022 17:18:31 GMT
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kwolf-gitlab/tags/for-upstream: (50 commits)
  block/amend: Keep strong reference to BDS
  block/amend: Always call .bdrv_amend_clean()
  tests/qemu-iotests: Rework the checks and spots using GNU sed
  iotests/graph-changes-while-io: New test
  iotests: Allow using QMP with the QSD
  block: Make bdrv_refresh_limits() non-recursive
  job.h: assertions in the callers of JobDriver function pointers
  job.h: split function pointers in JobDriver
  block-backend-common.h: split function pointers in BlockDevOps
  block_int-common.h: assertions in the callers of BdrvChildClass function pointers
  block_int-common.h: split function pointers in BdrvChildClass
  block_int-common.h: assertions in the callers of BlockDriver function pointers
  block_int-common.h: split function pointers in BlockDriver
  block/coroutines: I/O and "I/O or GS" API
  block/copy-before-write.h: global state API + assertions
  include/block/snapshot: global state API + assertions
  assertions for blockdev.h global state API
  include/sysemu/blockdev.h: global state API
  assertions for blockjob.h global state API
  include/block/blockjob.h: global state API
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2022-03-05 10:59:03 +00:00
commit d7e2fe4aac
75 changed files with 4864 additions and 2827 deletions

321
block.c

File diff suppressed because it is too large Load Diff

View File

@ -53,10 +53,31 @@ static int coroutine_fn blockdev_amend_run(Job *job, Error **errp)
return ret;
}
/*
 * Run the block driver's optional bdrv_amend_pre_run hook for the node
 * attached to amend job @s.
 *
 * @errp is handed through to the hook unchanged.
 *
 * Returns whatever the hook returns, or 0 when the driver does not
 * provide the hook.
 */
static int blockdev_amend_pre_run(BlockdevAmendJob *s, Error **errp)
{
if (s->bs->drv->bdrv_amend_pre_run) {
return s->bs->drv->bdrv_amend_pre_run(s->bs, errp);
}
return 0;
}
/*
 * Release the resources held by an amend job.
 *
 * Calls the driver's optional bdrv_amend_clean hook on the job's node and
 * then drops one reference on that node with bdrv_unref().
 */
static void blockdev_amend_free(Job *job)
{
/* Recover the amend job that embeds @job as its 'common' member */
BlockdevAmendJob *s = container_of(job, BlockdevAmendJob, common);
if (s->bs->drv->bdrv_amend_clean) {
s->bs->drv->bdrv_amend_clean(s->bs);
}
/* Pairs with the bdrv_ref() taken in qmp_x_blockdev_amend() */
bdrv_unref(s->bs);
}
/*
 * Job driver description for JOB_TYPE_AMEND jobs: wires the amend job's
 * run and free callbacks and its per-job state size.
 */
static const JobDriver blockdev_amend_job_driver = {
.instance_size = sizeof(BlockdevAmendJob),
.job_type = JOB_TYPE_AMEND,
.run = blockdev_amend_run,
/* Calls the driver's bdrv_amend_clean hook and unrefs the node */
.free = blockdev_amend_free,
};
void qmp_x_blockdev_amend(const char *job_id,
@ -110,8 +131,15 @@ void qmp_x_blockdev_amend(const char *job_id,
return;
}
bdrv_ref(bs);
s->bs = bs,
s->opts = QAPI_CLONE(BlockdevAmendOptions, options),
s->force = has_force ? force : false;
if (blockdev_amend_pre_run(s, errp)) {
job_early_fail(&s->common);
return;
}
job_start(&s->common);
}

View File

@ -372,6 +372,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
assert(bs);
assert(target);
GLOBAL_STATE_CODE();
/* QMP interface protects us from these cases */
assert(sync_mode != MIRROR_SYNC_MODE_INCREMENTAL);

File diff suppressed because it is too large Load Diff

View File

@ -253,6 +253,8 @@ void commit_start(const char *job_id, BlockDriverState *bs,
uint64_t base_perms, iter_shared_perms;
int ret;
GLOBAL_STATE_CODE();
assert(top != bs);
if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) {
error_setg(errp, "Invalid files for merge: top and base are the same");
@ -432,6 +434,8 @@ int bdrv_commit(BlockDriverState *bs)
QEMU_AUTO_VFREE uint8_t *buf = NULL;
Error *local_err = NULL;
GLOBAL_STATE_CODE();
if (!drv)
return -ENOMEDIUM;

View File

@ -223,6 +223,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
QDict *opts;
assert(source->total_sectors == target->total_sectors);
GLOBAL_STATE_CODE();
opts = qdict_new();
qdict_put_str(opts, "driver", "copy-before-write");
@ -245,6 +246,7 @@ BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
/*
 * Drop the filter node @bs via bdrv_drop_filter() and release one
 * reference on it.
 *
 * Tagged with GLOBAL_STATE_CODE(), i.e. part of the global state API.
 */
void bdrv_cbw_drop(BlockDriverState *bs)
{
GLOBAL_STATE_CODE();
/*
 * NOTE(review): &error_abort presumably makes a failure here fatal
 * rather than reported to the caller -- confirm against the Error API.
 */
bdrv_drop_filter(bs, &error_abort);
bdrv_unref(bs);
}

View File

@ -29,6 +29,13 @@
#include "block/block_int.h"
#include "block/block-copy.h"
/*
* Global state (GS) API. These functions run under the BQL.
*
* See include/block/block-global-state.h for more information about
* the GS API.
*/
BlockDriverState *bdrv_cbw_append(BlockDriverState *source,
BlockDriverState *target,
const char *filter_node_name,

View File

@ -30,17 +30,17 @@
/* For blk_bs() in generated block/block-gen.c */
#include "sysemu/block-backend.h"
/*
* I/O API functions. These functions are thread-safe.
*
* See include/block/block-io.h for more information about
* the I/O API.
*/
int coroutine_fn bdrv_co_check(BlockDriverState *bs,
BdrvCheckResult *res, BdrvCheckMode fix);
int coroutine_fn bdrv_co_invalidate_cache(BlockDriverState *bs, Error **errp);
int generated_co_wrapper
bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
int generated_co_wrapper
bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
int coroutine_fn
bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
@ -52,6 +52,51 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file,
int *depth);
int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
int coroutine_fn
nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp);
int coroutine_fn
blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
int coroutine_fn
blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags);
int coroutine_fn
blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
int coroutine_fn
blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
int coroutine_fn blk_co_do_flush(BlockBackend *blk);
/*
* "I/O or GS" API functions. These functions can run without
* the BQL, but only in one specific iothread/main loop.
*
* See include/block/block-io.h for more information about
* the "I/O or GS" API.
*/
int generated_co_wrapper
bdrv_preadv(BdrvChild *child, int64_t offset, unsigned int bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
int generated_co_wrapper
bdrv_pwritev(BdrvChild *child, int64_t offset, unsigned int bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
int generated_co_wrapper
bdrv_common_block_status_above(BlockDriverState *bs,
BlockDriverState *base,
@ -63,46 +108,24 @@ bdrv_common_block_status_above(BlockDriverState *bs,
int64_t *map,
BlockDriverState **file,
int *depth);
int coroutine_fn bdrv_co_readv_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
int coroutine_fn bdrv_co_writev_vmstate(BlockDriverState *bs,
QEMUIOVector *qiov, int64_t pos);
int generated_co_wrapper
nbd_do_establish_connection(BlockDriverState *bs, Error **errp);
int coroutine_fn
nbd_co_do_establish_connection(BlockDriverState *bs, Error **errp);
int generated_co_wrapper
blk_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
int coroutine_fn
blk_co_do_preadv(BlockBackend *blk, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, BdrvRequestFlags flags);
int generated_co_wrapper
blk_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags);
int coroutine_fn
blk_co_do_pwritev_part(BlockBackend *blk, int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags);
int generated_co_wrapper
blk_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
int coroutine_fn
blk_co_do_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
int generated_co_wrapper
blk_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
int coroutine_fn
blk_co_do_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
int generated_co_wrapper blk_do_flush(BlockBackend *blk);
int coroutine_fn blk_co_do_flush(BlockBackend *blk);
#endif /* BLOCK_COROUTINES_INT_H */

View File

@ -42,6 +42,8 @@ static int coroutine_fn blockdev_create_run(Job *job, Error **errp)
BlockdevCreateJob *s = container_of(job, BlockdevCreateJob, common);
int ret;
GLOBAL_STATE_CODE();
job_progress_set_remaining(&s->common, 1);
ret = s->drv->bdrv_co_create(s->opts, errp);
job_progress_update(&s->common, 1);

View File

@ -777,6 +777,37 @@ block_crypto_get_specific_info_luks(BlockDriverState *bs, Error **errp)
return spec_info;
}
/*
 * Prepare @bs for an amend operation: set crypto->updating_keys and
 * refresh the permissions on bs->file so that they take the flag into
 * account.
 *
 * Returns the result of bdrv_child_refresh_perms(); on a negative result
 * the updating_keys flag is reset to false before returning.
 */
static int
block_crypto_amend_prepare(BlockDriverState *bs, Error **errp)
{
BlockCrypto *crypto = bs->opaque;
int ret;
/* apply for exclusive read/write permissions to the underlying file */
crypto->updating_keys = true;
ret = bdrv_child_refresh_perms(bs, bs->file, errp);
if (ret < 0) {
/* Well, in this case we will not be updating any keys */
crypto->updating_keys = false;
}
return ret;
}
/*
 * Undo block_crypto_amend_prepare(): clear crypto->updating_keys and
 * refresh the permissions on bs->file again.
 *
 * The function returns nothing; if the permission refresh sets an error
 * it is passed to error_report_err() instead of being propagated.
 */
static void
block_crypto_amend_cleanup(BlockDriverState *bs)
{
BlockCrypto *crypto = bs->opaque;
/* Local error object; despite its name this is not a caller's Error ** */
Error *errp = NULL;
/* release exclusive read/write permissions to the underlying file */
crypto->updating_keys = false;
bdrv_child_refresh_perms(bs, bs->file, &errp);
if (errp) {
error_report_err(errp);
}
}
static int
block_crypto_amend_options_generic_luks(BlockDriverState *bs,
QCryptoBlockAmendOptions *amend_options,
@ -784,30 +815,17 @@ block_crypto_amend_options_generic_luks(BlockDriverState *bs,
Error **errp)
{
BlockCrypto *crypto = bs->opaque;
int ret;
assert(crypto);
assert(crypto->block);
/* apply for exclusive read/write permissions to the underlying file*/
crypto->updating_keys = true;
ret = bdrv_child_refresh_perms(bs, bs->file, errp);
if (ret) {
goto cleanup;
}
ret = qcrypto_block_amend_options(crypto->block,
block_crypto_read_func,
block_crypto_write_func,
bs,
amend_options,
force,
errp);
cleanup:
/* release exclusive read/write permissions to the underlying file*/
crypto->updating_keys = false;
bdrv_child_refresh_perms(bs, bs->file, errp);
return ret;
return qcrypto_block_amend_options(crypto->block,
block_crypto_read_func,
block_crypto_write_func,
bs,
amend_options,
force,
errp);
}
static int
@ -833,8 +851,16 @@ block_crypto_amend_options_luks(BlockDriverState *bs,
if (!amend_options) {
goto cleanup;
}
ret = block_crypto_amend_prepare(bs, errp);
if (ret) {
goto perm_cleanup;
}
ret = block_crypto_amend_options_generic_luks(bs, amend_options,
force, errp);
perm_cleanup:
block_crypto_amend_cleanup(bs);
cleanup:
qapi_free_QCryptoBlockAmendOptions(amend_options);
return ret;
@ -931,6 +957,8 @@ static BlockDriver bdrv_crypto_luks = {
.bdrv_get_specific_info = block_crypto_get_specific_info_luks,
.bdrv_amend_options = block_crypto_amend_options_luks,
.bdrv_co_amend = block_crypto_co_amend_luks,
.bdrv_amend_pre_run = block_crypto_amend_prepare,
.bdrv_amend_clean = block_crypto_amend_cleanup,
.is_format = true,

View File

@ -496,6 +496,7 @@ static void coroutine_fn bdrv_co_can_store_new_dirty_bitmap_entry(void *opaque)
bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
uint32_t granularity, Error **errp)
{
IO_CODE();
if (qemu_in_coroutine()) {
return bdrv_co_can_store_new_dirty_bitmap(bs, name, granularity, errp);
} else {
@ -656,6 +657,7 @@ void bdrv_reset_dirty_bitmap(BdrvDirtyBitmap *bitmap,
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out)
{
IO_CODE();
assert(!bdrv_dirty_bitmap_readonly(bitmap));
bdrv_dirty_bitmaps_lock(bitmap->bs);
if (!out) {
@ -673,6 +675,7 @@ void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup)
{
HBitmap *tmp = bitmap->bitmap;
assert(!bdrv_dirty_bitmap_readonly(bitmap));
GLOBAL_STATE_CODE();
bitmap->bitmap = backup;
hbitmap_free(tmp);
}
@ -737,6 +740,7 @@ void bdrv_dirty_bitmap_deserialize_finish(BdrvDirtyBitmap *bitmap)
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes)
{
BdrvDirtyBitmap *bitmap;
IO_CODE();
if (QLIST_EMPTY(&bs->dirty_bitmaps)) {
return;
@ -928,6 +932,7 @@ bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
bool lock)
{
bool ret;
IO_CODE();
assert(!bdrv_dirty_bitmap_readonly(dest));
assert(!bdrv_dirty_bitmap_inconsistent(dest));

View File

@ -139,7 +139,7 @@ BlockExport *blk_exp_add(BlockExportOptions *export, Error **errp)
* access since the export could be available before migration handover.
* ctx was acquired in the caller.
*/
bdrv_invalidate_cache(bs, NULL);
bdrv_activate(bs, NULL);
perm = BLK_PERM_CONSISTENT_READ;
if (export->writable) {

View File

@ -86,8 +86,8 @@ static int fuse_export_create(BlockExport *blk_exp,
assert(blk_exp_args->type == BLOCK_EXPORT_TYPE_FUSE);
/* For growable exports, take the RESIZE permission */
if (args->growable) {
/* For growable and writable exports, take the RESIZE permission */
if (args->growable || blk_exp_args->writable) {
uint64_t blk_perm, blk_shared_perm;
blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
@ -392,14 +392,23 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size,
{
uint64_t blk_perm, blk_shared_perm;
BdrvRequestFlags truncate_flags = 0;
int ret;
bool add_resize_perm;
int ret, ret_check;
/* Growable and writable exports have a permanent RESIZE permission */
add_resize_perm = !exp->growable && !exp->writable;
if (req_zero_write) {
truncate_flags |= BDRV_REQ_ZERO_WRITE;
}
/* Growable exports have a permanent RESIZE permission */
if (!exp->growable) {
if (add_resize_perm) {
if (!qemu_in_main_thread()) {
/* Changing permissions like below only works in the main thread */
return -EPERM;
}
blk_get_perm(exp->common.blk, &blk_perm, &blk_shared_perm);
ret = blk_set_perm(exp->common.blk, blk_perm | BLK_PERM_RESIZE,
@ -412,9 +421,11 @@ static int fuse_do_truncate(const FuseExport *exp, int64_t size,
ret = blk_truncate(exp->common.blk, size, true, prealloc,
truncate_flags, NULL);
if (!exp->growable) {
if (add_resize_perm) {
/* Must succeed, because we are only giving up the RESIZE permission */
blk_set_perm(exp->common.blk, blk_perm, blk_shared_perm, &error_abort);
ret_check = blk_set_perm(exp->common.blk, blk_perm,
blk_shared_perm, &error_abort);
assert(ret_check == 0);
}
return ret;

View File

@ -70,6 +70,7 @@ static void bdrv_parent_drained_end_single_no_poll(BdrvChild *c,
/*
 * End the drained section for the single parent link @c and wait for it
 * to complete.
 *
 * Delegates to the _no_poll variant with a local counter, then polls on
 * c->bs until that counter is no longer greater than zero.
 */
void bdrv_parent_drained_end_single(BdrvChild *c)
{
int drained_end_counter = 0;
/* Marks this function as belonging to the "I/O or GS" API category */
IO_OR_GS_CODE();
bdrv_parent_drained_end_single_no_poll(c, &drained_end_counter);
BDRV_POLL_WHILE(c->bs, qatomic_read(&drained_end_counter) > 0);
}
@ -114,6 +115,7 @@ static bool bdrv_parent_drained_poll(BlockDriverState *bs, BdrvChild *ignore,
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll)
{
IO_OR_GS_CODE();
c->parent_quiesce_counter++;
if (c->klass->drained_begin) {
c->klass->drained_begin(c);
@ -164,6 +166,8 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
BdrvChild *c;
bool have_limits;
GLOBAL_STATE_CODE();
if (tran) {
BdrvRefreshLimitsState *s = g_new(BdrvRefreshLimitsState, 1);
*s = (BdrvRefreshLimitsState) {
@ -189,10 +193,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
QLIST_FOREACH(c, &bs->children, next) {
if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
{
bdrv_refresh_limits(c->bs, tran, errp);
if (*errp) {
return;
}
bdrv_merge_limits(&bs->bl, &c->bs->bl);
have_limits = true;
}
@ -226,12 +226,14 @@ void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp)
*/
void bdrv_enable_copy_on_read(BlockDriverState *bs)
{
/* Marks this function as belonging to the I/O API category */
IO_CODE();
/* bs->copy_on_read is a counter; each enable atomically adds one */
qatomic_inc(&bs->copy_on_read);
}
/*
 * Atomically decrement the bs->copy_on_read counter.
 *
 * Asserts that the counter was at least 1 before the decrement, i.e. that
 * it does not drop below zero.
 */
void bdrv_disable_copy_on_read(BlockDriverState *bs)
{
/* qatomic_fetch_dec() yields the value held before the decrement */
int old = qatomic_fetch_dec(&bs->copy_on_read);
IO_CODE();
assert(old >= 1);
}
@ -303,6 +305,7 @@ bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent, bool ignore_bds_parents)
{
BdrvChild *child, *next;
IO_OR_GS_CODE();
if (bdrv_parent_drained_poll(bs, ignore_parent, ignore_bds_parents)) {
return true;
@ -426,6 +429,7 @@ static void coroutine_fn bdrv_co_yield_to_drain(BlockDriverState *bs,
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
BdrvChild *parent, bool ignore_bds_parents)
{
IO_OR_GS_CODE();
assert(!qemu_in_coroutine());
/* Stop things in parent-to-child order */
@ -477,11 +481,13 @@ static void bdrv_do_drained_begin(BlockDriverState *bs, bool recursive,
/*
 * Begin a drained section on @bs.
 *
 * Thin wrapper around bdrv_do_drained_begin() with recursive == false.
 */
void bdrv_drained_begin(BlockDriverState *bs)
{
/* Marks this function as belonging to the "I/O or GS" API category */
IO_OR_GS_CODE();
bdrv_do_drained_begin(bs, false, NULL, false, true);
}
/*
 * Begin a drained section on @bs with recursion enabled.
 *
 * Same call as bdrv_drained_begin() except that the 'recursive' argument
 * of bdrv_do_drained_begin() is true.
 */
void bdrv_subtree_drained_begin(BlockDriverState *bs)
{
/* Marks this function as belonging to the "I/O or GS" API category */
IO_OR_GS_CODE();
bdrv_do_drained_begin(bs, true, NULL, false, true);
}
@ -538,18 +544,21 @@ static void bdrv_do_drained_end(BlockDriverState *bs, bool recursive,
/*
 * End a drained section on @bs and wait for it to complete.
 *
 * Calls bdrv_do_drained_end() with recursive == false and a local counter,
 * then polls on @bs until that counter is no longer greater than zero.
 */
void bdrv_drained_end(BlockDriverState *bs)
{
int drained_end_counter = 0;
/* Marks this function as belonging to the "I/O or GS" API category */
IO_OR_GS_CODE();
bdrv_do_drained_end(bs, false, NULL, false, &drained_end_counter);
BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
/*
 * Non-polling variant of bdrv_drained_end().
 *
 * Forwards the caller-provided @drained_end_counter to
 * bdrv_do_drained_end() and returns without polling on it; waiting for
 * the counter is left to the caller.
 */
void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter)
{
/* Marks this function as belonging to the I/O API category */
IO_CODE();
bdrv_do_drained_end(bs, false, NULL, false, drained_end_counter);
}
/*
 * End a drained section on @bs with recursion enabled and wait for it to
 * complete.
 *
 * Same as bdrv_drained_end() except that the 'recursive' argument of
 * bdrv_do_drained_end() is true.
 */
void bdrv_subtree_drained_end(BlockDriverState *bs)
{
int drained_end_counter = 0;
/* Marks this function as belonging to the "I/O or GS" API category */
IO_OR_GS_CODE();
bdrv_do_drained_end(bs, true, NULL, false, &drained_end_counter);
BDRV_POLL_WHILE(bs, qatomic_read(&drained_end_counter) > 0);
}
@ -557,6 +566,7 @@ void bdrv_subtree_drained_end(BlockDriverState *bs)
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent)
{
int i;
IO_OR_GS_CODE();
for (i = 0; i < new_parent->recursive_quiesce_counter; i++) {
bdrv_do_drained_begin(child->bs, true, child, false, true);
@ -567,6 +577,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
{
int drained_end_counter = 0;
int i;
IO_OR_GS_CODE();
for (i = 0; i < old_parent->recursive_quiesce_counter; i++) {
bdrv_do_drained_end(child->bs, true, child, false,
@ -585,6 +596,7 @@ void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent)
*/
void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
{
IO_OR_GS_CODE();
assert(qemu_in_coroutine());
bdrv_drained_begin(bs);
bdrv_drained_end(bs);
@ -592,6 +604,7 @@ void coroutine_fn bdrv_co_drain(BlockDriverState *bs)
/*
 * Drain @bs once: begin a drained section and end it again immediately.
 */
void bdrv_drain(BlockDriverState *bs)
{
/* Marks this function as belonging to the "I/O or GS" API category */
IO_OR_GS_CODE();
bdrv_drained_begin(bs);
bdrv_drained_end(bs);
}
@ -612,6 +625,7 @@ static bool bdrv_drain_all_poll(void)
{
BlockDriverState *bs = NULL;
bool result = false;
GLOBAL_STATE_CODE();
/* bdrv_drain_poll() can't make changes to the graph and we are holding the
* main AioContext lock, so iterating bdrv_next_all_states() is safe. */
@ -640,6 +654,7 @@ static bool bdrv_drain_all_poll(void)
void bdrv_drain_all_begin(void)
{
BlockDriverState *bs = NULL;
GLOBAL_STATE_CODE();
if (qemu_in_coroutine()) {
bdrv_co_yield_to_drain(NULL, true, false, NULL, true, true, NULL);
@ -682,6 +697,7 @@ void bdrv_drain_all_begin(void)
void bdrv_drain_all_end_quiesce(BlockDriverState *bs)
{
int drained_end_counter = 0;
GLOBAL_STATE_CODE();
g_assert(bs->quiesce_counter > 0);
g_assert(!bs->refcnt);
@ -696,6 +712,7 @@ void bdrv_drain_all_end(void)
{
BlockDriverState *bs = NULL;
int drained_end_counter = 0;
GLOBAL_STATE_CODE();
/*
* bdrv queue is managed by record/replay,
@ -723,6 +740,7 @@ void bdrv_drain_all_end(void)
/*
 * Drain once across all nodes: calls bdrv_drain_all_begin() immediately
 * followed by bdrv_drain_all_end().
 */
void bdrv_drain_all(void)
{
/* Marks this function as belonging to the global state API category */
GLOBAL_STATE_CODE();
bdrv_drain_all_begin();
bdrv_drain_all_end();
}
@ -867,6 +885,7 @@ BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs)
{
BdrvTrackedRequest *req;
Coroutine *self = qemu_coroutine_self();
IO_CODE();
QLIST_FOREACH(req, &bs->tracked_requests, list) {
if (req->co == self) {
@ -886,7 +905,7 @@ void bdrv_round_to_clusters(BlockDriverState *bs,
int64_t *cluster_bytes)
{
BlockDriverInfo bdi;
IO_CODE();
if (bdrv_get_info(bs, &bdi) < 0 || bdi.cluster_size == 0) {
*cluster_offset = offset;
*cluster_bytes = bytes;
@ -912,16 +931,19 @@ static int bdrv_get_cluster_size(BlockDriverState *bs)
/*
 * Atomically increment the bs->in_flight counter.
 */
void bdrv_inc_in_flight(BlockDriverState *bs)
{
/* Marks this function as belonging to the I/O API category */
IO_CODE();
qatomic_inc(&bs->in_flight);
}
/*
 * Kick AIO waiters by calling aio_wait_kick().
 *
 * Note that @bs is not used by the body.
 */
void bdrv_wakeup(BlockDriverState *bs)
{
/* Marks this function as belonging to the I/O API category */
IO_CODE();
aio_wait_kick();
}
/*
 * Atomically decrement the bs->in_flight counter and then call
 * bdrv_wakeup() so that anything waiting on the counter is kicked.
 */
void bdrv_dec_in_flight(BlockDriverState *bs)
{
/* Marks this function as belonging to the I/O API category */
IO_CODE();
qatomic_dec(&bs->in_flight);
bdrv_wakeup(bs);
}
@ -946,6 +968,7 @@ bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
uint64_t align)
{
bool waited;
IO_CODE();
qemu_co_mutex_lock(&req->bs->reqs_lock);
@ -1040,6 +1063,7 @@ static int bdrv_check_request32(int64_t offset, int64_t bytes,
/*
 * Issue a zero-write request for @bytes bytes at @offset on @child.
 *
 * Implemented as bdrv_pwritev() with no I/O vector (NULL) and
 * BDRV_REQ_ZERO_WRITE OR-ed into the caller's @flags.
 *
 * Returns the result of bdrv_pwritev().
 */
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
/* Marks this function as belonging to the I/O API category */
IO_CODE();
return bdrv_pwritev(child, offset, bytes, NULL,
BDRV_REQ_ZERO_WRITE | flags);
}
@ -1058,6 +1082,7 @@ int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags)
int ret;
int64_t target_size, bytes, offset = 0;
BlockDriverState *bs = child->bs;
IO_CODE();
target_size = bdrv_getlength(bs);
if (target_size < 0) {
@ -1090,6 +1115,7 @@ int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes)
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
IO_CODE();
if (bytes < 0) {
return -EINVAL;
@ -1111,6 +1137,7 @@ int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
{
int ret;
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
IO_CODE();
if (bytes < 0) {
return -EINVAL;
@ -1131,6 +1158,7 @@ int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
const void *buf, int64_t count)
{
int ret;
IO_CODE();
ret = bdrv_pwrite(child, offset, buf, count);
if (ret < 0) {
@ -1797,6 +1825,7 @@ int coroutine_fn bdrv_co_preadv(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
IO_CODE();
return bdrv_co_preadv_part(child, offset, bytes, qiov, 0, flags);
}
@ -1809,6 +1838,7 @@ int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
BdrvTrackedRequest req;
BdrvRequestPadding pad;
int ret;
IO_CODE();
trace_bdrv_co_preadv_part(bs, offset, bytes, flags);
@ -2230,6 +2260,7 @@ int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags)
{
IO_CODE();
return bdrv_co_pwritev_part(child, offset, bytes, qiov, 0, flags);
}
@ -2243,6 +2274,7 @@ int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
BdrvRequestPadding pad;
int ret;
bool padded = false;
IO_CODE();
trace_bdrv_co_pwritev_part(child->bs, offset, bytes, flags);
@ -2326,6 +2358,7 @@ out:
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags)
{
IO_CODE();
trace_bdrv_co_pwrite_zeroes(child->bs, offset, bytes, flags);
if (!(child->bs->open_flags & BDRV_O_UNMAP)) {
@ -2345,6 +2378,8 @@ int bdrv_flush_all(void)
BlockDriverState *bs = NULL;
int result = 0;
GLOBAL_STATE_CODE();
/*
* bdrv queue is managed by record/replay,
* creating new flush request for stopping
@ -2639,6 +2674,7 @@ bdrv_co_common_block_status_above(BlockDriverState *bs,
BlockDriverState *p;
int64_t eof = 0;
int dummy;
IO_CODE();
assert(!include_base || base); /* Can't include NULL base */
@ -2728,6 +2764,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file)
{
IO_CODE();
return bdrv_common_block_status_above(bs, base, false, true, offset, bytes,
pnum, map, file, NULL);
}
@ -2735,6 +2772,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
/*
 * Query the block status of @bs for the range [@offset, @offset + @bytes).
 *
 * Convenience wrapper around bdrv_block_status_above() that passes
 * bdrv_filter_or_cow_bs(bs) as the base node; @pnum, @map and @file are
 * forwarded unchanged.
 *
 * Returns the result of bdrv_block_status_above().
 */
int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map, BlockDriverState **file)
{
/* Marks this function as belonging to the I/O API category */
IO_CODE();
return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
offset, bytes, pnum, map, file);
}
@ -2751,6 +2789,7 @@ int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
{
int ret;
int64_t pnum = bytes;
IO_CODE();
if (!bytes) {
return 1;
@ -2771,6 +2810,7 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
{
int ret;
int64_t dummy;
IO_CODE();
ret = bdrv_common_block_status_above(bs, bs, true, false, offset,
bytes, pnum ? pnum : &dummy, NULL,
@ -2807,6 +2847,7 @@ int bdrv_is_allocated_above(BlockDriverState *top,
int ret = bdrv_common_block_status_above(top, base, include_base, false,
offset, bytes, pnum, NULL, NULL,
&depth);
IO_CODE();
if (ret < 0) {
return ret;
}
@ -2823,6 +2864,7 @@ bdrv_co_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret;
IO_CODE();
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
if (ret < 0) {
@ -2854,6 +2896,7 @@ bdrv_co_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos)
BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret;
IO_CODE();
ret = bdrv_check_qiov_request(pos, qiov->size, qiov, 0, NULL);
if (ret < 0) {
@ -2884,6 +2927,7 @@ int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
int ret = bdrv_writev_vmstate(bs, &qiov, pos);
IO_CODE();
return ret < 0 ? ret : size;
}
@ -2893,6 +2937,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
{
QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, size);
int ret = bdrv_readv_vmstate(bs, &qiov, pos);
IO_CODE();
return ret < 0 ? ret : size;
}
@ -2902,6 +2947,7 @@ int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
void bdrv_aio_cancel(BlockAIOCB *acb)
{
IO_CODE();
qemu_aio_ref(acb);
bdrv_aio_cancel_async(acb);
while (acb->refcnt > 1) {
@ -2926,6 +2972,7 @@ void bdrv_aio_cancel(BlockAIOCB *acb)
* In either case the completion callback must be called. */
void bdrv_aio_cancel_async(BlockAIOCB *acb)
{
IO_CODE();
if (acb->aiocb_info->cancel_async) {
acb->aiocb_info->cancel_async(acb);
}
@ -2940,6 +2987,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
BdrvChild *child;
int current_gen;
int ret = 0;
IO_CODE();
bdrv_inc_in_flight(bs);
@ -3065,6 +3113,7 @@ int coroutine_fn bdrv_co_pdiscard(BdrvChild *child, int64_t offset,
int64_t max_pdiscard;
int head, tail, align;
BlockDriverState *bs = child->bs;
IO_CODE();
if (!bs || !bs->drv || !bdrv_is_inserted(bs)) {
return -ENOMEDIUM;
@ -3183,6 +3232,7 @@ int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf)
.coroutine = qemu_coroutine_self(),
};
BlockAIOCB *acb;
IO_CODE();
bdrv_inc_in_flight(bs);
if (!drv || (!drv->bdrv_aio_ioctl && !drv->bdrv_co_ioctl)) {
@ -3207,17 +3257,20 @@ out:
/*
 * Allocate @size bytes with qemu_memalign(), using the alignment
 * returned by bdrv_opt_mem_align(bs).
 *
 * Returns the pointer obtained from qemu_memalign().
 */
void *qemu_blockalign(BlockDriverState *bs, size_t size)
{
/* Marks this function as belonging to the I/O API category */
IO_CODE();
return qemu_memalign(bdrv_opt_mem_align(bs), size);
}
/*
 * Like qemu_blockalign(), but the returned buffer is zero-filled.
 *
 * The result of qemu_blockalign() is passed straight to memset() without
 * a NULL check.
 */
void *qemu_blockalign0(BlockDriverState *bs, size_t size)
{
/* Marks this function as belonging to the I/O API category */
IO_CODE();
return memset(qemu_blockalign(bs, size), 0, size);
}
void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
{
size_t align = bdrv_opt_mem_align(bs);
IO_CODE();
/* Ensure that NULL is never returned on success */
assert(align > 0);
@ -3231,6 +3284,7 @@ void *qemu_try_blockalign(BlockDriverState *bs, size_t size)
void *qemu_try_blockalign0(BlockDriverState *bs, size_t size)
{
void *mem = qemu_try_blockalign(bs, size);
IO_CODE();
if (mem) {
memset(mem, 0, size);
@ -3246,6 +3300,7 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
{
int i;
size_t alignment = bdrv_min_mem_align(bs);
IO_CODE();
for (i = 0; i < qiov->niov; i++) {
if ((uintptr_t) qiov->iov[i].iov_base % alignment) {
@ -3262,6 +3317,7 @@ bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov)
void bdrv_io_plug(BlockDriverState *bs)
{
BdrvChild *child;
IO_CODE();
QLIST_FOREACH(child, &bs->children, next) {
bdrv_io_plug(child->bs);
@ -3278,6 +3334,7 @@ void bdrv_io_plug(BlockDriverState *bs)
void bdrv_io_unplug(BlockDriverState *bs)
{
BdrvChild *child;
IO_CODE();
assert(bs->io_plugged);
if (qatomic_fetch_dec(&bs->io_plugged) == 1) {
@ -3296,6 +3353,7 @@ void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size)
{
BdrvChild *child;
GLOBAL_STATE_CODE();
if (bs->drv && bs->drv->bdrv_register_buf) {
bs->drv->bdrv_register_buf(bs, host, size);
}
@ -3308,6 +3366,7 @@ void bdrv_unregister_buf(BlockDriverState *bs, void *host)
{
BdrvChild *child;
GLOBAL_STATE_CODE();
if (bs->drv && bs->drv->bdrv_unregister_buf) {
bs->drv->bdrv_unregister_buf(bs, host);
}
@ -3402,6 +3461,7 @@ int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
IO_CODE();
trace_bdrv_co_copy_range_from(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@ -3418,6 +3478,7 @@ int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
IO_CODE();
trace_bdrv_co_copy_range_to(src, src_offset, dst, dst_offset, bytes,
read_flags, write_flags);
return bdrv_co_copy_range_internal(src, src_offset, dst, dst_offset,
@ -3429,6 +3490,7 @@ int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags)
{
IO_CODE();
return bdrv_co_copy_range_from(src, src_offset,
dst, dst_offset,
bytes, read_flags, write_flags);
@ -3461,7 +3523,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
BdrvTrackedRequest req;
int64_t old_size, new_bytes;
int ret;
IO_CODE();
/* if bs->drv == NULL, bs is closed, so there's nothing to do here */
if (!drv) {
@ -3579,6 +3641,7 @@ out:
void bdrv_cancel_in_flight(BlockDriverState *bs)
{
GLOBAL_STATE_CODE();
if (!bs || !bs->drv) {
return;
}

View File

@ -131,8 +131,11 @@ block_ss.add(module_block_h)
wrapper_py = find_program('../scripts/block-coroutine-wrapper.py')
block_gen_c = custom_target('block-gen.c',
output: 'block-gen.c',
input: files('../include/block/block.h',
'coroutines.h'),
input: files(
'../include/block/block-io.h',
'../include/block/block-global-state.h',
'coroutines.h'
),
command: [wrapper_py, '@OUTPUT@', '@INPUT@'])
block_ss.add(block_gen_c)

View File

@ -1864,6 +1864,8 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
bool is_none_mode;
BlockDriverState *base;
GLOBAL_STATE_CODE();
if ((mode == MIRROR_SYNC_MODE_INCREMENTAL) ||
(mode == MIRROR_SYNC_MODE_BITMAP)) {
error_setg(errp, "Sync mode '%s' not supported",
@ -1889,6 +1891,8 @@ BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
bool base_read_only;
BlockJob *job;
GLOBAL_STATE_CODE();
base_read_only = bdrv_is_read_only(base);
if (base_read_only) {

View File

@ -56,6 +56,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
BlockDriverState *bs;
BdrvDirtyBitmap *bitmap;
GLOBAL_STATE_CODE();
if (!node) {
error_setg(errp, "Node cannot be NULL");
return NULL;
@ -155,6 +157,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
BdrvDirtyBitmap *bitmap;
AioContext *aio_context;
GLOBAL_STATE_CODE();
bitmap = block_dirty_bitmap_lookup(node, name, &bs, errp);
if (!bitmap || !bs) {
return NULL;
@ -261,6 +265,8 @@ BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
BlockDirtyBitmapMergeSourceList *lst;
Error *local_err = NULL;
GLOBAL_STATE_CODE();
dst = block_dirty_bitmap_lookup(node, target, &bs, errp);
if (!dst) {
return NULL;

View File

@ -313,6 +313,7 @@ int coroutine_fn nbd_co_do_establish_connection(BlockDriverState *bs,
BDRVNBDState *s = (BDRVNBDState *)bs->opaque;
int ret;
bool blocking = nbd_client_connecting_wait(s);
IO_CODE();
assert(!s->ioc);

View File

@ -873,7 +873,7 @@ static int parallels_open(BlockDriverState *bs, QDict *options, int flags,
s->bat_dirty_bmap =
bitmap_new(DIV_ROUND_UP(s->header_size, s->bat_dirty_block));
/* Disable migration until bdrv_invalidate_cache method is added */
/* Disable migration until bdrv_activate method is added */
error_setg(&s->migration_blocker, "The Parallels format used by node '%s' "
"does not support live migration",
bdrv_get_device_or_node_name(bs));

View File

@ -57,6 +57,8 @@ int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
QEMUSnapshotInfo *sn_tab, *sn;
int nb_sns, i, ret;
GLOBAL_STATE_CODE();
ret = -ENOENT;
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns < 0) {
@ -105,6 +107,7 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
bool ret = false;
assert(id || name);
GLOBAL_STATE_CODE();
nb_sns = bdrv_snapshot_list(bs, &sn_tab);
if (nb_sns < 0) {
@ -200,6 +203,7 @@ static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs)
int bdrv_can_snapshot(BlockDriverState *bs)
{
BlockDriver *drv = bs->drv;
GLOBAL_STATE_CODE();
if (!drv || !bdrv_is_inserted(bs) || bdrv_is_read_only(bs)) {
return 0;
}
@ -220,6 +224,9 @@ int bdrv_snapshot_create(BlockDriverState *bs,
{
BlockDriver *drv = bs->drv;
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
GLOBAL_STATE_CODE();
if (!drv) {
return -ENOMEDIUM;
}
@ -240,6 +247,8 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
BdrvChild **fallback_ptr;
int ret, open_ret;
GLOBAL_STATE_CODE();
if (!drv) {
error_setg(errp, "Block driver is closed");
return -ENOMEDIUM;
@ -348,6 +357,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
int ret;
GLOBAL_STATE_CODE();
if (!drv) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
@ -380,6 +391,8 @@ int bdrv_snapshot_list(BlockDriverState *bs,
{
BlockDriver *drv = bs->drv;
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
GLOBAL_STATE_CODE();
if (!drv) {
return -ENOMEDIUM;
}
@ -419,6 +432,8 @@ int bdrv_snapshot_load_tmp(BlockDriverState *bs,
{
BlockDriver *drv = bs->drv;
GLOBAL_STATE_CODE();
if (!drv) {
error_setg(errp, QERR_DEVICE_HAS_NO_MEDIUM, bdrv_get_device_name(bs));
return -ENOMEDIUM;
@ -447,6 +462,8 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
int ret;
Error *local_err = NULL;
GLOBAL_STATE_CODE();
ret = bdrv_snapshot_load_tmp(bs, id_or_name, NULL, &local_err);
if (ret == -ENOENT || ret == -EINVAL) {
error_free(local_err);
@ -515,6 +532,8 @@ bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
GLOBAL_STATE_CODE();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return false;
}
@ -549,6 +568,8 @@ int bdrv_all_delete_snapshot(const char *name,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
GLOBAL_STATE_CODE();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
}
@ -588,6 +609,8 @@ int bdrv_all_goto_snapshot(const char *name,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
GLOBAL_STATE_CODE();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
}
@ -622,6 +645,8 @@ int bdrv_all_has_snapshot(const char *name,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
GLOBAL_STATE_CODE();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
}
@ -663,6 +688,7 @@ int bdrv_all_create_snapshot(QEMUSnapshotInfo *sn,
{
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
GLOBAL_STATE_CODE();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return -1;
@ -703,6 +729,8 @@ BlockDriverState *bdrv_all_find_vmstate_bs(const char *vmstate_bs,
g_autoptr(GList) bdrvs = NULL;
GList *iterbdrvs;
GLOBAL_STATE_CODE();
if (bdrv_all_get_snapshot_devices(has_devices, devices, &bdrvs, errp) < 0) {
return NULL;
}

View File

@ -220,6 +220,8 @@ void stream_start(const char *job_id, BlockDriverState *bs,
QDict *opts;
int ret;
GLOBAL_STATE_CODE();
assert(!(base && bottom));
assert(!(backing_file_str && bottom));

View File

@ -63,11 +63,13 @@
#include "qemu/main-loop.h"
#include "qemu/throttle-options.h"
/* Protected by BQL */
QTAILQ_HEAD(, BlockDriverState) monitor_bdrv_states =
QTAILQ_HEAD_INITIALIZER(monitor_bdrv_states);
/*
* Append @bs to the tail of the monitor_bdrv_states list, linking it through
* its monitor_list entry.
*
* Marked with GLOBAL_STATE_CODE(); the list head is annotated as protected
* by the BQL.
*/
void bdrv_set_monitor_owned(BlockDriverState *bs)
{
GLOBAL_STATE_CODE();
QTAILQ_INSERT_TAIL(&monitor_bdrv_states, bs, monitor_list);
}
@ -111,6 +113,8 @@ void override_max_devs(BlockInterfaceType type, int max_devs)
BlockBackend *blk;
DriveInfo *dinfo;
GLOBAL_STATE_CODE();
if (max_devs <= 0) {
return;
}
@ -140,6 +144,8 @@ void blockdev_mark_auto_del(BlockBackend *blk)
DriveInfo *dinfo = blk_legacy_dinfo(blk);
BlockJob *job;
GLOBAL_STATE_CODE();
if (!dinfo) {
return;
}
@ -161,6 +167,7 @@ void blockdev_mark_auto_del(BlockBackend *blk)
void blockdev_auto_del(BlockBackend *blk)
{
DriveInfo *dinfo = blk_legacy_dinfo(blk);
GLOBAL_STATE_CODE();
if (dinfo && dinfo->auto_del) {
monitor_remove_blk(blk);
@ -185,6 +192,8 @@ QemuOpts *drive_add(BlockInterfaceType type, int index, const char *file,
{
QemuOpts *opts;
GLOBAL_STATE_CODE();
opts = qemu_opts_parse_noisily(qemu_find_opts("drive"), optstr, false);
if (!opts) {
return NULL;
@ -205,6 +214,8 @@ DriveInfo *drive_get(BlockInterfaceType type, int bus, int unit)
BlockBackend *blk;
DriveInfo *dinfo;
GLOBAL_STATE_CODE();
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
if (dinfo && dinfo->type == type
@ -227,6 +238,8 @@ void drive_check_orphaned(void)
Location loc;
bool orphans = false;
GLOBAL_STATE_CODE();
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
/*
@ -260,6 +273,7 @@ void drive_check_orphaned(void)
DriveInfo *drive_get_by_index(BlockInterfaceType type, int index)
{
GLOBAL_STATE_CODE();
return drive_get(type,
drive_index_to_bus_id(type, index),
drive_index_to_unit_id(type, index));
@ -271,6 +285,8 @@ int drive_get_max_bus(BlockInterfaceType type)
BlockBackend *blk;
DriveInfo *dinfo;
GLOBAL_STATE_CODE();
max_bus = -1;
for (blk = blk_next(NULL); blk; blk = blk_next(blk)) {
dinfo = blk_legacy_dinfo(blk);
@ -628,6 +644,7 @@ BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp)
{
int bdrv_flags = 0;
GLOBAL_STATE_CODE();
/* bdrv_open() defaults to the values in bdrv_flags (for compatibility
* with other callers) rather than what we want as the real defaults.
* Apply the defaults here instead. */
@ -646,6 +663,7 @@ void blockdev_close_all_bdrv_states(void)
{
BlockDriverState *bs, *next_bs;
GLOBAL_STATE_CODE();
QTAILQ_FOREACH_SAFE(bs, &monitor_bdrv_states, monitor_list, next_bs) {
AioContext *ctx = bdrv_get_aio_context(bs);
@ -658,6 +676,7 @@ void blockdev_close_all_bdrv_states(void)
/*
 * Step through the list of monitor-owned BlockDriverStates: a NULL @bs
 * yields the first element of monitor_bdrv_states, any other @bs yields
 * the element that follows it on the list.
 */
BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs)
{
    GLOBAL_STATE_CODE();

    if (!bs) {
        return QTAILQ_FIRST(&monitor_bdrv_states);
    }
    return QTAILQ_NEXT(bs, monitor_list);
}
@ -754,6 +773,8 @@ DriveInfo *drive_new(QemuOpts *all_opts, BlockInterfaceType block_default_type,
const char *filename;
int i;
GLOBAL_STATE_CODE();
/* Change legacy command line options into QMP ones */
static const struct {
const char *from;
@ -1174,6 +1195,8 @@ typedef struct BlkActionState BlkActionState;
*
* Only prepare() may fail. In a single transaction, only one of commit() or
* abort() will be called. clean() will always be called if it is present.
*
* Always run under BQL.
*/
typedef struct BlkActionOps {
size_t instance_size;
@ -2283,6 +2306,8 @@ static TransactionProperties *get_transaction_properties(
/*
* 'Atomic' group operations. The operations are performed as a set, and if
* any fail then we roll back all operations in the group.
*
* Always run under BQL.
*/
void qmp_transaction(TransactionActionList *dev_list,
bool has_props,
@ -2294,6 +2319,8 @@ void qmp_transaction(TransactionActionList *dev_list,
BlkActionState *state, *next;
Error *local_err = NULL;
GLOBAL_STATE_CODE();
QTAILQ_HEAD(, BlkActionState) snap_bdrv_states;
QTAILQ_INIT(&snap_bdrv_states);
@ -3596,6 +3623,8 @@ void qmp_blockdev_del(const char *node_name, Error **errp)
AioContext *aio_context;
BlockDriverState *bs;
GLOBAL_STATE_CODE();
bs = bdrv_find_node(node_name);
if (!bs) {
error_setg(errp, "Failed to find node with node-name='%s'", node_name);

View File

@ -62,6 +62,7 @@ static bool is_block_job(Job *job)
BlockJob *block_job_next(BlockJob *bjob)
{
Job *job = bjob ? &bjob->job : NULL;
GLOBAL_STATE_CODE();
do {
job = job_next(job);
@ -73,6 +74,7 @@ BlockJob *block_job_next(BlockJob *bjob)
BlockJob *block_job_get(const char *id)
{
Job *job = job_get(id);
GLOBAL_STATE_CODE();
if (job && is_block_job(job)) {
return container_of(job, BlockJob, job);
@ -84,6 +86,7 @@ BlockJob *block_job_get(const char *id)
void block_job_free(Job *job)
{
BlockJob *bjob = container_of(job, BlockJob, job);
GLOBAL_STATE_CODE();
block_job_remove_all_bdrv(bjob);
ratelimit_destroy(&bjob->limit);
@ -183,6 +186,7 @@ static const BdrvChildClass child_job = {
void block_job_remove_all_bdrv(BlockJob *job)
{
GLOBAL_STATE_CODE();
/*
* bdrv_root_unref_child() may reach child_job_[can_]set_aio_ctx(),
* which will also traverse job->nodes, so consume the list one by
@ -205,6 +209,7 @@ void block_job_remove_all_bdrv(BlockJob *job)
bool block_job_has_bdrv(BlockJob *job, BlockDriverState *bs)
{
GSList *el;
GLOBAL_STATE_CODE();
for (el = job->nodes; el; el = el->next) {
BdrvChild *c = el->data;
@ -221,6 +226,7 @@ int block_job_add_bdrv(BlockJob *job, const char *name, BlockDriverState *bs,
{
BdrvChild *c;
bool need_context_ops;
GLOBAL_STATE_CODE();
bdrv_ref(bs);
@ -270,6 +276,8 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
const BlockJobDriver *drv = block_job_driver(job);
int64_t old_speed = job->speed;
GLOBAL_STATE_CODE();
if (job_apply_verb(&job->job, JOB_VERB_SET_SPEED, errp) < 0) {
return false;
}
@ -299,6 +307,7 @@ bool block_job_set_speed(BlockJob *job, int64_t speed, Error **errp)
/*
 * Query the job's rate limiter (job->limit) for the delay associated with
 * @n and return that value unchanged.
 */
int64_t block_job_ratelimit_get_delay(BlockJob *job, uint64_t n)
{
    int64_t delay;

    IO_CODE();
    delay = ratelimit_calculate_delay(&job->limit, n);
    return delay;
}
@ -307,6 +316,8 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp)
BlockJobInfo *info;
uint64_t progress_current, progress_total;
GLOBAL_STATE_CODE();
if (block_job_is_internal(job)) {
error_setg(errp, "Cannot query QEMU internal jobs");
return NULL;
@ -434,6 +445,7 @@ void *block_job_create(const char *job_id, const BlockJobDriver *driver,
{
BlockJob *job;
int ret;
GLOBAL_STATE_CODE();
if (job_id == NULL && !(flags & JOB_INTERNAL)) {
job_id = bdrv_get_device_name(bs);
@ -488,6 +500,7 @@ fail:
void block_job_iostatus_reset(BlockJob *job)
{
GLOBAL_STATE_CODE();
if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) {
return;
}
@ -498,6 +511,7 @@ void block_job_iostatus_reset(BlockJob *job)
/*
 * Recover the BlockJob that embeds @job and reset its iostatus through
 * block_job_iostatus_reset().
 */
void block_job_user_resume(Job *job)
{
    GLOBAL_STATE_CODE();
    block_job_iostatus_reset(container_of(job, BlockJob, job));
}
@ -505,6 +519,7 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
int is_read, int error)
{
BlockErrorAction action;
IO_CODE();
switch (on_err) {
case BLOCKDEV_ON_ERROR_ENOSPC:
@ -543,5 +558,6 @@ BlockErrorAction block_job_error_action(BlockJob *job, BlockdevOnError on_err,
/*
 * Return the AioContext recorded in the Job embedded in @job
 * (job->job.aio_context).
 */
AioContext *block_job_get_aio_context(BlockJob *job)
{
    AioContext *ctx;

    GLOBAL_STATE_CODE();
    ctx = job->job.aio_context;
    return ctx;
}

View File

@ -154,6 +154,13 @@ Standard options:
created but before accepting connections. The daemon has started successfully
when the pid file is written and clients may begin connecting.
.. option:: --daemonize
Daemonize the process. The parent process will exit once startup is complete
(i.e., after the pid file has been or would have been written) or failure
occurs. Its exit code reflects whether the child has started up successfully
or failed to do so.
Examples
--------
Launch the daemon with QMP monitor socket ``qmp.sock`` so clients can execute

View File

@ -1023,7 +1023,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state)
{
PFlashCFI01 *pfl = opaque;
/* This is called after bdrv_invalidate_cache_all. */
/* This is called after bdrv_activate_all. */
qemu_del_vm_change_state_handler(pfl->vmstate);
pfl->vmstate = NULL;

View File

@ -219,7 +219,7 @@ static void postload_update_cb(void *opaque, bool running, RunState state)
{
SpaprNvram *nvram = opaque;
/* This is called after bdrv_invalidate_cache_all. */
/* This is called after bdrv_activate_all. */
qemu_del_vm_change_state_handler(nvram->vmstate);
nvram->vmstate = NULL;

View File

@ -0,0 +1,418 @@
/*
* QEMU System Emulator block driver
*
* Copyright (c) 2003 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef BLOCK_COMMON_H
#define BLOCK_COMMON_H
#include "block/aio.h"
#include "block/aio-wait.h"
#include "qemu/iov.h"
#include "qemu/coroutine.h"
#include "block/accounting.h"
#include "block/dirty-bitmap.h"
#include "block/blockjob.h"
#include "qemu/hbitmap.h"
#include "qemu/transactions.h"
/*
* generated_co_wrapper
*
* Function specifier, which does nothing but mark functions to be
* generated by scripts/block-coroutine-wrapper.py
*
* Read more in docs/devel/block-coroutine-wrapper.rst
*/
#define generated_co_wrapper
/* block.c */
typedef struct BlockDriver BlockDriver;
typedef struct BdrvChild BdrvChild;
typedef struct BdrvChildClass BdrvChildClass;
/*
* Block driver information record: cluster size, VM state offset, and the
* dirty / compressed-writes-only flags described below.
*/
typedef struct BlockDriverInfo {
/* in bytes, 0 if irrelevant */
int cluster_size;
/* offset at which the VM state can be saved (0 if not possible) */
int64_t vm_state_offset;
/* NOTE(review): what "dirty" means is not defined in this header - confirm */
bool is_dirty;
/*
* True if this block driver only supports compressed writes
*/
bool needs_compressed_writes;
} BlockDriverInfo;
/*
* Set of 64-bit cluster counters; embedded in BdrvCheckResult as its 'bfi'
* member.
* NOTE(review): the name suggests fragmentation statistics - confirm how
* each counter is filled in by the code that produces it.
*/
typedef struct BlockFragInfo {
uint64_t allocated_clusters;
uint64_t total_clusters;
uint64_t fragmented_clusters;
uint64_t compressed_clusters;
} BlockFragInfo;
/*
* Request flags. Every enumerator except BDRV_REQ_MASK is a distinct single
* bit, so flags can be combined. The value 0x8 is not assigned to any flag
* in this enum, although BDRV_REQ_MASK (0x7ff) covers that bit as well.
*/
typedef enum {
BDRV_REQ_COPY_ON_READ = 0x1,
BDRV_REQ_ZERO_WRITE = 0x2,
/*
* The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
* that the block driver should unmap (discard) blocks if it is guaranteed
* that the result will read back as zeroes. The flag is only passed to the
* driver if the block device is opened with BDRV_O_UNMAP.
*/
BDRV_REQ_MAY_UNMAP = 0x4,
BDRV_REQ_FUA = 0x10,
BDRV_REQ_WRITE_COMPRESSED = 0x20,
/*
* Signifies that this write request will not change the visible disk
* content.
*/
BDRV_REQ_WRITE_UNCHANGED = 0x40,
/*
* Forces request serialisation. Use only with write requests.
*/
BDRV_REQ_SERIALISING = 0x80,
/*
* Execute the request only if the operation can be offloaded or otherwise
* be executed efficiently, but return an error instead of using a slow
* fallback.
*/
BDRV_REQ_NO_FALLBACK = 0x100,
/*
* BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
* (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
* filter is involved), in which case it signals that the COR operation
* need not read the data into memory (qiov) but only ensure they are
* copied to the top layer (i.e., that COR operation is done).
*/
BDRV_REQ_PREFETCH = 0x200,
/*
* If we need to wait for other requests, just fail immediately. Used
* only together with BDRV_REQ_SERIALISING.
*/
BDRV_REQ_NO_WAIT = 0x400,
/* Mask of valid flags */
BDRV_REQ_MASK = 0x7ff,
} BdrvRequestFlags;
#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
#define BDRV_O_RDWR 0x0002
#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save
writes in a snapshot */
#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the
thread pool */
#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */
#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
select an appropriate protocol driver,
ignoring the format layer */
#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening
read-write fails */
#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
/* Option names of options parsed by the block layer */
#define BDRV_OPT_CACHE_WB "cache.writeback"
#define BDRV_OPT_CACHE_DIRECT "cache.direct"
#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
#define BDRV_OPT_READ_ONLY "read-only"
#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
#define BDRV_OPT_DISCARD "discard"
#define BDRV_OPT_FORCE_SHARE "force-share"
#define BDRV_SECTOR_BITS 9
#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
INT_MAX >> BDRV_SECTOR_BITS)
#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
/*
* We want to allow aligning requests and disk lengths up to any 32-bit
* alignment without having to worry about overflow.
* To achieve this, and at the same time use a reasonably round number as the
* maximum disk size, define the maximum "length" (a limit for any offset/bytes
* request and for the disk size) as INT64_MAX aligned down to
* BDRV_MAX_ALIGNMENT.
*/
#define BDRV_MAX_ALIGNMENT (1L << 30)
#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
/*
* Allocation status flags for bdrv_block_status() and friends.
*
* Public flags:
* BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
* BDRV_BLOCK_ZERO: offset reads as zero
* BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
* BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
* layer rather than any backing, set by block layer
* BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
* layer, set by block layer
*
* Internal flags:
* BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
* that the block layer recompute the answer from the returned
* BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
* BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
* zeroes in file child of current block node inside
* returned region. Only valid together with both
* BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
* appear with BDRV_BLOCK_ZERO.
*
* If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
* host offset within the returned BDS that is allocated for the
* corresponding raw guest data. However, whether that offset
* actually contains data also depends on BDRV_BLOCK_DATA, as follows:
*
* DATA ZERO OFFSET_VALID
* t t t sectors read as zero, returned file is zero at offset
* t f t sectors read as valid from file at offset
* f t t sectors preallocated, read as zero, returned file not
* necessarily zero at offset
* f f t sectors preallocated but read from backing_hd,
* returned file contains garbage at offset
* t t f sectors preallocated, read as zero, unknown offset
* t f f sectors read from unknown file or offset
* f t f not allocated or unknown offset, read as zero
* f f f not allocated or unknown offset, read from backing_hd
*/
#define BDRV_BLOCK_DATA 0x01
#define BDRV_BLOCK_ZERO 0x02
#define BDRV_BLOCK_OFFSET_VALID 0x04
#define BDRV_BLOCK_RAW 0x08
#define BDRV_BLOCK_ALLOCATED 0x10
#define BDRV_BLOCK_EOF 0x20
#define BDRV_BLOCK_RECURSE 0x40
typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
/*
* Reopen state for one node (bs): flags, detect-zeroes setting and option
* dictionaries, plus the old backing/file node pointers, which are kept for
* the permissions update.
*/
typedef struct BDRVReopenState {
BlockDriverState *bs;
int flags;
BlockdevDetectZeroesOptions detect_zeroes;
bool backing_missing;
BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
BlockDriverState *old_file_bs; /* keep pointer for permissions update */
QDict *options;
QDict *explicit_options;
/* NOTE(review): presumably driver-private reopen data - confirm */
void *opaque;
} BDRVReopenState;
/*
* Block operation types
*
* The enumerators are numbered implicitly from 0, so BLOCK_OP_TYPE_MAX, being
* the last one, equals the number of operation types listed before it.
*/
typedef enum BlockOpType {
BLOCK_OP_TYPE_BACKUP_SOURCE,
BLOCK_OP_TYPE_BACKUP_TARGET,
BLOCK_OP_TYPE_CHANGE,
BLOCK_OP_TYPE_COMMIT_SOURCE,
BLOCK_OP_TYPE_COMMIT_TARGET,
BLOCK_OP_TYPE_DATAPLANE,
BLOCK_OP_TYPE_DRIVE_DEL,
BLOCK_OP_TYPE_EJECT,
BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
BLOCK_OP_TYPE_MIRROR_SOURCE,
BLOCK_OP_TYPE_MIRROR_TARGET,
BLOCK_OP_TYPE_RESIZE,
BLOCK_OP_TYPE_STREAM,
BLOCK_OP_TYPE_REPLACE,
BLOCK_OP_TYPE_MAX,
} BlockOpType;
/* Block node permission constants */
enum {
/**
* A user that has the "permission" of consistent reads is guaranteed that
* their view of the contents of the block device is complete and
* self-consistent, representing the contents of a disk at a specific
* point.
*
* For most block devices (including their backing files) this is true, but
* the property cannot be maintained in a few situations like for
* intermediate nodes of a commit block job.
*/
BLK_PERM_CONSISTENT_READ = 0x01,
/** This permission is required to change the visible disk contents. */
BLK_PERM_WRITE = 0x02,
/**
* This permission (which is weaker than BLK_PERM_WRITE) is both enough and
* required for writes to the block node when the caller promises that
* the visible disk content doesn't change.
*
* As the BLK_PERM_WRITE permission is strictly stronger, either is
* sufficient to perform an unchanging write.
*/
BLK_PERM_WRITE_UNCHANGED = 0x04,
/** This permission is required to change the size of a block node. */
BLK_PERM_RESIZE = 0x08,
/**
* There used to be a BLK_PERM_GRAPH_MOD bit with the value 0x10; it has
* been removed. QEMU 6.1 and earlier may still lock the corresponding
* byte in block/file-posix locking, so any new permission must be
* introduced carefully so that it does not interfere with this old,
* unused bit.
*/
BLK_PERM_ALL = 0x0f,
DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
| BLK_PERM_WRITE
| BLK_PERM_WRITE_UNCHANGED
| BLK_PERM_RESIZE,
/*
* With the values above, DEFAULT_PERM_PASSTHROUGH equals BLK_PERM_ALL
* (0x0f), so this expression evaluates to 0.
*/
DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
};
/*
* Flags that parent nodes assign to child nodes to specify what kind of
* role(s) they take.
*
* At least one of DATA, METADATA, FILTERED, or COW must be set for
* every child.
*/
enum BdrvChildRoleBits {
/*
* This child stores data.
* Any node may have an arbitrary number of such children.
*/
BDRV_CHILD_DATA = (1 << 0),
/*
* This child stores metadata.
* Any node may have an arbitrary number of metadata-storing
* children.
*/
BDRV_CHILD_METADATA = (1 << 1),
/*
* A child that always presents exactly the same visible data as
* the parent, e.g. by virtue of the parent forwarding all reads
* and writes.
* This flag is mutually exclusive with DATA, METADATA, and COW.
* Any node may have at most one filtered child at a time.
*/
BDRV_CHILD_FILTERED = (1 << 2),
/*
* Child from which to read all data that isn't allocated in the
* parent (i.e., the backing child); such data is copied to the
* parent through COW (and optionally COR).
* This flag is mutually exclusive with DATA, METADATA, and
* FILTERED.
* Any node may have at most one such backing child at a time.
*/
BDRV_CHILD_COW = (1 << 3),
/*
* The primary child. For most drivers, this is the child whose
* filename applies best to the parent node.
* Any node may have at most one primary child at a time.
*/
BDRV_CHILD_PRIMARY = (1 << 4),
/* Useful combination of flags */
BDRV_CHILD_IMAGE = BDRV_CHILD_DATA
| BDRV_CHILD_METADATA
| BDRV_CHILD_PRIMARY,
};
/* Mask of BdrvChildRoleBits values */
typedef unsigned int BdrvChildRole;
/*
* Check result record: counts of corruptions, leaks and check errors, the
* number of corruptions and leaks that were fixed, the image end offset, and
* a BlockFragInfo set of cluster counters.
*/
typedef struct BdrvCheckResult {
int corruptions;
int leaks;
int check_errors;
int corruptions_fixed;
int leaks_fixed;
int64_t image_end_offset;
BlockFragInfo bfi;
} BdrvCheckResult;
/*
* Fix modes for a check. The two values are distinct bits (1 and 2).
* NOTE(review): presumably they may be OR-ed together to request both kinds
* of fix - confirm with the callers.
*/
typedef enum {
BDRV_FIX_LEAKS = 1,
BDRV_FIX_ERRORS = 2,
} BdrvCheckMode;
/*
* Pair of 32-bit sizes.
* NOTE(review): presumably 'phys' is the physical and 'log' the logical block
* size - confirm the meaning and units with the users of this struct.
*/
typedef struct BlockSizes {
uint32_t phys;
uint32_t log;
} BlockSizes;
/* Disk geometry expressed as 32-bit heads, sectors and cylinders values. */
typedef struct HDGeometry {
uint32_t heads;
uint32_t sectors;
uint32_t cylinders;
} HDGeometry;
/*
* Common functions that are neither I/O nor Global State.
*
* These functions must never call functions from any other category
* (I/O, "I/O or GS", Global State); they may only call functions from this
* category. They can, however, be invoked from all categories.
*/
char *bdrv_perm_names(uint64_t perm);
uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
void bdrv_init_with_whitelist(void);
bool bdrv_uses_whitelist(void);
int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
int bdrv_parse_aio(const char *mode, int *flags);
int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
int bdrv_parse_discard_flags(const char *mode, int *flags);
int path_has_protocol(const char *path);
int path_is_absolute(const char *path);
char *path_combine(const char *base_path, const char *filename);
char *bdrv_get_full_backing_filename_from_filename(const char *backed,
const char *backing,
Error **errp);
#endif /* BLOCK_COMMON_H */

View File

@ -0,0 +1,253 @@
/*
* QEMU System Emulator block driver
*
* Copyright (c) 2003 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef BLOCK_GLOBAL_STATE_H
#define BLOCK_GLOBAL_STATE_H
#include "block-common.h"
/*
* Global state (GS) API. These functions run under the BQL.
*
* If a function modifies the graph, it also uses drain and/or
* aio_context_acquire/release to be sure it has unique access.
* aio_context locking is needed together with BQL because of
* the thread-safe I/O API that concurrently runs and accesses
* the graph without the BQL.
*
* It is important to note that not all of these functions are
* necessarily limited to running under the BQL, but they would
* require additional auditing and many small thread-safety changes
* to move them into the I/O API. Often it's not worth doing that
* work since the APIs are only used with the BQL held at the
* moment, so they have been placed in the GS API (for now).
*
* These functions can call any function from this and other categories
* (I/O, "I/O or GS", Common), but must be invoked only by other GS APIs.
*
* All functions in this header must use the macro
* GLOBAL_STATE_CODE();
* to catch when they are accidentally called without the BQL.
*/
void bdrv_init(void);
BlockDriver *bdrv_find_protocol(const char *filename,
bool allow_protocol_prefix,
Error **errp);
BlockDriver *bdrv_find_format(const char *format_name);
int bdrv_create(BlockDriver *drv, const char* filename,
QemuOpts *opts, Error **errp);
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
BlockDriverState *bdrv_new(void);
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
Error **errp);
int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp);
int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
Error **errp);
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
int flags, Error **errp);
int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
BdrvChild *bdrv_open_child(const char *filename,
QDict *options, const char *bdref_key,
BlockDriverState *parent,
const BdrvChildClass *child_class,
BdrvChildRole child_role,
bool allow_none, Error **errp);
BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
BlockDriverState *bdrv_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp);
BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
const char *node_name,
QDict *options, int flags,
Error **errp);
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp);
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs, QDict *options,
bool keep_old_opts);
void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
Error **errp);
int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
Error **errp);
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
void bdrv_refresh_filename(BlockDriverState *bs);
void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
int bdrv_commit(BlockDriverState *bs);
int bdrv_make_empty(BdrvChild *c, Error **errp);
int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
const char *backing_fmt, bool warn);
void bdrv_register(BlockDriver *bdrv);
int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
const char *backing_file_str);
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
BlockDriverState *bs);
BlockDriverState *bdrv_find_base(BlockDriverState *bs);
bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
Error **errp);
int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
Error **errp);
void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
/*
* The units of offset and total_work_size may be chosen arbitrarily by the
* block driver; total_work_size may change during the course of the amendment
* operation
*/
typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
int64_t total_work_size, void *opaque);
int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
bool force,
Error **errp);
/* check if a named node can be replaced when doing drive-mirror */
BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
const char *node_name, Error **errp);
int bdrv_activate(BlockDriverState *bs, Error **errp);
void bdrv_activate_all(Error **errp);
int bdrv_inactivate_all(void);
int bdrv_flush_all(void);
void bdrv_close_all(void);
void bdrv_drain_all_begin(void);
void bdrv_drain_all_end(void);
void bdrv_drain_all(void);
int bdrv_has_zero_init_1(BlockDriverState *bs);
int bdrv_has_zero_init(BlockDriverState *bs);
BlockDriverState *bdrv_find_node(const char *node_name);
BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
BlockDriverState *bdrv_lookup_bs(const char *device,
const char *node_name,
Error **errp);
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
BlockDriverState *bdrv_next_node(BlockDriverState *bs);
BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
/*
* Iteration state handed to bdrv_first(), bdrv_next() and
* bdrv_next_cleanup().
*
* NOTE(review): the meaning of the two phases is not visible in this header.
* Presumably iteration first walks the BlockBackend roots (tracked in @blk)
* and afterwards the monitor-owned nodes (tracked in @bs) -- confirm against
* the implementation.
*/
typedef struct BdrvNextIterator {
enum {
BDRV_NEXT_BACKEND_ROOTS,
BDRV_NEXT_MONITOR_OWNED,
} phase;
BlockBackend *blk;
BlockDriverState *bs;
} BdrvNextIterator;
BlockDriverState *bdrv_first(BdrvNextIterator *it);
BlockDriverState *bdrv_next(BdrvNextIterator *it);
void bdrv_next_cleanup(BdrvNextIterator *it);
BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
void *opaque, bool read_only);
int bdrv_get_flags(BlockDriverState *bs);
char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
char *bdrv_dirname(BlockDriverState *bs, Error **errp);
void bdrv_img_create(const char *filename, const char *fmt,
const char *base_filename, const char *base_fmt,
char *options, uint64_t img_size, int flags,
bool quiet, Error **errp);
void bdrv_ref(BlockDriverState *bs);
void bdrv_unref(BlockDriverState *bs);
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
const BdrvChildClass *child_class,
BdrvChildRole child_role,
Error **errp);
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
const char *tag);
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
/**
* Locks the AioContext of @bs if it's not the current AioContext. This avoids
* double locking which could lead to deadlocks: This is a coroutine_fn, so we
* know we already own the lock of the current AioContext.
*
* May only be called in the main thread.
*/
void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
/**
* Unlocks the AioContext of @bs if it's not the current AioContext.
*/
void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
void bdrv_set_aio_context_ignore(BlockDriverState *bs,
AioContext *new_context, GSList **ignore);
int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
Error **errp);
int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
BdrvChild *ignore_child, Error **errp);
bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
GSList **ignore, Error **errp);
bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
GSList **ignore, Error **errp);
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
Error **errp);
void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
/**
*
* bdrv_register_buf/bdrv_unregister_buf:
*
* Register/unregister a buffer for I/O. For example, VFIO drivers are
* interested to know the memory areas that would later be used for I/O, so
* that they can prepare IOMMU mapping etc., to get better performance.
*/
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
void bdrv_unregister_buf(BlockDriverState *bs, void *host);
void bdrv_cancel_in_flight(BlockDriverState *bs);
#endif /* BLOCK_GLOBAL_STATE_H */

368
include/block/block-io.h Normal file
View File

@ -0,0 +1,368 @@
/*
* QEMU System Emulator block driver
*
* Copyright (c) 2003 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef BLOCK_IO_H
#define BLOCK_IO_H
#include "block-common.h"
/*
* I/O API functions. These functions are thread-safe, and therefore
* can run in any thread as long as the thread has called
* aio_context_acquire/release().
*
* These functions can only call functions from I/O and Common categories,
* but can be invoked by GS, "I/O or GS" and I/O APIs.
*
* All functions in this category must use the macro
* IO_CODE();
* to catch when they are accidentally called by the wrong API.
*/
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes);
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
int64_t bytes);
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
const void *buf, int64_t bytes);
/*
* Efficiently zero a region of the disk image. Note that this is a regular
* I/O request like read or write and should have a reasonable size. This
* function is not suitable for zeroing the entire image in a single request
* because it may allocate memory for the entire region.
*/
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags,
Error **errp);
int64_t bdrv_nb_sectors(BlockDriverState *bs);
int64_t bdrv_getlength(BlockDriverState *bs);
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
BlockDriverState *in_bs, Error **errp);
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs);
/* async block I/O */
void bdrv_aio_cancel(BlockAIOCB *acb);
void bdrv_aio_cancel_async(BlockAIOCB *acb);
/* sg packet commands */
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
/* Ensure contents are flushed to disk. */
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
int bdrv_block_status(BlockDriverState *bs, int64_t offset,
int64_t bytes, int64_t *pnum, int64_t *map,
BlockDriverState **file);
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file);
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum);
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
bool include_base, int64_t offset, int64_t bytes,
int64_t *pnum);
int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
int64_t bytes);
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
bool ignore_allow_rdw, Error **errp);
int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
Error **errp);
bool bdrv_is_read_only(BlockDriverState *bs);
bool bdrv_is_writable(BlockDriverState *bs);
bool bdrv_is_sg(BlockDriverState *bs);
bool bdrv_is_inserted(BlockDriverState *bs);
void bdrv_lock_medium(BlockDriverState *bs, bool locked);
void bdrv_eject(BlockDriverState *bs, bool eject_flag);
const char *bdrv_get_format_name(BlockDriverState *bs);
bool bdrv_supports_compressed_writes(BlockDriverState *bs);
const char *bdrv_get_node_name(const BlockDriverState *bs);
const char *bdrv_get_device_name(const BlockDriverState *bs);
const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
Error **errp);
BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
void bdrv_round_to_clusters(BlockDriverState *bs,
int64_t offset, int64_t bytes,
int64_t *cluster_offset,
int64_t *cluster_bytes);
void bdrv_get_backing_filename(BlockDriverState *bs,
char *filename, int filename_size);
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size);
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
int64_t pos, int size);
/*
* Returns the alignment in bytes that is required so that no bounce buffer
* is required throughout the stack
*/
size_t bdrv_min_mem_align(BlockDriverState *bs);
/* Returns optimal alignment in bytes for bounce buffer */
size_t bdrv_opt_mem_align(BlockDriverState *bs);
void *qemu_blockalign(BlockDriverState *bs, size_t size);
void *qemu_blockalign0(BlockDriverState *bs, size_t size);
void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
void bdrv_enable_copy_on_read(BlockDriverState *bs);
void bdrv_disable_copy_on_read(BlockDriverState *bs);
void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
/*
 * BLKDBG_EVENT:
 *
 * Calls bdrv_debug_event() with the node of @child (child->bs) and @evt.
 * Does nothing when @child is NULL.
 *
 * @child is parenthesized before the member access so that any pointer
 * expression (e.g. "base + 1") can be passed. It is evaluated twice, so it
 * must not have side effects.
 */
#define BLKDBG_EVENT(child, evt) \
    do { \
        if (child) { \
            bdrv_debug_event((child)->bs, evt); \
        } \
    } while (0)
/**
* bdrv_get_aio_context:
*
* Returns: the currently bound #AioContext
*/
AioContext *bdrv_get_aio_context(BlockDriverState *bs);
/**
* Move the current coroutine to the AioContext of @bs and return the old
* AioContext of the coroutine. Increase bs->in_flight so that draining @bs
* will wait for the operation to proceed until the corresponding
* bdrv_co_leave().
*
* Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
* this will deadlock.
*/
AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
/**
* Ends a section started by bdrv_co_enter(). Move the current coroutine back
* to old_ctx and decrease bs->in_flight again.
*/
void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
/**
* Transfer control to @co in the aio context of @bs
*/
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
void bdrv_io_plug(BlockDriverState *bs);
void bdrv_io_unplug(BlockDriverState *bs);
bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
uint32_t granularity, Error **errp);
/**
*
* bdrv_co_copy_range:
*
* Do offloaded copy between two children. If the operation is not implemented
* by the driver, or if the backend storage doesn't support it, a negative
* error code will be returned.
*
* Note: block layer doesn't emulate or fallback to a bounce buffer approach
* because usually the caller shouldn't attempt offloaded copy any more (e.g.
* calling copy_file_range(2)) after the first error, thus it should fall back
* to a read+write path in the caller level.
*
* @src: Source child to copy data from
* @src_offset: offset in @src image to read data
* @dst: Destination child to copy data to
* @dst_offset: offset in @dst image to write data
* @bytes: number of bytes to copy
* @read_flags: request flags; presumably applied to the read from @src
* (TODO confirm against the implementation)
* @write_flags: request flags; presumably applied to the write to @dst
* (TODO confirm against the implementation)
*
* Supported flags:
* BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
* write on @dst as if bdrv_co_pwrite_zeroes is
* called. Used to simplify caller code, or
* during BlockDriver.bdrv_co_copy_range_from()
* recursion.
* BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
* requests currently in flight.
* NOTE(review): BdrvRequestFlags as declared in
* this file has no member of this name; this
* entry may be stale -- confirm.
*
* Returns: 0 if succeeded; negative error code if failed.
**/
int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
BdrvChild *dst, int64_t dst_offset,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
/**
* bdrv_drained_end_no_poll:
*
* Same as bdrv_drained_end(), but do not poll for the subgraph to
* actually become unquiesced. Therefore, no graph changes will occur
* with this function.
*
* *drained_end_counter is incremented for every background operation
* that is scheduled, and will be decremented for every operation once
* it settles. The caller must poll until it reaches 0. The counter
* should be accessed using atomic operations only.
*/
void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
/*
* "I/O or GS" API functions. These functions can run without
* the BQL, but only in one specific iothread/main loop.
*
* More specifically, these functions use BDRV_POLL_WHILE(bs), which
* requires the caller to be either in the main thread and hold
* the BlockdriverState (bs) AioContext lock, or directly in the
* home thread that runs the bs AioContext. Calling them from
* another thread in another AioContext would cause deadlocks.
*
* Therefore, these functions are not proper I/O, because they
* can't run in *any* iothreads, but only in a specific one.
*
* These functions can call any function from I/O, Common and this
* category, but must be invoked only by other "I/O or GS" and GS APIs.
*
* All functions in this category must use the macro
* IO_OR_GS_CODE();
* to catch when they are accidentally called by the wrong API.
*/
/*
* BDRV_POLL_WHILE:
*
* Evaluates @bs exactly once (into a local), runs the IO_OR_GS_CODE() check
* and then passes the AioContext of @bs together with @cond to
* AIO_WAIT_WHILE(). The macro is a statement expression, so its value is
* whatever AIO_WAIT_WHILE() yields.
*
* NOTE(review): how often @cond is evaluated is determined by
* AIO_WAIT_WHILE(), which is not defined in this header.
*/
#define BDRV_POLL_WHILE(bs, cond) ({ \
BlockDriverState *bs_ = (bs); \
IO_OR_GS_CODE(); \
AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
cond); })
void bdrv_drain(BlockDriverState *bs);
void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
int generated_co_wrapper
bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
/* Invalidate any cached metadata used by image formats */
int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
Error **errp);
int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
int64_t bytes);
int generated_co_wrapper
bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int generated_co_wrapper
bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
/**
* bdrv_parent_drained_begin_single:
*
* Begin a quiesced section for the parent of @c. If @poll is true, wait for
* any pending activity to cease.
*/
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
/**
* bdrv_parent_drained_end_single:
*
* End a quiesced section for the parent of @c.
*
* This polls the AioContext of @c's node (c->bs) until all scheduled
* sub-drained_ends have settled, which may result in graph changes.
*/
void bdrv_parent_drained_end_single(BdrvChild *c);
/**
* bdrv_drain_poll:
*
* Poll for pending requests in @bs, its parents (except for @ignore_parent),
* and if @recursive is true its children as well (used for subtree drain).
*
* If @ignore_bds_parents is true, parents that are BlockDriverStates must
* ignore the drain request because they will be drained separately (used for
* drain_all).
*
* This is part of bdrv_drained_begin.
*/
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent, bool ignore_bds_parents);
/**
* bdrv_drained_begin:
*
* Begin a quiesced section for exclusive access to the BDS, by disabling
* external request sources including NBD server, block jobs, and device model.
*
* This function can be recursive.
*/
void bdrv_drained_begin(BlockDriverState *bs);
/**
* bdrv_do_drained_begin_quiesce:
*
* Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
* running requests to complete.
*/
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
BdrvChild *parent, bool ignore_bds_parents);
/**
* Like bdrv_drained_begin, but recursively begins a quiesced section for
* exclusive access to all child nodes as well.
*/
void bdrv_subtree_drained_begin(BlockDriverState *bs);
/**
* bdrv_drained_end:
*
* End a quiescent section started by bdrv_drained_begin().
*
* This polls @bs's AioContext until all scheduled sub-drained_ends
* have settled. On one hand, that may result in graph changes. On
* the other, this requires that the caller either runs in the main
* loop; or that all involved nodes (@bs and all of its parents) are
* in the caller's AioContext.
*/
void bdrv_drained_end(BlockDriverState *bs);
/**
* End a quiescent section started by bdrv_subtree_drained_begin().
*/
void bdrv_subtree_drained_end(BlockDriverState *bs);
#endif /* BLOCK_IO_H */

View File

@ -1,864 +1,32 @@
/*
* QEMU System Emulator block driver
*
* Copyright (c) 2003 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef BLOCK_H
#define BLOCK_H
#include "block/aio.h"
#include "block/aio-wait.h"
#include "qemu/iov.h"
#include "qemu/coroutine.h"
#include "block/accounting.h"
#include "block/dirty-bitmap.h"
#include "block/blockjob.h"
#include "qemu/hbitmap.h"
#include "qemu/transactions.h"
#include "block-global-state.h"
#include "block-io.h"
/*
* generated_co_wrapper
*
* Function specifier, which does nothing but mark functions to be
* generated by scripts/block-coroutine-wrapper.py
*
* Read more in docs/devel/block-coroutine-wrapper.rst
*/
#define generated_co_wrapper
/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
/* block.c */
typedef struct BlockDriver BlockDriver;
typedef struct BdrvChild BdrvChild;
typedef struct BdrvChildClass BdrvChildClass;
typedef struct BlockDriverInfo {
/* in bytes, 0 if irrelevant */
int cluster_size;
/* offset at which the VM state can be saved (0 if not possible) */
int64_t vm_state_offset;
bool is_dirty;
/*
* True if this block driver only supports compressed writes
*/
bool needs_compressed_writes;
} BlockDriverInfo;
typedef struct BlockFragInfo {
uint64_t allocated_clusters;
uint64_t total_clusters;
uint64_t fragmented_clusters;
uint64_t compressed_clusters;
} BlockFragInfo;
/*
* Request flags, passed as the BdrvRequestFlags argument of the request
* functions declared in this file. Every flag occupies its own bit so that
* flags can be OR-ed together.
*/
typedef enum {
BDRV_REQ_COPY_ON_READ = 0x1,
BDRV_REQ_ZERO_WRITE = 0x2,
/*
* The BDRV_REQ_MAY_UNMAP flag is used in write_zeroes requests to indicate
* that the block driver should unmap (discard) blocks if it is guaranteed
* that the result will read back as zeroes. The flag is only passed to the
* driver if the block device is opened with BDRV_O_UNMAP.
*/
BDRV_REQ_MAY_UNMAP = 0x4,
/* NOTE(review): bit 0x8 is not assigned to any flag in this enum. */
BDRV_REQ_FUA = 0x10,
BDRV_REQ_WRITE_COMPRESSED = 0x20,
/*
* Signifies that this write request will not change the visible disk
* content.
*/
BDRV_REQ_WRITE_UNCHANGED = 0x40,
/* Forces request serialisation. Use only with write requests. */
BDRV_REQ_SERIALISING = 0x80,
/*
* Execute the request only if the operation can be offloaded or otherwise
* be executed efficiently, but return an error instead of using a slow
* fallback.
*/
BDRV_REQ_NO_FALLBACK = 0x100,
/*
* BDRV_REQ_PREFETCH makes sense only in the context of copy-on-read
* (i.e., together with the BDRV_REQ_COPY_ON_READ flag or when a COR
* filter is involved), in which case it signals that the COR operation
* need not read the data into memory (qiov) but only ensure they are
* copied to the top layer (i.e., that COR operation is done).
*/
BDRV_REQ_PREFETCH = 0x200,
/*
* If we need to wait for other requests, just fail immediately. Used
* only together with BDRV_REQ_SERIALISING.
*/
BDRV_REQ_NO_WAIT = 0x400,
/*
* Mask of valid flags. Note that 0x7ff also covers bit 0x8, which has no
* named flag above.
*/
BDRV_REQ_MASK = 0x7ff,
} BdrvRequestFlags;
typedef struct BlockSizes {
uint32_t phys;
uint32_t log;
} BlockSizes;
typedef struct HDGeometry {
uint32_t heads;
uint32_t sectors;
uint32_t cylinders;
} HDGeometry;
#define BDRV_O_NO_SHARE 0x0001 /* don't share permissions */
#define BDRV_O_RDWR 0x0002
#define BDRV_O_RESIZE 0x0004 /* request permission for resizing the node */
#define BDRV_O_SNAPSHOT 0x0008 /* open the file read only and save writes in a snapshot */
#define BDRV_O_TEMPORARY 0x0010 /* delete the file after use */
#define BDRV_O_NOCACHE 0x0020 /* do not use the host page cache */
#define BDRV_O_NATIVE_AIO 0x0080 /* use native AIO instead of the thread pool */
#define BDRV_O_NO_BACKING 0x0100 /* don't open the backing file */
#define BDRV_O_NO_FLUSH 0x0200 /* disable flushing on this disk */
#define BDRV_O_COPY_ON_READ 0x0400 /* copy read backing sectors into image */
#define BDRV_O_INACTIVE 0x0800 /* consistency hint for migration handoff */
#define BDRV_O_CHECK 0x1000 /* open solely for consistency check */
#define BDRV_O_ALLOW_RDWR 0x2000 /* allow reopen to change from r/o to r/w */
#define BDRV_O_UNMAP 0x4000 /* execute guest UNMAP/TRIM operations */
#define BDRV_O_PROTOCOL 0x8000 /* if no block driver is explicitly given:
select an appropriate protocol driver,
ignoring the format layer */
#define BDRV_O_NO_IO 0x10000 /* don't initialize for I/O */
#define BDRV_O_AUTO_RDONLY 0x20000 /* degrade to read-only if opening read-write fails */
#define BDRV_O_IO_URING 0x40000 /* use io_uring instead of the thread pool */
#define BDRV_O_CACHE_MASK (BDRV_O_NOCACHE | BDRV_O_NO_FLUSH)
/* Option names of options parsed by the block layer */
#define BDRV_OPT_CACHE_WB "cache.writeback"
#define BDRV_OPT_CACHE_DIRECT "cache.direct"
#define BDRV_OPT_CACHE_NO_FLUSH "cache.no-flush"
#define BDRV_OPT_READ_ONLY "read-only"
#define BDRV_OPT_AUTO_READ_ONLY "auto-read-only"
#define BDRV_OPT_DISCARD "discard"
#define BDRV_OPT_FORCE_SHARE "force-share"
#define BDRV_SECTOR_BITS 9
#define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS)
#define BDRV_REQUEST_MAX_SECTORS MIN_CONST(SIZE_MAX >> BDRV_SECTOR_BITS, \
INT_MAX >> BDRV_SECTOR_BITS)
#define BDRV_REQUEST_MAX_BYTES (BDRV_REQUEST_MAX_SECTORS << BDRV_SECTOR_BITS)
/*
* We want to allow aligning requests and disk lengths up to any 32-bit
* alignment without having to worry about overflow.
* To achieve this, while still using a reasonably round number as the maximum
* disk size, define the maximum "length" (a limit for any offset/bytes request
* and for the disk size) as INT64_MAX rounded down to a multiple of
* BDRV_MAX_ALIGNMENT.
*/
#define BDRV_MAX_ALIGNMENT (1L << 30)
#define BDRV_MAX_LENGTH (QEMU_ALIGN_DOWN(INT64_MAX, BDRV_MAX_ALIGNMENT))
/*
* Allocation status flags for bdrv_block_status() and friends.
*
* Public flags:
* BDRV_BLOCK_DATA: allocation for data at offset is tied to this layer
* BDRV_BLOCK_ZERO: offset reads as zero
* BDRV_BLOCK_OFFSET_VALID: an associated offset exists for accessing raw data
* BDRV_BLOCK_ALLOCATED: the content of the block is determined by this
* layer rather than any backing, set by block layer
* BDRV_BLOCK_EOF: the returned pnum covers through end of file for this
* layer, set by block layer
*
* Internal flags:
* BDRV_BLOCK_RAW: for use by passthrough drivers, such as raw, to request
* that the block layer recompute the answer from the returned
* BDS; must be accompanied by just BDRV_BLOCK_OFFSET_VALID.
* BDRV_BLOCK_RECURSE: request that the block layer will recursively search for
* zeroes in file child of current block node inside
* returned region. Only valid together with both
* BDRV_BLOCK_DATA and BDRV_BLOCK_OFFSET_VALID. Should not
* appear with BDRV_BLOCK_ZERO.
*
* If BDRV_BLOCK_OFFSET_VALID is set, the map parameter represents the
* host offset within the returned BDS that is allocated for the
* corresponding raw guest data. However, whether that offset
* actually contains data also depends on BDRV_BLOCK_DATA, as follows:
*
* DATA ZERO OFFSET_VALID
* t t t sectors read as zero, returned file is zero at offset
* t f t sectors read as valid from file at offset
* f t t sectors preallocated, read as zero, returned file not
* necessarily zero at offset
* f f t sectors preallocated but read from backing_hd,
* returned file contains garbage at offset
* t t f sectors preallocated, read as zero, unknown offset
* t f f sectors read from unknown file or offset
* f t f not allocated or unknown offset, read as zero
* f f f not allocated or unknown offset, read from backing_hd
*/
#define BDRV_BLOCK_DATA 0x01
#define BDRV_BLOCK_ZERO 0x02
#define BDRV_BLOCK_OFFSET_VALID 0x04
#define BDRV_BLOCK_RAW 0x08
#define BDRV_BLOCK_ALLOCATED 0x10
#define BDRV_BLOCK_EOF 0x20
#define BDRV_BLOCK_RECURSE 0x40
typedef QTAILQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue;
typedef struct BDRVReopenState {
BlockDriverState *bs;
int flags;
BlockdevDetectZeroesOptions detect_zeroes;
bool backing_missing;
BlockDriverState *old_backing_bs; /* keep pointer for permissions update */
BlockDriverState *old_file_bs; /* keep pointer for permissions update */
QDict *options;
QDict *explicit_options;
void *opaque;
} BDRVReopenState;
/*
* Block operation types
*/
typedef enum BlockOpType {
BLOCK_OP_TYPE_BACKUP_SOURCE,
BLOCK_OP_TYPE_BACKUP_TARGET,
BLOCK_OP_TYPE_CHANGE,
BLOCK_OP_TYPE_COMMIT_SOURCE,
BLOCK_OP_TYPE_COMMIT_TARGET,
BLOCK_OP_TYPE_DATAPLANE,
BLOCK_OP_TYPE_DRIVE_DEL,
BLOCK_OP_TYPE_EJECT,
BLOCK_OP_TYPE_EXTERNAL_SNAPSHOT,
BLOCK_OP_TYPE_INTERNAL_SNAPSHOT,
BLOCK_OP_TYPE_INTERNAL_SNAPSHOT_DELETE,
BLOCK_OP_TYPE_MIRROR_SOURCE,
BLOCK_OP_TYPE_MIRROR_TARGET,
BLOCK_OP_TYPE_RESIZE,
BLOCK_OP_TYPE_STREAM,
BLOCK_OP_TYPE_REPLACE,
BLOCK_OP_TYPE_MAX,
} BlockOpType;
/*
* Block node permission constants.
*
* Each BLK_PERM_* permission is a distinct bit; BLK_PERM_ALL is the OR of all
* of them.
*/
enum {
/**
* A user that has the "permission" of consistent reads is guaranteed that
* their view of the contents of the block device is complete and
* self-consistent, representing the contents of a disk at a specific
* point.
*
* For most block devices (including their backing files) this is true, but
* the property cannot be maintained in a few situations like for
* intermediate nodes of a commit block job.
*/
BLK_PERM_CONSISTENT_READ = 0x01,
/** This permission is required to change the visible disk contents. */
BLK_PERM_WRITE = 0x02,
/**
* This permission (which is weaker than BLK_PERM_WRITE) is both enough and
* required for writes to the block node when the caller promises that
* the visible disk content doesn't change.
*
* As the BLK_PERM_WRITE permission is strictly stronger, either is
* sufficient to perform an unchanging write.
*/
BLK_PERM_WRITE_UNCHANGED = 0x04,
/** This permission is required to change the size of a block node. */
BLK_PERM_RESIZE = 0x08,
/**
* There used to be a bit BLK_PERM_GRAPH_MOD with the value 0x10, which has
* since been removed. QEMU 6.1 and earlier may still lock the
* corresponding byte in block/file-posix locking, so any new permission
* must be introduced carefully so that it does not interfere with this
* old, unused bit.
*/
BLK_PERM_ALL = 0x0f,
/*
* OR of the four permissions above; with the current values this is equal
* to BLK_PERM_ALL.
*/
DEFAULT_PERM_PASSTHROUGH = BLK_PERM_CONSISTENT_READ
| BLK_PERM_WRITE
| BLK_PERM_WRITE_UNCHANGED
| BLK_PERM_RESIZE,
/*
* Every permission that is not part of DEFAULT_PERM_PASSTHROUGH. Since
* DEFAULT_PERM_PASSTHROUGH currently equals BLK_PERM_ALL, this evaluates
* to 0.
*/
DEFAULT_PERM_UNCHANGED = BLK_PERM_ALL & ~DEFAULT_PERM_PASSTHROUGH,
};
/*
* Flags that parent nodes assign to child nodes to specify what kind of
* role(s) they take.
*
* At least one of DATA, METADATA, FILTERED, or COW must be set for
* every child.
*/
enum BdrvChildRoleBits {
/*
* This child stores data.
* Any node may have an arbitrary number of such children.
*/
BDRV_CHILD_DATA = (1 << 0),
/*
* This child stores metadata.
* Any node may have an arbitrary number of metadata-storing
* children.
*/
BDRV_CHILD_METADATA = (1 << 1),
/*
* A child that always presents exactly the same visible data as
* the parent, e.g. by virtue of the parent forwarding all reads
* and writes.
* This flag is mutually exclusive with DATA, METADATA, and COW.
* Any node may have at most one filtered child at a time.
*/
BDRV_CHILD_FILTERED = (1 << 2),
/*
* Child from which to read all data that isn't allocated in the
* parent (i.e., the backing child); such data is copied to the
* parent through COW (and optionally COR).
* This flag is mutually exclusive with DATA, METADATA, and
* FILTERED.
* Any node may have at most one such backing child at a time.
*/
BDRV_CHILD_COW = (1 << 3),
/*
* The primary child. For most drivers, this is the child whose
* filename applies best to the parent node.
* Any node may have at most one primary child at a time.
*/
BDRV_CHILD_PRIMARY = (1 << 4),
/* Useful combination of flags */
BDRV_CHILD_IMAGE = BDRV_CHILD_DATA
| BDRV_CHILD_METADATA
| BDRV_CHILD_PRIMARY,
};
/* Mask of BdrvChildRoleBits values */
typedef unsigned int BdrvChildRole;
char *bdrv_perm_names(uint64_t perm);
uint64_t bdrv_qapi_perm_to_blk_perm(BlockPermission qapi_perm);
void bdrv_init(void);
void bdrv_init_with_whitelist(void);
bool bdrv_uses_whitelist(void);
int bdrv_is_whitelisted(BlockDriver *drv, bool read_only);
BlockDriver *bdrv_find_protocol(const char *filename,
bool allow_protocol_prefix,
Error **errp);
BlockDriver *bdrv_find_format(const char *format_name);
/*
 * Takes the block driver @drv, the target @filename and the creation
 * options @opts; @errp receives error details.
 * NOTE(review): presumably returns 0 on success and a negative errno
 * value on failure -- confirm against the definition.
 */
int bdrv_create(BlockDriver *drv, const char *filename,
                QemuOpts *opts, Error **errp);
int bdrv_create_file(const char *filename, QemuOpts *opts, Error **errp);
BlockDriverState *bdrv_new(void);
int bdrv_append(BlockDriverState *bs_new, BlockDriverState *bs_top,
Error **errp);
int bdrv_replace_node(BlockDriverState *from, BlockDriverState *to,
Error **errp);
int bdrv_replace_child_bs(BdrvChild *child, BlockDriverState *new_bs,
Error **errp);
BlockDriverState *bdrv_insert_node(BlockDriverState *bs, QDict *node_options,
int flags, Error **errp);
int bdrv_drop_filter(BlockDriverState *bs, Error **errp);
int bdrv_parse_aio(const char *mode, int *flags);
int bdrv_parse_cache_mode(const char *mode, int *flags, bool *writethrough);
int bdrv_parse_discard_flags(const char *mode, int *flags);
/*
 * Returns a BdrvChild for @parent, described by @filename / @options /
 * @bdref_key and tagged with @child_class and @child_role; @errp receives
 * error details.
 * NOTE(review): @allow_none presumably makes a missing reference a
 * non-error that yields NULL -- confirm against the definition.
 */
BdrvChild *bdrv_open_child(const char *filename,
                           QDict *options, const char *bdref_key,
                           BlockDriverState *parent,
                           const BdrvChildClass *child_class,
                           BdrvChildRole child_role,
                           bool allow_none, Error **errp);
BlockDriverState *bdrv_open_blockdev_ref(BlockdevRef *ref, Error **errp);
int bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
Error **errp);
int bdrv_open_backing_file(BlockDriverState *bs, QDict *parent_options,
const char *bdref_key, Error **errp);
BlockDriverState *bdrv_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp);
BlockDriverState *bdrv_new_open_driver_opts(BlockDriver *drv,
const char *node_name,
QDict *options, int flags,
Error **errp);
BlockDriverState *bdrv_new_open_driver(BlockDriver *drv, const char *node_name,
int flags, Error **errp);
BlockReopenQueue *bdrv_reopen_queue(BlockReopenQueue *bs_queue,
BlockDriverState *bs, QDict *options,
bool keep_old_opts);
void bdrv_reopen_queue_free(BlockReopenQueue *bs_queue);
int bdrv_reopen_multiple(BlockReopenQueue *bs_queue, Error **errp);
int bdrv_reopen(BlockDriverState *bs, QDict *opts, bool keep_old_opts,
Error **errp);
int bdrv_reopen_set_read_only(BlockDriverState *bs, bool read_only,
Error **errp);
int bdrv_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
int bdrv_make_zero(BdrvChild *child, BdrvRequestFlags flags);
int bdrv_pread(BdrvChild *child, int64_t offset, void *buf, int64_t bytes);
int bdrv_pwrite(BdrvChild *child, int64_t offset, const void *buf,
int64_t bytes);
int bdrv_pwrite_sync(BdrvChild *child, int64_t offset,
const void *buf, int64_t bytes);
/*
* Efficiently zero a region of the disk image. Note that this is a regular
* I/O request like read or write and should have a reasonable size. This
* function is not suitable for zeroing the entire image in a single request
* because it may allocate memory for the entire region.
*/
int coroutine_fn bdrv_co_pwrite_zeroes(BdrvChild *child, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
const char *backing_file);
void bdrv_refresh_filename(BlockDriverState *bs);
int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags,
Error **errp);
int generated_co_wrapper
bdrv_truncate(BdrvChild *child, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
int64_t bdrv_nb_sectors(BlockDriverState *bs);
int64_t bdrv_getlength(BlockDriverState *bs);
int64_t bdrv_get_allocated_file_size(BlockDriverState *bs);
BlockMeasureInfo *bdrv_measure(BlockDriver *drv, QemuOpts *opts,
BlockDriverState *in_bs, Error **errp);
void bdrv_get_geometry(BlockDriverState *bs, uint64_t *nb_sectors_ptr);
void bdrv_refresh_limits(BlockDriverState *bs, Transaction *tran, Error **errp);
int bdrv_commit(BlockDriverState *bs);
int bdrv_make_empty(BdrvChild *c, Error **errp);
int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
const char *backing_fmt, bool warn);
void bdrv_register(BlockDriver *bdrv);
int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
const char *backing_file_str);
BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
BlockDriverState *bs);
BlockDriverState *bdrv_find_base(BlockDriverState *bs);
bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
Error **errp);
int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
Error **errp);
void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base);
int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp);
void coroutine_fn bdrv_co_delete_file_noerr(BlockDriverState *bs);
/*
 * Result record handed to bdrv_check() through its @res argument.
 * NOTE(review): the per-field notes below are inferred from the field
 * names only -- confirm against the drivers that fill them in.
 */
typedef struct BdrvCheckResult {
int corruptions; /* presumably: corruptions found */
int leaks; /* presumably: leaks found */
int check_errors; /* presumably: errors hit while checking */
int corruptions_fixed; /* presumably: corruptions repaired */
int leaks_fixed; /* presumably: leaks repaired */
int64_t image_end_offset;
BlockFragInfo bfi;
} BdrvCheckResult;
/*
 * Repair modes accepted by bdrv_check() through its @fix argument.
 * The two values occupy distinct bits (1 and 2).
 * NOTE(review): presumably meant to be OR'ed together as a bitmask --
 * confirm against the callers.
 */
typedef enum {
BDRV_FIX_LEAKS = 1,
BDRV_FIX_ERRORS = 2,
} BdrvCheckMode;
int generated_co_wrapper bdrv_check(BlockDriverState *bs, BdrvCheckResult *res,
BdrvCheckMode fix);
/* The units of offset and total_work_size may be chosen arbitrarily by the
* block driver; total_work_size may change during the course of the amendment
* operation */
typedef void BlockDriverAmendStatusCB(BlockDriverState *bs, int64_t offset,
int64_t total_work_size, void *opaque);
int bdrv_amend_options(BlockDriverState *bs_new, QemuOpts *opts,
BlockDriverAmendStatusCB *status_cb, void *cb_opaque,
bool force,
Error **errp);
/* check if a named node can be replaced when doing drive-mirror */
BlockDriverState *check_to_replace_node(BlockDriverState *parent_bs,
const char *node_name, Error **errp);
/* async block I/O */
void bdrv_aio_cancel(BlockAIOCB *acb);
void bdrv_aio_cancel_async(BlockAIOCB *acb);
/*
 * sg packet commands
 *
 * NOTE(review): named bdrv_co_* but declared without the coroutine_fn
 * marker that other bdrv_co_* prototypes in this header carry (e.g.
 * bdrv_co_flush(), bdrv_co_drain()) -- confirm whether it should be
 * annotated as well.
 */
int bdrv_co_ioctl(BlockDriverState *bs, int req, void *buf);
/* Invalidate any cached metadata used by image formats */
int generated_co_wrapper bdrv_invalidate_cache(BlockDriverState *bs,
Error **errp);
void bdrv_invalidate_cache_all(Error **errp);
int bdrv_inactivate_all(void);
/* Ensure contents are flushed to disk. */
int generated_co_wrapper bdrv_flush(BlockDriverState *bs);
int coroutine_fn bdrv_co_flush(BlockDriverState *bs);
int bdrv_flush_all(void);
void bdrv_close_all(void);
void bdrv_drain(BlockDriverState *bs);
void coroutine_fn bdrv_co_drain(BlockDriverState *bs);
void bdrv_drain_all_begin(void);
void bdrv_drain_all_end(void);
void bdrv_drain_all(void);
/*
 * Expands to a GNU statement expression: @bs is evaluated exactly once
 * into the local bs_, then AIO_WAIT_WHILE() is invoked with the value of
 * bdrv_get_aio_context(bs_) and @cond.  The expression yields whatever
 * AIO_WAIT_WHILE() yields.
 *
 * @cond is pasted through textually, so it must not use an identifier
 * named bs_ (it would bind to the local declared here).
 */
#define BDRV_POLL_WHILE(bs, cond) ({ \
BlockDriverState *bs_ = (bs); \
AIO_WAIT_WHILE(bdrv_get_aio_context(bs_), \
cond); })
/*
 * Two entry points taking the same (child, offset, bytes) arguments:
 * bdrv_pdiscard() is marked generated_co_wrapper, bdrv_co_pdiscard() is
 * its bdrv_co_* counterpart.
 * NOTE(review): bdrv_co_pdiscard() lacks the coroutine_fn marker that
 * other bdrv_co_* prototypes here (bdrv_co_flush(), bdrv_co_drain())
 * carry -- confirm whether it should be annotated as well.
 */
int generated_co_wrapper bdrv_pdiscard(BdrvChild *child, int64_t offset,
int64_t bytes);
int bdrv_co_pdiscard(BdrvChild *child, int64_t offset, int64_t bytes);
int bdrv_has_zero_init_1(BlockDriverState *bs);
int bdrv_has_zero_init(BlockDriverState *bs);
bool bdrv_can_write_zeroes_with_unmap(BlockDriverState *bs);
int bdrv_block_status(BlockDriverState *bs, int64_t offset,
int64_t bytes, int64_t *pnum, int64_t *map,
BlockDriverState **file);
int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int64_t offset, int64_t bytes, int64_t *pnum,
int64_t *map, BlockDriverState **file);
int bdrv_is_allocated(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum);
int bdrv_is_allocated_above(BlockDriverState *top, BlockDriverState *base,
bool include_base, int64_t offset, int64_t bytes,
int64_t *pnum);
int coroutine_fn bdrv_co_is_zero_fast(BlockDriverState *bs, int64_t offset,
int64_t bytes);
bool bdrv_is_read_only(BlockDriverState *bs);
int bdrv_can_set_read_only(BlockDriverState *bs, bool read_only,
bool ignore_allow_rdw, Error **errp);
int bdrv_apply_auto_read_only(BlockDriverState *bs, const char *errmsg,
Error **errp);
bool bdrv_is_writable(BlockDriverState *bs);
bool bdrv_is_sg(BlockDriverState *bs);
bool bdrv_is_inserted(BlockDriverState *bs);
void bdrv_lock_medium(BlockDriverState *bs, bool locked);
void bdrv_eject(BlockDriverState *bs, bool eject_flag);
const char *bdrv_get_format_name(BlockDriverState *bs);
BlockDriverState *bdrv_find_node(const char *node_name);
BlockDeviceInfoList *bdrv_named_nodes_list(bool flat, Error **errp);
XDbgBlockGraph *bdrv_get_xdbg_block_graph(Error **errp);
BlockDriverState *bdrv_lookup_bs(const char *device,
const char *node_name,
Error **errp);
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base);
BlockDriverState *bdrv_next_node(BlockDriverState *bs);
BlockDriverState *bdrv_next_all_states(BlockDriverState *bs);
/*
 * Cursor state for the bdrv_first() / bdrv_next() / bdrv_next_cleanup()
 * iteration functions, which all take a pointer to this structure.
 */
typedef struct BdrvNextIterator {
/*
 * Which of the two iteration stages the cursor is in.
 * NOTE(review): presumably BlockBackend roots are visited first and
 * monitor-owned nodes second -- confirm against the definition.
 */
enum {
BDRV_NEXT_BACKEND_ROOTS,
BDRV_NEXT_MONITOR_OWNED,
} phase;
BlockBackend *blk; /* presumably the current position in the roots phase */
BlockDriverState *bs; /* presumably the node most recently returned */
} BdrvNextIterator;
BlockDriverState *bdrv_first(BdrvNextIterator *it);
BlockDriverState *bdrv_next(BdrvNextIterator *it);
void bdrv_next_cleanup(BdrvNextIterator *it);
BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
bool bdrv_supports_compressed_writes(BlockDriverState *bs);
void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
void *opaque, bool read_only);
const char *bdrv_get_node_name(const BlockDriverState *bs);
const char *bdrv_get_device_name(const BlockDriverState *bs);
const char *bdrv_get_device_or_node_name(const BlockDriverState *bs);
int bdrv_get_flags(BlockDriverState *bs);
int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi);
ImageInfoSpecific *bdrv_get_specific_info(BlockDriverState *bs,
Error **errp);
BlockStatsSpecific *bdrv_get_specific_stats(BlockDriverState *bs);
void bdrv_round_to_clusters(BlockDriverState *bs,
int64_t offset, int64_t bytes,
int64_t *cluster_offset,
int64_t *cluster_bytes);
void bdrv_get_backing_filename(BlockDriverState *bs,
char *filename, int filename_size);
char *bdrv_get_full_backing_filename(BlockDriverState *bs, Error **errp);
char *bdrv_get_full_backing_filename_from_filename(const char *backed,
const char *backing,
Error **errp);
char *bdrv_dirname(BlockDriverState *bs, Error **errp);
int path_has_protocol(const char *path);
int path_is_absolute(const char *path);
char *path_combine(const char *base_path, const char *filename);
int generated_co_wrapper
bdrv_readv_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int generated_co_wrapper
bdrv_writev_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos);
int bdrv_save_vmstate(BlockDriverState *bs, const uint8_t *buf,
int64_t pos, int size);
int bdrv_load_vmstate(BlockDriverState *bs, uint8_t *buf,
int64_t pos, int size);
void bdrv_img_create(const char *filename, const char *fmt,
const char *base_filename, const char *base_fmt,
char *options, uint64_t img_size, int flags,
bool quiet, Error **errp);
/* Returns the alignment in bytes that is required so that no bounce buffer
* is required throughout the stack */
size_t bdrv_min_mem_align(BlockDriverState *bs);
/* Returns optimal alignment in bytes for bounce buffer */
size_t bdrv_opt_mem_align(BlockDriverState *bs);
void *qemu_blockalign(BlockDriverState *bs, size_t size);
void *qemu_blockalign0(BlockDriverState *bs, size_t size);
void *qemu_try_blockalign(BlockDriverState *bs, size_t size);
void *qemu_try_blockalign0(BlockDriverState *bs, size_t size);
bool bdrv_qiov_is_aligned(BlockDriverState *bs, QEMUIOVector *qiov);
void bdrv_enable_copy_on_read(BlockDriverState *bs);
void bdrv_disable_copy_on_read(BlockDriverState *bs);
void bdrv_ref(BlockDriverState *bs);
void bdrv_unref(BlockDriverState *bs);
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child);
BdrvChild *bdrv_attach_child(BlockDriverState *parent_bs,
BlockDriverState *child_bs,
const char *child_name,
const BdrvChildClass *child_class,
BdrvChildRole child_role,
Error **errp);
bool bdrv_op_is_blocked(BlockDriverState *bs, BlockOpType op, Error **errp);
void bdrv_op_block(BlockDriverState *bs, BlockOpType op, Error *reason);
void bdrv_op_unblock(BlockDriverState *bs, BlockOpType op, Error *reason);
void bdrv_op_block_all(BlockDriverState *bs, Error *reason);
void bdrv_op_unblock_all(BlockDriverState *bs, Error *reason);
bool bdrv_op_blocker_is_empty(BlockDriverState *bs);
/*
 * Pass event @evt for the node behind @child to bdrv_debug_event();
 * does nothing when @child is NULL.
 *
 * @child is parenthesized before the member access so that non-trivial
 * pointer expressions (e.g. *pp) expand correctly.  It is still
 * evaluated twice (once for the NULL test, once for the dereference), so
 * it must not have side effects.
 */
#define BLKDBG_EVENT(child, evt) \
    do { \
        if (child) { \
            bdrv_debug_event((child)->bs, evt); \
        } \
    } while (0)
void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event);
int bdrv_debug_breakpoint(BlockDriverState *bs, const char *event,
const char *tag);
int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag);
int bdrv_debug_resume(BlockDriverState *bs, const char *tag);
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag);
/**
* bdrv_get_aio_context:
*
* Returns: the currently bound #AioContext
*/
AioContext *bdrv_get_aio_context(BlockDriverState *bs);
/**
* Move the current coroutine to the AioContext of @bs and return the old
* AioContext of the coroutine. Increase bs->in_flight so that draining @bs
* will wait for the operation to proceed until the corresponding
* bdrv_co_leave().
*
* Consequently, you can't call drain inside a bdrv_co_enter/leave() section as
* this will deadlock.
*/
AioContext *coroutine_fn bdrv_co_enter(BlockDriverState *bs);
/**
* Ends a section started by bdrv_co_enter(). Move the current coroutine back
* to old_ctx and decrease bs->in_flight again.
*/
void coroutine_fn bdrv_co_leave(BlockDriverState *bs, AioContext *old_ctx);
/**
* Locks the AioContext of @bs if it's not the current AioContext. This avoids
* double locking which could lead to deadlocks: This is a coroutine_fn, so we
* know we already own the lock of the current AioContext.
*
* May only be called in the main thread.
*/
void coroutine_fn bdrv_co_lock(BlockDriverState *bs);
/**
* Unlocks the AioContext of @bs if it's not the current AioContext.
*/
void coroutine_fn bdrv_co_unlock(BlockDriverState *bs);
/**
* Transfer control to @co in the aio context of @bs
*/
void bdrv_coroutine_enter(BlockDriverState *bs, Coroutine *co);
void bdrv_set_aio_context_ignore(BlockDriverState *bs,
AioContext *new_context, GSList **ignore);
int bdrv_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
Error **errp);
int bdrv_child_try_set_aio_context(BlockDriverState *bs, AioContext *ctx,
BdrvChild *ignore_child, Error **errp);
bool bdrv_child_can_set_aio_context(BdrvChild *c, AioContext *ctx,
GSList **ignore, Error **errp);
bool bdrv_can_set_aio_context(BlockDriverState *bs, AioContext *ctx,
GSList **ignore, Error **errp);
AioContext *bdrv_child_get_parent_aio_context(BdrvChild *c);
AioContext *child_of_bds_get_parent_aio_context(BdrvChild *c);
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz);
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo);
void bdrv_io_plug(BlockDriverState *bs);
void bdrv_io_unplug(BlockDriverState *bs);
/**
* bdrv_parent_drained_begin_single:
*
* Begin a quiesced section for the parent of @c. If @poll is true, wait for
* any pending activity to cease.
*/
void bdrv_parent_drained_begin_single(BdrvChild *c, bool poll);
/**
 * bdrv_parent_drained_end_single:
 *
 * End a quiesced section for the parent of @c.
 *
 * This polls @bs's AioContext until all scheduled sub-drained_ends
 * have settled, which may result in graph changes.
 *
 * NOTE(review): this function has no @bs parameter; "@bs" above
 * presumably means the node that @c points to -- confirm against the
 * definition.
 */
void bdrv_parent_drained_end_single(BdrvChild *c);
/**
* bdrv_drain_poll:
*
* Poll for pending requests in @bs, its parents (except for @ignore_parent),
* and if @recursive is true its children as well (used for subtree drain).
*
* If @ignore_bds_parents is true, parents that are BlockDriverStates must
* ignore the drain request because they will be drained separately (used for
* drain_all).
*
* This is part of bdrv_drained_begin.
*/
bool bdrv_drain_poll(BlockDriverState *bs, bool recursive,
BdrvChild *ignore_parent, bool ignore_bds_parents);
/**
* bdrv_drained_begin:
*
* Begin a quiesced section for exclusive access to the BDS, by disabling
* external request sources including NBD server, block jobs, and device model.
*
* This function can be recursive.
*/
void bdrv_drained_begin(BlockDriverState *bs);
/**
* bdrv_do_drained_begin_quiesce:
*
* Quiesces a BDS like bdrv_drained_begin(), but does not wait for already
* running requests to complete.
*/
void bdrv_do_drained_begin_quiesce(BlockDriverState *bs,
BdrvChild *parent, bool ignore_bds_parents);
/**
* Like bdrv_drained_begin, but recursively begins a quiesced section for
* exclusive access to all child nodes as well.
*/
void bdrv_subtree_drained_begin(BlockDriverState *bs);
/**
* bdrv_drained_end:
*
* End a quiescent section started by bdrv_drained_begin().
*
* This polls @bs's AioContext until all scheduled sub-drained_ends
* have settled. On one hand, that may result in graph changes. On
* the other, this requires that the caller either runs in the main
* loop; or that all involved nodes (@bs and all of its parents) are
* in the caller's AioContext.
*/
void bdrv_drained_end(BlockDriverState *bs);
/**
* bdrv_drained_end_no_poll:
*
* Same as bdrv_drained_end(), but do not poll for the subgraph to
* actually become unquiesced. Therefore, no graph changes will occur
* with this function.
*
* *drained_end_counter is incremented for every background operation
* that is scheduled, and will be decremented for every operation once
* it settles. The caller must poll until it reaches 0. The counter
* should be accessed using atomic operations only.
*/
void bdrv_drained_end_no_poll(BlockDriverState *bs, int *drained_end_counter);
/**
* End a quiescent section started by bdrv_subtree_drained_begin().
*/
void bdrv_subtree_drained_end(BlockDriverState *bs);
void bdrv_add_child(BlockDriverState *parent, BlockDriverState *child,
Error **errp);
void bdrv_del_child(BlockDriverState *parent, BdrvChild *child, Error **errp);
bool bdrv_can_store_new_dirty_bitmap(BlockDriverState *bs, const char *name,
uint32_t granularity, Error **errp);
/**
*
* bdrv_register_buf/bdrv_unregister_buf:
*
* Register/unregister a buffer for I/O. For example, VFIO drivers are
* interested to know the memory areas that would later be used for I/O, so
* that they can prepare IOMMU mapping etc., to get better performance.
*/
void bdrv_register_buf(BlockDriverState *bs, void *host, size_t size);
void bdrv_unregister_buf(BlockDriverState *bs, void *host);
/**
*
* bdrv_co_copy_range:
*
* Do offloaded copy between two children. If the operation is not implemented
* by the driver, or if the backend storage doesn't support it, a negative
* error code will be returned.
*
* Note: block layer doesn't emulate or fallback to a bounce buffer approach
* because usually the caller shouldn't attempt offloaded copy any more (e.g.
* calling copy_file_range(2)) after the first error, thus it should fall back
* to a read+write path in the caller level.
*
* @src: Source child to copy data from
* @src_offset: offset in @src image to read data
* @dst: Destination child to copy data to
* @dst_offset: offset in @dst image to write data
* @bytes: number of bytes to copy
* @flags: request flags. Supported flags:
* BDRV_REQ_ZERO_WRITE - treat the @src range as zero data and do zero
* write on @dst as if bdrv_co_pwrite_zeroes is
* called. Used to simplify caller code, or
* during BlockDriver.bdrv_co_copy_range_from()
* recursion.
* BDRV_REQ_NO_SERIALISING - do not serialize with other overlapping
* requests currently in flight.
*
* Returns: 0 if succeeded; negative error code if failed.
**/
int coroutine_fn bdrv_co_copy_range(BdrvChild *src, int64_t src_offset,
BdrvChild *dst, int64_t dst_offset,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
void bdrv_cancel_in_flight(BlockDriverState *bs);
#endif
#endif /* BLOCK_H */

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,329 @@
/*
* QEMU System Emulator block driver
*
* Copyright (c) 2003 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef BLOCK_INT_GLOBAL_STATE_H
#define BLOCK_INT_GLOBAL_STATE_H
#include "block_int-common.h"
/*
* Global state (GS) API. These functions run under the BQL.
*
* See include/block/block-global-state.h for more information about
* the GS API.
*/
/**
 * stream_start:
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
 * @bs: Block device to operate on.
 * @base: Block device that will become the new base, or %NULL to
 * flatten the whole backing file chain onto @bs.
 * @backing_file_str: The file name that will be written to @bs as the
 * new backing file if the job completes. Ignored if @base is %NULL.
 * @bottom: NOTE(review): not described in the original comment;
 * presumably an alternative way to bound the chain that is streamed,
 * given as its lowest node -- confirm against the definition.
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 * See @BlockJobCreateFlags
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
 * @filter_node_name: The node name that should be assigned to the filter
 * driver that the stream job inserts into the graph above
 * @bs. NULL means that a node name should be autogenerated.
 * @errp: Error object.
 *
 * Start a streaming operation on @bs. Clusters that are unallocated
 * in @bs, but allocated in any image between @base and @bs (both
 * exclusive) will be written to @bs. At the end of a successful
 * streaming job, the backing file of @bs will be changed to
 * @backing_file_str in the written image and to @base in the live
 * BlockDriverState.
 */
void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, const char *backing_file_str,
BlockDriverState *bottom,
int creation_flags, int64_t speed,
BlockdevOnError on_error,
const char *filter_node_name,
Error **errp);
/**
 * commit_start:
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
 * @bs: Active block device.
 * @base: Block device that will be written into, and become the new top.
 * @top: Top block device to be committed.
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 * See @BlockJobCreateFlags
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @on_error: The action to take upon error.
 * @backing_file_str: String to use as the backing file in @top's overlay
 * @filter_node_name: The node name that should be assigned to the filter
 * driver that the commit job inserts into the graph above @top. NULL means
 * that a node name should be autogenerated.
 * @errp: Error object.
 *
 * Note the argument order: @base is passed before @top, and both have
 * the same type, so swapping them is not caught by the compiler.
 */
void commit_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, BlockDriverState *top,
int creation_flags, int64_t speed,
BlockdevOnError on_error, const char *backing_file_str,
const char *filter_node_name, Error **errp);
/**
* commit_active_start:
* @job_id: The id of the newly-created job, or %NULL to use the
* device name of @bs.
* @bs: Active block device to be committed.
* @base: Block device that will be written into, and become the new top.
* @creation_flags: Flags that control the behavior of the Job lifetime.
* See @BlockJobCreateFlags
* @speed: The maximum speed, in bytes per second, or 0 for unlimited.
* @on_error: The action to take upon error.
* @filter_node_name: The node name that should be assigned to the filter
* driver that the commit job inserts into the graph above @bs. NULL means that
* a node name should be autogenerated.
* @cb: Completion function for the job.
* @opaque: Opaque pointer value passed to @cb.
* @auto_complete: Auto complete the job.
* @errp: Error object.
*
*/
BlockJob *commit_active_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *base, int creation_flags,
int64_t speed, BlockdevOnError on_error,
const char *filter_node_name,
BlockCompletionFunc *cb, void *opaque,
bool auto_complete, Error **errp);
/**
 * mirror_start:
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
 * @bs: Block device to operate on.
 * @target: Block device to write to.
 * @replaces: Block graph node name to replace once the mirror is done. Can
 * only be used when full mirroring is selected.
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 * See @BlockJobCreateFlags
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @granularity: The chosen granularity for the dirty bitmap.
 * @buf_size: The amount of data that can be in flight at one time.
 * @mode: Whether to collapse all images in the chain to the target.
 * @backing_mode: How to establish the target's backing chain after completion.
 * @zero_target: Whether the target should be explicitly zero-initialized
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
 * @unmap: Whether to unmap target where source sectors only contain zeroes.
 * @filter_node_name: The node name that should be assigned to the filter
 * driver that the mirror job inserts into the graph above @bs. NULL means that
 * a node name should be autogenerated.
 * @copy_mode: When to trigger writes to the target.
 * @errp: Error object.
 *
 * Start a mirroring operation on @bs. Clusters that are allocated
 * in @bs will be written to @target until the job is cancelled or
 * manually completed. At the end of a successful mirroring job,
 * @bs will be switched to read from @target.
 */
void mirror_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, const char *replaces,
int creation_flags, int64_t speed,
uint32_t granularity, int64_t buf_size,
MirrorSyncMode mode, BlockMirrorBackingMode backing_mode,
bool zero_target,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
bool unmap, const char *filter_node_name,
MirrorCopyMode copy_mode, Error **errp);
/**
 * backup_job_create:
 * @job_id: The id of the newly-created job, or %NULL to use the
 * device name of @bs.
 * @bs: Block device to operate on.
 * @target: Block device to write to.
 * @speed: The maximum speed, in bytes per second, or 0 for unlimited.
 * @sync_mode: What parts of the disk image should be copied to the destination.
 * @sync_bitmap: The dirty bitmap if sync_mode is 'bitmap' or 'incremental'
 * @bitmap_mode: The bitmap synchronization policy to use.
 * @compress: NOTE(review): not described in the original comment;
 * presumably requests compressed writes to @target -- confirm.
 * @filter_node_name: NOTE(review): not described in the original comment;
 * presumably the node name for a filter driver the backup job inserts,
 * as for the other job-start functions in this header -- confirm.
 * @perf: Performance options. All actual fields assumed to be present,
 * all ".has_*" fields are ignored.
 * @on_source_error: The action to take upon error reading from the source.
 * @on_target_error: The action to take upon error writing to the target.
 * @creation_flags: Flags that control the behavior of the Job lifetime.
 * See @BlockJobCreateFlags
 * @cb: Completion function for the job.
 * @opaque: Opaque pointer value passed to @cb.
 * @txn: Transaction that this job is part of (may be NULL).
 * @errp: Error object.
 *
 * Create a backup operation on @bs. Clusters in @bs are written to @target
 * until the job is cancelled or manually completed.
 */
BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
BlockDriverState *target, int64_t speed,
MirrorSyncMode sync_mode,
BdrvDirtyBitmap *sync_bitmap,
BitmapSyncMode bitmap_mode,
bool compress,
const char *filter_node_name,
BackupPerf *perf,
BlockdevOnError on_source_error,
BlockdevOnError on_target_error,
int creation_flags,
BlockCompletionFunc *cb, void *opaque,
JobTxn *txn, Error **errp);
BdrvChild *bdrv_root_attach_child(BlockDriverState *child_bs,
const char *child_name,
const BdrvChildClass *child_class,
BdrvChildRole child_role,
uint64_t perm, uint64_t shared_perm,
void *opaque, Error **errp);
void bdrv_root_unref_child(BdrvChild *child);
void bdrv_get_cumulative_perm(BlockDriverState *bs, uint64_t *perm,
uint64_t *shared_perm);
/**
* Sets a BdrvChild's permissions. Avoid if the parent is a BDS; use
* bdrv_child_refresh_perms() instead and make the parent's
* .bdrv_child_perm() implementation return the correct values.
*/
int bdrv_child_try_set_perm(BdrvChild *c, uint64_t perm, uint64_t shared,
Error **errp);
/**
* Calls bs->drv->bdrv_child_perm() and updates the child's permission
* masks with the result.
* Drivers should invoke this function whenever an event occurs that
* makes their .bdrv_child_perm() implementation return different
* values than before, but which will not result in the block layer
* automatically refreshing the permissions.
*/
int bdrv_child_refresh_perms(BlockDriverState *bs, BdrvChild *c, Error **errp);
bool bdrv_recurse_can_replace(BlockDriverState *bs,
BlockDriverState *to_replace);
/*
* Default implementation for BlockDriver.bdrv_child_perm() that can
* be used by block filters and image formats, as long as they use the
* child_of_bds child class and set an appropriate BdrvChildRole.
*/
void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
BdrvChildRole role, BlockReopenQueue *reopen_queue,
uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared);
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
bool blk_dev_has_removable_media(BlockBackend *blk);
void blk_dev_eject_request(BlockBackend *blk, bool force);
bool blk_dev_is_medium_locked(BlockBackend *blk);
void bdrv_restore_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap *backup);
void bdrv_set_monitor_owned(BlockDriverState *bs);
void blockdev_close_all_bdrv_states(void);
BlockDriverState *bds_tree_init(QDict *bs_opts, Error **errp);
/**
* Simple implementation of bdrv_co_create_opts for protocol drivers
* which only support creation via opening a file
* (usually existing raw storage device)
*/
int coroutine_fn bdrv_co_create_opts_simple(BlockDriver *drv,
const char *filename,
QemuOpts *opts,
Error **errp);
BdrvDirtyBitmap *block_dirty_bitmap_lookup(const char *node,
const char *name,
BlockDriverState **pbs,
Error **errp);
BdrvDirtyBitmap *block_dirty_bitmap_merge(const char *node, const char *target,
BlockDirtyBitmapMergeSourceList *bms,
HBitmap **backup, Error **errp);
BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
bool release,
BlockDriverState **bitmap_bs,
Error **errp);
BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs);
/**
* bdrv_add_aio_context_notifier:
*
* If a long-running job intends to be always run in the same AioContext as a
* certain BDS, it may use this function to be notified of changes regarding the
* association of the BDS to an AioContext.
*
* attached_aio_context() is called after the target BDS has been attached to a
* new AioContext; detach_aio_context() is called before the target BDS is being
* detached from its old AioContext.
*/
void bdrv_add_aio_context_notifier(BlockDriverState *bs,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque);
/**
* bdrv_remove_aio_context_notifier:
*
* Unsubscribe from change notifications regarding the BDS's AioContext. The
* parameters given here have to be the same as those given to
* bdrv_add_aio_context_notifier().
*/
void bdrv_remove_aio_context_notifier(BlockDriverState *bs,
void (*aio_context_attached)(AioContext *,
void *),
void (*aio_context_detached)(void *),
void *opaque);
/**
* End all quiescent sections started by bdrv_drain_all_begin(). This is
* needed when deleting a BDS before bdrv_drain_all_end() is called.
*
* NOTE: this is an internal helper for bdrv_close() *only*. No one else
* should call it.
*/
void bdrv_drain_all_end_quiesce(BlockDriverState *bs);
/**
* Make sure that the function is running under both drain and BQL.
* The latter protects from concurrent writings
* from the GS API, while the former prevents concurrent reads
* from I/O.
*/
static inline void assert_bdrv_graph_writable(BlockDriverState *bs)
{
/*
* TODO: this function is incomplete. Because the users of this
* assert lack the necessary drains, check only for BQL.
* Once the necessary drains are added,
* assert also for qatomic_read(&bs->quiesce_counter) > 0
*
* Until then @bs is not examined at all: the only condition checked
* is that the caller satisfies qemu_in_main_thread().
*/
assert(qemu_in_main_thread());
}
#endif /* BLOCK_INT_GLOBAL_STATE */

View File

@ -0,0 +1,185 @@
/*
* QEMU System Emulator block driver
*
* Copyright (c) 2003 Fabrice Bellard
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#ifndef BLOCK_INT_IO_H
#define BLOCK_INT_IO_H
#include "block_int-common.h"
/*
* I/O API functions. These functions are thread-safe.
*
* See include/block/block-io.h for more information about
* the I/O API.
*/
int coroutine_fn bdrv_co_preadv(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int coroutine_fn bdrv_co_preadv_part(BdrvChild *child,
int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
int coroutine_fn bdrv_co_pwritev(BdrvChild *child,
int64_t offset, int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int coroutine_fn bdrv_co_pwritev_part(BdrvChild *child,
int64_t offset, int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset, BdrvRequestFlags flags);
/**
 * bdrv_co_pread:
 * @child: child to read from, forwarded unchanged to bdrv_co_preadv()
 * @offset: start offset of the request
 * @bytes: length of the request, also used as the length of @buf
 * @buf: flat destination buffer
 * @flags: request flags, forwarded unchanged
 *
 * Flat-buffer convenience wrapper around bdrv_co_preadv().
 *
 * Returns: whatever bdrv_co_preadv() returns.
 */
static inline int coroutine_fn bdrv_co_pread(BdrvChild *child,
    int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
{
    /* Describe @buf/@bytes as a QEMUIOVector for the vectored variant. */
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
    int ret;

    IO_CODE();

    ret = bdrv_co_preadv(child, offset, bytes, &qiov, flags);
    return ret;
}
/**
 * bdrv_co_pwrite:
 * @child: child to write to, forwarded unchanged to bdrv_co_pwritev()
 * @offset: start offset of the request
 * @bytes: length of the request, also used as the length of @buf
 * @buf: flat source buffer
 * @flags: request flags, forwarded unchanged
 *
 * Flat-buffer convenience wrapper around bdrv_co_pwritev().
 *
 * Returns: whatever bdrv_co_pwritev() returns.
 */
static inline int coroutine_fn bdrv_co_pwrite(BdrvChild *child,
    int64_t offset, unsigned int bytes, void *buf, BdrvRequestFlags flags)
{
    /* Describe @buf/@bytes as a QEMUIOVector for the vectored variant. */
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
    int ret;

    IO_CODE();

    ret = bdrv_co_pwritev(child, offset, bytes, &qiov, flags);
    return ret;
}
bool coroutine_fn bdrv_make_request_serialising(BdrvTrackedRequest *req,
uint64_t align);
BdrvTrackedRequest *coroutine_fn bdrv_co_get_self_request(BlockDriverState *bs);
BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
const char *filename);
/**
* bdrv_wakeup:
* @bs: The BlockDriverState for which an I/O operation has been completed.
*
* Wake up the main thread if it is waiting on BDRV_POLL_WHILE. During
* synchronous I/O on a BlockDriverState that is attached to another
* I/O thread, the main thread lets the I/O thread's event loop run,
* waiting for the I/O operation to complete. A bdrv_wakeup will wake
* up the main thread if necessary.
*
* Manual calls to bdrv_wakeup are rarely necessary, because
* bdrv_dec_in_flight already calls it.
*/
void bdrv_wakeup(BlockDriverState *bs);
const char *bdrv_get_parent_name(const BlockDriverState *bs);
bool blk_dev_has_tray(BlockBackend *blk);
bool blk_dev_is_tray_open(BlockBackend *blk);
void bdrv_set_dirty(BlockDriverState *bs, int64_t offset, int64_t bytes);
void bdrv_clear_dirty_bitmap(BdrvDirtyBitmap *bitmap, HBitmap **out);
bool bdrv_dirty_bitmap_merge_internal(BdrvDirtyBitmap *dest,
const BdrvDirtyBitmap *src,
HBitmap **backup, bool lock);
void bdrv_inc_in_flight(BlockDriverState *bs);
void bdrv_dec_in_flight(BlockDriverState *bs);
int coroutine_fn bdrv_co_copy_range_from(BdrvChild *src, int64_t src_offset,
BdrvChild *dst, int64_t dst_offset,
int64_t bytes,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
int coroutine_fn bdrv_co_copy_range_to(BdrvChild *src, int64_t src_offset,
BdrvChild *dst, int64_t dst_offset,
int64_t bytes,
BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
int refresh_total_sectors(BlockDriverState *bs, int64_t hint);
BdrvChild *bdrv_cow_child(BlockDriverState *bs);
BdrvChild *bdrv_filter_child(BlockDriverState *bs);
BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs);
BdrvChild *bdrv_primary_child(BlockDriverState *bs);
BlockDriverState *bdrv_skip_filters(BlockDriverState *bs);
BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
/*
 * Node-returning counterpart of bdrv_cow_child(): look up the child
 * and hand it to child_bs() to obtain the BlockDriverState.
 */
static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs)
{
    BdrvChild *cow;

    IO_CODE();

    cow = bdrv_cow_child(bs);
    return child_bs(cow);
}
/*
 * Node-returning counterpart of bdrv_filter_child(): look up the child
 * and hand it to child_bs() to obtain the BlockDriverState.
 */
static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs)
{
    BdrvChild *filtered;

    IO_CODE();

    filtered = bdrv_filter_child(bs);
    return child_bs(filtered);
}
/*
 * Node-returning counterpart of bdrv_filter_or_cow_child(): look up the
 * child and hand it to child_bs() to obtain the BlockDriverState.
 */
static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs)
{
    BdrvChild *next;

    IO_CODE();

    next = bdrv_filter_or_cow_child(bs);
    return child_bs(next);
}
/*
 * Node-returning counterpart of bdrv_primary_child(): look up the child
 * and hand it to child_bs() to obtain the BlockDriverState.
 */
static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
{
    BdrvChild *primary;

    IO_CODE();

    primary = bdrv_primary_child(bs);
    return child_bs(primary);
}
/**
* Check whether the given offset is in the cached block-status data
* region.
*
* If it is, and @pnum is not NULL, *pnum is set to
* `bsc.data_end - offset`, i.e. how many bytes, starting from
* @offset, are data (according to the cache).
* Otherwise, *pnum is not touched.
*/
bool bdrv_bsc_is_data(BlockDriverState *bs, int64_t offset, int64_t *pnum);
/**
* If [offset, offset + bytes) overlaps with the currently cached
* block-status region, invalidate the cache.
*
* (To be used by I/O paths that cause data regions to be zero or
* holes.)
*/
void bdrv_bsc_invalidate_range(BlockDriverState *bs,
int64_t offset, int64_t bytes);
/**
* Mark the range [offset, offset + bytes) as a data region.
*/
void bdrv_bsc_fill(BlockDriverState *bs, int64_t offset, int64_t bytes);
/*
* "I/O or GS" API functions. These functions can run without
* the BQL, but only in one specific iothread/main loop.
*
* See include/block/block-io.h for more information about
* the "I/O or GS" API.
*/
void bdrv_apply_subtree_drain(BdrvChild *child, BlockDriverState *new_parent);
void bdrv_unapply_subtree_drain(BdrvChild *child, BlockDriverState *old_parent);
#endif /* BLOCK_INT_IO_H */

File diff suppressed because it is too large Load Diff

View File

@ -74,6 +74,13 @@ typedef struct BlockJob {
GSList *nodes;
} BlockJob;
/*
* Global state (GS) API. These functions run under the BQL.
*
* See include/block/block-global-state.h for more information about
* the GS API.
*/
/**
* block_job_next:
* @job: A block job, or %NULL.
@ -155,6 +162,21 @@ BlockJobInfo *block_job_query(BlockJob *job, Error **errp);
*/
void block_job_iostatus_reset(BlockJob *job);
/*
* block_job_get_aio_context:
*
* Returns aio context associated with a block job.
*/
AioContext *block_job_get_aio_context(BlockJob *job);
/*
* Common functions that are neither I/O nor Global State.
*
* See include/block/block-common.h for more information about
* the Common API.
*/
/**
* block_job_is_internal:
* @job: The job to determine if it is user-visible or not.
@ -170,11 +192,4 @@ bool block_job_is_internal(BlockJob *job);
*/
const BlockJobDriver *block_job_driver(BlockJob *job);
/*
* block_job_get_aio_context:
*
* Returns aio context associated with a block job.
*/
AioContext *block_job_get_aio_context(BlockJob *job);
#endif

View File

@ -38,6 +38,13 @@ struct BlockJobDriver {
/** Generic JobDriver callbacks and settings */
JobDriver job_driver;
/*
* I/O API functions. These functions are thread-safe.
*
* See include/block/block-io.h for more information about
* the I/O API.
*/
/*
* Returns whether the job has pending requests for the child or will
* submit new requests before the next pause point. This callback is polled
@ -46,6 +53,13 @@ struct BlockJobDriver {
*/
bool (*drained_poll)(BlockJob *job);
/*
* Global state (GS) API. These functions run under the BQL.
*
* See include/block/block-global-state.h for more information about
* the GS API.
*/
/*
* If the callback is not NULL, it will be invoked before the job is
* resumed in a new AioContext. This is the place to move any resources
@ -56,6 +70,13 @@ struct BlockJobDriver {
void (*set_speed)(BlockJob *job, int64_t speed);
};
/*
* Global state (GS) API. These functions run under the BQL.
*
* See include/block/block-global-state.h for more information about
* the GS API.
*/
/**
* block_job_create:
* @job_id: The id of the newly-created job, or %NULL to have one
@ -98,6 +119,13 @@ void block_job_free(Job *job);
*/
void block_job_user_resume(Job *job);
/*
* I/O API functions. These functions are thread-safe.
*
* See include/block/block-io.h for more information about
* the I/O API.
*/
/**
* block_job_ratelimit_get_delay:
*

View File

@ -45,6 +45,13 @@ typedef struct QEMUSnapshotInfo {
uint64_t icount; /* record/replay step */
} QEMUSnapshotInfo;
/*
* Global state (GS) API. These functions run under the BQL.
*
* See include/block/block-global-state.h for more information about
* the GS API.
*/
int bdrv_snapshot_find(BlockDriverState *bs, QEMUSnapshotInfo *sn_info,
const char *name);
bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
@ -73,9 +80,11 @@ int bdrv_snapshot_load_tmp_by_id_or_name(BlockDriverState *bs,
Error **errp);
/* Group operations. All block drivers are involved.
/*
* Group operations. All block drivers are involved.
* These functions will properly handle dataplane (take aio_context_acquire
* when appropriate for appropriate block drivers */
* when appropriate for appropriate block drivers
*/
bool bdrv_all_can_snapshot(bool has_devices, strList *devices,
Error **errp);

View File

@ -0,0 +1,165 @@
/*
* QEMU Thread Local Storage for coroutines
*
* Copyright Red Hat
*
* SPDX-License-Identifier: LGPL-2.1-or-later
*
* This work is licensed under the terms of the GNU LGPL, version 2.1 or later.
* See the COPYING.LIB file in the top-level directory.
*
* It is forbidden to access Thread Local Storage in coroutines because
* compiler optimizations may cause values to be cached across coroutine
* re-entry. Coroutines can run in more than one thread through the course of
* their life, leading to bugs when stale TLS values from the wrong thread are
* used as a result of compiler optimization.
*
* An example is:
*
* .. code-block:: c
* :caption: A coroutine that may see the wrong TLS value
*
* static __thread AioContext *current_aio_context;
* ...
* static void coroutine_fn foo(void)
* {
* aio_notify(current_aio_context);
* qemu_coroutine_yield();
* aio_notify(current_aio_context); // <-- may be stale after yielding!
* }
*
* This header provides macros for safely defining variables in Thread Local
* Storage:
*
* .. code-block:: c
* :caption: A coroutine that safely uses TLS
*
* QEMU_DEFINE_STATIC_CO_TLS(AioContext *, current_aio_context)
* ...
* static void coroutine_fn foo(void)
* {
* aio_notify(get_current_aio_context());
* qemu_coroutine_yield();
* aio_notify(get_current_aio_context()); // <-- safe
* }
*/
#ifndef QEMU_COROUTINE_TLS_H
#define QEMU_COROUTINE_TLS_H
/*
* To stop the compiler from caching TLS values we define accessor functions
* with __attribute__((noinline)) plus asm volatile("") to prevent
* optimizations that override noinline.
*
* The compiler can still analyze noinline code and make optimizations based on
* that knowledge, so an inline asm output operand is used to prevent
* optimizations that make assumptions about the address of the TLS variable.
*
* This is fragile and ultimately needs to be solved by a mechanism that is
* guaranteed to work by the compiler (e.g. stackless coroutines), but for now
* we use this approach to prevent issues.
*/
/**
* QEMU_DECLARE_CO_TLS:
* @type: the variable's C type
* @var: the variable name
*
* Declare an extern variable in Thread Local Storage from a header file:
*
* .. code-block:: c
* :caption: Declaring an extern variable in Thread Local Storage
*
* QEMU_DECLARE_CO_TLS(int, my_count)
* ...
* int c = get_my_count();
* set_my_count(c + 1);
* *get_ptr_my_count() = 0;
*
* This is a coroutine-safe replacement for the __thread keyword and is
* equivalent to the following code:
*
* .. code-block:: c
* :caption: Declaring a TLS variable using __thread
*
* extern __thread int my_count;
* ...
* int c = my_count;
* my_count = c + 1;
* *(&my_count) = 0;
*
* The macro does not declare the variable itself. It expands to the
* prototypes of three accessor functions, get_<var>(), set_<var>() and
* get_ptr_<var>(), each marked noinline. The expansion already ends in a
* semicolon, so the invocation is written without one.
*/
#define QEMU_DECLARE_CO_TLS(type, var) \
__attribute__((noinline)) type get_##var(void); \
__attribute__((noinline)) void set_##var(type v); \
__attribute__((noinline)) type *get_ptr_##var(void);
/**
* QEMU_DEFINE_CO_TLS:
* @type: the variable's C type
* @var: the variable name
*
* Define a variable in Thread Local Storage that was previously declared from
* a header file with QEMU_DECLARE_CO_TLS():
*
* .. code-block:: c
* :caption: Defining a variable in Thread Local Storage
*
* QEMU_DEFINE_CO_TLS(int, my_count)
*
* This is a coroutine-safe replacement for the __thread keyword and is
* equivalent to the following code:
*
* .. code-block:: c
* :caption: Defining a TLS variable using __thread
*
* __thread int my_count;
*
* The storage is a file-local variable named co_tls_<var>; it is reached
* through get_<var>(), set_<var>() and get_ptr_<var>(), which this macro
* defines with external linkage. The definitions carry no noinline
* attribute of their own: they depend on the one attached to the
* QEMU_DECLARE_CO_TLS() prototypes, so that declaration should be visible
* before this macro is expanded.
*
* get_ptr_<var>() passes the address through an inline asm "+rm" operand
* before returning it (see the note on address assumptions at the top of
* this file).
*/
#define QEMU_DEFINE_CO_TLS(type, var) \
static __thread type co_tls_##var; \
type get_##var(void) { asm volatile(""); return co_tls_##var; } \
void set_##var(type v) { asm volatile(""); co_tls_##var = v; } \
type *get_ptr_##var(void) \
{ type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
/**
* QEMU_DEFINE_STATIC_CO_TLS:
* @type: the variable's C type
* @var: the variable name
*
* Define a static variable in Thread Local Storage:
*
* .. code-block:: c
* :caption: Defining a static variable in Thread Local Storage
*
* QEMU_DEFINE_STATIC_CO_TLS(int, my_count)
* ...
* int c = get_my_count();
* set_my_count(c + 1);
* *get_ptr_my_count() = 0;
*
* This is a coroutine-safe replacement for the __thread keyword and is
* equivalent to the following code:
*
* .. code-block:: c
* :caption: Defining a static TLS variable using __thread
*
* static __thread int my_count;
* ...
* int c = my_count;
* my_count = c + 1;
* *(&my_count) = 0;
*
* Both the storage (co_tls_<var>) and the three accessors are static.
* Every accessor is marked noinline and unused, so a file that calls only
* some of them does not get unused-function warnings for the rest.
*/
#define QEMU_DEFINE_STATIC_CO_TLS(type, var) \
static __thread type co_tls_##var; \
static __attribute__((noinline, unused)) \
type get_##var(void) \
{ asm volatile(""); return co_tls_##var; } \
static __attribute__((noinline, unused)) \
void set_##var(type v) \
{ asm volatile(""); co_tls_##var = v; } \
static __attribute__((noinline, unused)) \
type *get_ptr_##var(void) \
{ type *ptr = &co_tls_##var; asm volatile("" : "+rm" (ptr)); return ptr; }
#endif /* QEMU_COROUTINE_TLS_H */

View File

@ -169,6 +169,12 @@ typedef struct Job {
* Callbacks and other information about a Job driver.
*/
struct JobDriver {
/*
* These fields are initialized when this object is created,
* and are never changed afterwards
*/
/** Derived Job struct size */
size_t instance_size;
@ -184,9 +190,18 @@ struct JobDriver {
* aborted. If it returns zero, the job moves into the WAITING state. If it
* is the last job to complete in its transaction, all jobs in the
* transaction move from WAITING to PENDING.
*
* This callback must be run in the job's context.
*/
int coroutine_fn (*run)(Job *job, Error **errp);
/*
* Functions run without regard to the BQL that may run in any
* arbitrary thread. These functions do not need to be thread-safe
* because the caller ensures that they are invoked from one
* thread at a time.
*/
/**
* If the callback is not NULL, it will be invoked when the job transitions
* into the paused state. Paused jobs must not perform any asynchronous
@ -201,6 +216,13 @@ struct JobDriver {
*/
void coroutine_fn (*resume)(Job *job);
/*
* Global state (GS) API. These functions run under the BQL.
*
* See include/block/block-global-state.h for more information about
* the GS API.
*/
/**
* Called when the job is resumed by the user (i.e. user_paused becomes
* false). .user_resume is called before .resume.

View File

@ -242,9 +242,51 @@ AioContext *iohandler_get_aio_context(void);
* must always be taken outside other locks. This function helps
* functions take different paths depending on whether the current
* thread is running within the main loop mutex.
*
* This function should never be used in the block layer, because
* unit tests, block layer tools and qemu-storage-daemon do not
* have a BQL.
* Please instead refer to qemu_in_main_thread().
*/
bool qemu_mutex_iothread_locked(void);
/**
* qemu_in_main_thread: return whether it's possible to safely access
* the global state of the block layer.
*
* Global state of the block layer is not accessible from I/O threads
* or worker threads; only from threads that "own" the default
* AioContext that qemu_get_aio_context() returns. For tests, block
* layer tools and qemu-storage-daemon there is a designated thread that
* runs the event loop for qemu_get_aio_context(), and that is the
* main thread.
*
* For emulators, however, any thread that holds the BQL can act
* as the block layer main thread; this will be any of the actual
* main thread, the vCPU threads or the RCU thread.
*
* For clarity, do not use this function outside the block layer.
*/
bool qemu_in_main_thread(void);
/*
* Mark and check that the function is part of the global state API:
* asserts that the caller satisfies qemu_in_main_thread().
*/
#define GLOBAL_STATE_CODE() \
do { \
assert(qemu_in_main_thread()); \
} while (0)
/*
* Mark that the function is part of the I/O API.
* This is currently an annotation only: the macro expands to an empty
* statement and performs no check at run time.
*/
#define IO_CODE() \
do { \
/* nop */ \
} while (0)
/* Mark and check that the function is part of the "I/O OR GS" API. */
#define IO_OR_GS_CODE() \
do { \
/* nop */ \
} while (0)
/**
* qemu_mutex_lock_iothread: Lock the main loop mutex.
*

View File

@ -29,6 +29,7 @@
#include "qemu/atomic.h"
#include "qemu/notify.h"
#include "qemu/sys_membarrier.h"
#include "qemu/coroutine-tls.h"
#ifdef __cplusplus
extern "C" {
@ -76,11 +77,11 @@ struct rcu_reader_data {
NotifierList force_rcu;
};
extern __thread struct rcu_reader_data rcu_reader;
QEMU_DECLARE_CO_TLS(struct rcu_reader_data, rcu_reader)
static inline void rcu_read_lock(void)
{
struct rcu_reader_data *p_rcu_reader = &rcu_reader;
struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
unsigned ctr;
if (p_rcu_reader->depth++ > 0) {
@ -96,7 +97,7 @@ static inline void rcu_read_lock(void)
static inline void rcu_read_unlock(void)
{
struct rcu_reader_data *p_rcu_reader = &rcu_reader;
struct rcu_reader_data *p_rcu_reader = get_ptr_rcu_reader();
assert(p_rcu_reader->depth != 0);
if (--p_rcu_reader->depth > 0) {

View File

@ -0,0 +1,102 @@
/*
* QEMU Block backends
*
* Copyright (C) 2014-2016 Red Hat, Inc.
*
* Authors:
* Markus Armbruster <armbru@redhat.com>,
*
* This work is licensed under the terms of the GNU LGPL, version 2.1
* or later. See the COPYING.LIB file in the top-level directory.
*/
#ifndef BLOCK_BACKEND_COMMON_H
#define BLOCK_BACKEND_COMMON_H
#include "qemu/iov.h"
#include "block/throttle-groups.h"
/*
* TODO Have to include block/block.h for a bunch of block layer
* types. Unfortunately, this pulls in the whole BlockDriverState
* API, which we don't want used by many BlockBackend users. Some of
* the types belong here, and the rest should be split into a common
* header and one for the BlockDriverState API.
*/
#include "block/block.h"
/*
* Callbacks for block device models.
*
* Every callback takes an opaque pointer as its first argument.
* NOTE(review): presumably the opaque value registered together with the
* ops through blk_set_dev_ops() -- confirm against block-backend.c.
*
* The members are grouped by the API category (global state or I/O) that
* the section comments below assign them to.
*/
typedef struct BlockDevOps {
/*
* Global state (GS) API. These functions run under the BQL.
*
* See include/block/block-global-state.h for more information about
* the GS API.
*/
/*
* Runs when virtual media changed (monitor commands eject, change)
* Argument load is true on load and false on eject.
* Beware: doesn't run when a host device's physical media
* changes. Sure would be useful if it did.
* Device models with removable media must implement this callback.
*/
void (*change_media_cb)(void *opaque, bool load, Error **errp);
/*
* Runs when an eject request is issued from the monitor, the tray
* is closed, and the medium is locked.
* Device models that do not implement is_medium_locked will not need
* this callback. Device models that can lock the medium or tray might
* want to implement the callback and unlock the tray when "force" is
* true, even if they do not support eject requests.
*/
void (*eject_request_cb)(void *opaque, bool force);
/*
* Is the virtual medium locked into the device?
* Device models implement this only when the device has such a lock.
*/
bool (*is_medium_locked)(void *opaque);
/*
* I/O API functions. These functions are thread-safe.
*
* See include/block/block-io.h for more information about
* the I/O API.
*/
/*
* Is the virtual tray open?
* Device models implement this only when the device has a tray.
*/
bool (*is_tray_open)(void *opaque);
/*
* Runs when the size changed (e.g. monitor command block_resize)
*/
void (*resize_cb)(void *opaque);
/*
* Runs when the backend receives a drain request.
*/
void (*drained_begin)(void *opaque);
/*
* Runs when the backend's last drain request ends.
*/
void (*drained_end)(void *opaque);
/*
* Is the device still busy?
*/
bool (*drained_poll)(void *opaque);
} BlockDevOps;
/*
* This struct is embedded in (the private) BlockBackend struct and contains
* fields that must be public. This is in particular for QLIST_ENTRY() and
* friends so that BlockBackends can be kept in lists outside block-backend.c
*
* Conversion between the two structs is declared in the global state
* header: blk_get_public() and blk_by_public().
*/
typedef struct BlockBackendPublic {
/*
* NOTE(review): presumably the throttling state driven by the
* blk_io_limits_*() functions -- confirm against block-backend.c.
*/
ThrottleGroupMember throttle_group_member;
} BlockBackendPublic;
#endif /* BLOCK_BACKEND_COMMON_H */

View File

@ -0,0 +1,116 @@
/*
* QEMU Block backends
*
* Copyright (C) 2014-2016 Red Hat, Inc.
*
* Authors:
* Markus Armbruster <armbru@redhat.com>,
*
* This work is licensed under the terms of the GNU LGPL, version 2.1
* or later. See the COPYING.LIB file in the top-level directory.
*/
#ifndef BLOCK_BACKEND_GS_H
#define BLOCK_BACKEND_GS_H
#include "block-backend-common.h"
/*
* Global state (GS) API. These functions run under the BQL.
*
* See include/block/block-global-state.h for more information about
* the GS API.
*/
BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, Error **errp);
BlockBackend *blk_new_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp);
int blk_get_refcnt(BlockBackend *blk);
void blk_ref(BlockBackend *blk);
void blk_unref(BlockBackend *blk);
void blk_remove_all_bs(void);
BlockBackend *blk_by_name(const char *name);
BlockBackend *blk_next(BlockBackend *blk);
BlockBackend *blk_all_next(BlockBackend *blk);
bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
void monitor_remove_blk(BlockBackend *blk);
BlockBackendPublic *blk_get_public(BlockBackend *blk);
BlockBackend *blk_by_public(BlockBackendPublic *public);
void blk_remove_bs(BlockBackend *blk);
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
bool bdrv_has_blk(BlockDriverState *bs);
bool bdrv_is_root_node(BlockDriverState *bs);
int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
Error **errp);
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
void blk_iostatus_enable(BlockBackend *blk);
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
void blk_iostatus_disable(BlockBackend *blk);
void blk_iostatus_reset(BlockBackend *blk);
int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
DeviceState *blk_get_attached_dev(BlockBackend *blk);
BlockBackend *blk_by_dev(void *dev);
BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
void blk_activate(BlockBackend *blk, Error **errp);
int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
void blk_aio_cancel(BlockAIOCB *acb);
int blk_commit_all(void);
void blk_drain(BlockBackend *blk);
void blk_drain_all(void);
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
BlockdevOnError on_write_error);
bool blk_supports_write_perm(BlockBackend *blk);
bool blk_is_sg(BlockBackend *blk);
void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
int blk_get_flags(BlockBackend *blk);
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
void blk_op_block_all(BlockBackend *blk, Error *reason);
void blk_op_unblock_all(BlockBackend *blk, Error *reason);
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
Error **errp);
void blk_add_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque);
void blk_remove_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *,
void *),
void (*detach_aio_context)(void *),
void *opaque);
void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
void blk_update_root_state(BlockBackend *blk);
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
int blk_get_open_flags_from_root_state(BlockBackend *blk);
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size);
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
void blk_io_limits_disable(BlockBackend *blk);
void blk_io_limits_enable(BlockBackend *blk, const char *group);
void blk_io_limits_update_group(BlockBackend *blk, const char *group);
void blk_set_force_allow_inactivate(BlockBackend *blk);
void blk_register_buf(BlockBackend *blk, void *host, size_t size);
void blk_unregister_buf(BlockBackend *blk, void *host);
const BdrvChild *blk_root(BlockBackend *blk);
int blk_make_empty(BlockBackend *blk, Error **errp);
#endif /* BLOCK_BACKEND_GS_H */

View File

@ -0,0 +1,161 @@
/*
* QEMU Block backends
*
* Copyright (C) 2014-2016 Red Hat, Inc.
*
* Authors:
* Markus Armbruster <armbru@redhat.com>,
*
* This work is licensed under the terms of the GNU LGPL, version 2.1
* or later. See the COPYING.LIB file in the top-level directory.
*/
#ifndef BLOCK_BACKEND_IO_H
#define BLOCK_BACKEND_IO_H
#include "block-backend-common.h"
/*
* I/O API functions. These functions are thread-safe.
*
* See include/block/block-io.h for more information about
* the I/O API.
*/
const char *blk_name(const BlockBackend *blk);
BlockDriverState *blk_bs(BlockBackend *blk);
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
bool blk_iostatus_is_enabled(const BlockBackend *blk);
char *blk_get_attached_dev_id(BlockBackend *blk);
BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes,
BlockCompletionFunc *cb, void *opaque);
void blk_aio_cancel_async(BlockAIOCB *acb);
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque);
void blk_inc_in_flight(BlockBackend *blk);
void blk_dec_in_flight(BlockBackend *blk);
bool blk_is_inserted(BlockBackend *blk);
bool blk_is_available(BlockBackend *blk);
void blk_lock_medium(BlockBackend *blk, bool locked);
void blk_eject(BlockBackend *blk, bool eject_flag);
int64_t blk_getlength(BlockBackend *blk);
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
int64_t blk_nb_sectors(BlockBackend *blk);
void *blk_try_blockalign(BlockBackend *blk, size_t size);
void *blk_blockalign(BlockBackend *blk, size_t size);
bool blk_is_writable(BlockBackend *blk);
bool blk_enable_write_cache(BlockBackend *blk);
BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
int error);
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool is_read, int error);
void blk_iostatus_set_err(BlockBackend *blk, int error);
int blk_get_max_iov(BlockBackend *blk);
int blk_get_max_hw_iov(BlockBackend *blk);
void blk_set_guest_block_size(BlockBackend *blk, int align);
void blk_io_plug(BlockBackend *blk);
void blk_io_unplug(BlockBackend *blk);
AioContext *blk_get_aio_context(BlockBackend *blk);
BlockAcctStats *blk_get_stats(BlockBackend *blk);
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
BlockCompletionFunc *cb,
void *opaque, int ret);
uint32_t blk_get_request_alignment(BlockBackend *blk);
uint32_t blk_get_max_transfer(BlockBackend *blk);
uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
BlockBackend *blk_out, int64_t off_out,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
/*
* "I/O or GS" API functions. These functions can run without
* the BQL, but only in one specific iothread/main loop.
*
* See include/block/block-io.h for more information about
* the "I/O or GS" API.
*/
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes);
int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
BdrvRequestFlags flags);
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
/**
 * blk_co_pread:
 * @blk: backend to read from, forwarded unchanged to blk_co_preadv()
 * @offset: start offset of the request
 * @bytes: length of the request, also used as the length of @buf
 * @buf: flat destination buffer
 * @flags: request flags, forwarded unchanged
 *
 * Flat-buffer convenience wrapper around blk_co_preadv().
 *
 * Returns: whatever blk_co_preadv() returns.
 */
static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
                                            int64_t bytes, void *buf,
                                            BdrvRequestFlags flags)
{
    /* Describe @buf/@bytes as a QEMUIOVector for the vectored variant. */
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
    int ret;

    IO_OR_GS_CODE();

    /*
     * NOTE(review): presumably guards the 64-bit length against hosts
     * with a narrower size_t -- confirm.
     */
    assert(bytes <= SIZE_MAX);

    ret = blk_co_preadv(blk, offset, bytes, &qiov, flags);
    return ret;
}
/**
 * blk_co_pwrite:
 * @blk: backend to write to, forwarded unchanged to blk_co_pwritev()
 * @offset: start offset of the request
 * @bytes: length of the request, also used as the length of @buf
 * @buf: flat source buffer
 * @flags: request flags, forwarded unchanged
 *
 * Flat-buffer convenience wrapper around blk_co_pwritev().
 *
 * Returns: whatever blk_co_pwritev() returns.
 */
static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
                                             int64_t bytes, void *buf,
                                             BdrvRequestFlags flags)
{
    /* Describe @buf/@bytes as a QEMUIOVector for the vectored variant. */
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, buf, bytes);
    int ret;

    IO_OR_GS_CODE();

    /*
     * NOTE(review): presumably guards the 64-bit length against hosts
     * with a narrower size_t -- confirm.
     */
    assert(bytes <= SIZE_MAX);

    ret = blk_co_pwritev(blk, offset, bytes, &qiov, flags);
    return ret;
}
int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
int64_t bytes);
int coroutine_fn blk_co_flush(BlockBackend *blk);
int blk_flush(BlockBackend *blk);
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
int64_t bytes);
int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
#endif /* BLOCK_BACKEND_IO_H */

View File

@ -13,272 +13,9 @@
#ifndef BLOCK_BACKEND_H
#define BLOCK_BACKEND_H
#include "qemu/iov.h"
#include "block/throttle-groups.h"
#include "block-backend-global-state.h"
#include "block-backend-io.h"
/*
* TODO Have to include block/block.h for a bunch of block layer
* types. Unfortunately, this pulls in the whole BlockDriverState
* API, which we don't want used by many BlockBackend users. Some of
* the types belong here, and the rest should be split into a common
* header and one for the BlockDriverState API.
*/
#include "block/block.h"
/*
 * Callbacks for block device models.
 *
 * Every callback takes an opaque pointer as its first argument
 * (presumably the one passed to blk_set_dev_ops() - confirm against
 * block-backend.c).
 */
typedef struct BlockDevOps {
/*
* Runs when virtual media changed (monitor commands eject, change).
* Argument @load is true on load and false on eject; failures are
* reported through @errp.
* Beware: doesn't run when a host device's physical media
* changes. Sure would be useful if it did.
* Device models with removable media must implement this callback.
*/
void (*change_media_cb)(void *opaque, bool load, Error **errp);
/*
* Runs when an eject request is issued from the monitor, the tray
* is closed, and the medium is locked.
* Device models that do not implement is_medium_locked will not need
* this callback. Device models that can lock the medium or tray might
* want to implement the callback and unlock the tray when @force is
* true, even if they do not support eject requests.
*/
void (*eject_request_cb)(void *opaque, bool force);
/*
* Is the virtual tray open?
* Device models implement this only when the device has a tray.
*/
bool (*is_tray_open)(void *opaque);
/*
* Is the virtual medium locked into the device?
* Device models implement this only when the device has such a lock.
*/
bool (*is_medium_locked)(void *opaque);
/*
* Runs when the size changed (e.g. monitor command block_resize).
*/
void (*resize_cb)(void *opaque);
/*
* Runs when the backend receives a drain request.
*/
void (*drained_begin)(void *opaque);
/*
* Runs when the backend's last drain request ends.
*/
void (*drained_end)(void *opaque);
/*
* Is the device still busy? Returns true while it is.
*/
bool (*drained_poll)(void *opaque);
} BlockDevOps;
/*
 * This struct is embedded in (the private) BlockBackend struct and contains
 * fields that must be public. This is in particular for QLIST_ENTRY() and
 * friends so that BlockBackends can be kept in lists outside block-backend.c.
 *
 * Its only member here is the throttle group membership of the backend.
 */
typedef struct BlockBackendPublic {
ThrottleGroupMember throttle_group_member;
} BlockBackendPublic;
BlockBackend *blk_new(AioContext *ctx, uint64_t perm, uint64_t shared_perm);
BlockBackend *blk_new_with_bs(BlockDriverState *bs, uint64_t perm,
uint64_t shared_perm, Error **errp);
BlockBackend *blk_new_open(const char *filename, const char *reference,
QDict *options, int flags, Error **errp);
int blk_get_refcnt(BlockBackend *blk);
void blk_ref(BlockBackend *blk);
void blk_unref(BlockBackend *blk);
void blk_remove_all_bs(void);
const char *blk_name(const BlockBackend *blk);
BlockBackend *blk_by_name(const char *name);
BlockBackend *blk_next(BlockBackend *blk);
BlockBackend *blk_all_next(BlockBackend *blk);
bool monitor_add_blk(BlockBackend *blk, const char *name, Error **errp);
void monitor_remove_blk(BlockBackend *blk);
BlockBackendPublic *blk_get_public(BlockBackend *blk);
BlockBackend *blk_by_public(BlockBackendPublic *public);
BlockDriverState *blk_bs(BlockBackend *blk);
void blk_remove_bs(BlockBackend *blk);
int blk_insert_bs(BlockBackend *blk, BlockDriverState *bs, Error **errp);
int blk_replace_bs(BlockBackend *blk, BlockDriverState *new_bs, Error **errp);
bool bdrv_has_blk(BlockDriverState *bs);
bool bdrv_is_root_node(BlockDriverState *bs);
int blk_set_perm(BlockBackend *blk, uint64_t perm, uint64_t shared_perm,
Error **errp);
void blk_get_perm(BlockBackend *blk, uint64_t *perm, uint64_t *shared_perm);
void blk_set_allow_write_beyond_eof(BlockBackend *blk, bool allow);
void blk_set_allow_aio_context_change(BlockBackend *blk, bool allow);
void blk_set_disable_request_queuing(BlockBackend *blk, bool disable);
void blk_iostatus_enable(BlockBackend *blk);
bool blk_iostatus_is_enabled(const BlockBackend *blk);
BlockDeviceIoStatus blk_iostatus(const BlockBackend *blk);
void blk_iostatus_disable(BlockBackend *blk);
void blk_iostatus_reset(BlockBackend *blk);
void blk_iostatus_set_err(BlockBackend *blk, int error);
int blk_attach_dev(BlockBackend *blk, DeviceState *dev);
void blk_detach_dev(BlockBackend *blk, DeviceState *dev);
DeviceState *blk_get_attached_dev(BlockBackend *blk);
char *blk_get_attached_dev_id(BlockBackend *blk);
BlockBackend *blk_by_dev(void *dev);
BlockBackend *blk_by_qdev_id(const char *id, Error **errp);
void blk_set_dev_ops(BlockBackend *blk, const BlockDevOps *ops, void *opaque);
int coroutine_fn blk_co_preadv(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pwritev_part(BlockBackend *blk, int64_t offset,
int64_t bytes,
QEMUIOVector *qiov, size_t qiov_offset,
BdrvRequestFlags flags);
int coroutine_fn blk_co_pwritev(BlockBackend *blk, int64_t offset,
int64_t bytes, QEMUIOVector *qiov,
BdrvRequestFlags flags);
/*
 * Coroutine helper that reads @bytes bytes at @offset from @blk into the
 * flat buffer @buf.
 *
 * @buf is wrapped in a QEMUIOVector via QEMU_IOVEC_INIT_BUF() and the
 * request is forwarded to blk_co_preadv(); that function's return value is
 * passed through unchanged.  @bytes must not exceed SIZE_MAX (asserted).
 */
static inline int coroutine_fn blk_co_pread(BlockBackend *blk, int64_t offset,
                                            int64_t bytes, void *buf,
                                            BdrvRequestFlags flags)
{
    QEMUIOVector buf_qiov = QEMU_IOVEC_INIT_BUF(buf_qiov, buf, bytes);

    assert(bytes <= SIZE_MAX);

    return blk_co_preadv(blk, offset, bytes, &buf_qiov, flags);
}
/*
 * Coroutine helper that writes @bytes bytes from the flat buffer @buf to
 * @blk at @offset.
 *
 * @buf is wrapped in a QEMUIOVector via QEMU_IOVEC_INIT_BUF() and the
 * request is forwarded to blk_co_pwritev(); that function's return value is
 * passed through unchanged.  @bytes must not exceed SIZE_MAX (asserted).
 *
 * @buf is const-qualified, matching blk_pwrite() and
 * blk_pwrite_compressed(), so callers may pass read-only data without a
 * cast.
 */
static inline int coroutine_fn blk_co_pwrite(BlockBackend *blk, int64_t offset,
                                             int64_t bytes, const void *buf,
                                             BdrvRequestFlags flags)
{
    /*
     * QEMUIOVector has no const variant, so the qualifier is dropped here;
     * a write request is only expected to read from @buf.
     */
    QEMUIOVector qiov = QEMU_IOVEC_INIT_BUF(qiov, (void *)buf, bytes);

    assert(bytes <= SIZE_MAX);

    return blk_co_pwritev(blk, offset, bytes, &qiov, flags);
}
int blk_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
BlockAIOCB *blk_aio_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque);
int blk_make_zero(BlockBackend *blk, BdrvRequestFlags flags);
int blk_pread(BlockBackend *blk, int64_t offset, void *buf, int bytes);
int blk_pwrite(BlockBackend *blk, int64_t offset, const void *buf, int bytes,
BdrvRequestFlags flags);
int64_t blk_getlength(BlockBackend *blk);
void blk_get_geometry(BlockBackend *blk, uint64_t *nb_sectors_ptr);
int64_t blk_nb_sectors(BlockBackend *blk);
BlockAIOCB *blk_aio_preadv(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_pwritev(BlockBackend *blk, int64_t offset,
QEMUIOVector *qiov, BdrvRequestFlags flags,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_flush(BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque);
BlockAIOCB *blk_aio_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes,
BlockCompletionFunc *cb, void *opaque);
void blk_aio_cancel(BlockAIOCB *acb);
void blk_aio_cancel_async(BlockAIOCB *acb);
int blk_ioctl(BlockBackend *blk, unsigned long int req, void *buf);
BlockAIOCB *blk_aio_ioctl(BlockBackend *blk, unsigned long int req, void *buf,
BlockCompletionFunc *cb, void *opaque);
int coroutine_fn blk_co_pdiscard(BlockBackend *blk, int64_t offset,
int64_t bytes);
int coroutine_fn blk_co_flush(BlockBackend *blk);
int blk_flush(BlockBackend *blk);
int blk_commit_all(void);
void blk_inc_in_flight(BlockBackend *blk);
void blk_dec_in_flight(BlockBackend *blk);
void blk_drain(BlockBackend *blk);
void blk_drain_all(void);
void blk_set_on_error(BlockBackend *blk, BlockdevOnError on_read_error,
BlockdevOnError on_write_error);
BlockdevOnError blk_get_on_error(BlockBackend *blk, bool is_read);
BlockErrorAction blk_get_error_action(BlockBackend *blk, bool is_read,
int error);
void blk_error_action(BlockBackend *blk, BlockErrorAction action,
bool is_read, int error);
bool blk_supports_write_perm(BlockBackend *blk);
bool blk_is_writable(BlockBackend *blk);
bool blk_is_sg(BlockBackend *blk);
bool blk_enable_write_cache(BlockBackend *blk);
void blk_set_enable_write_cache(BlockBackend *blk, bool wce);
void blk_invalidate_cache(BlockBackend *blk, Error **errp);
bool blk_is_inserted(BlockBackend *blk);
bool blk_is_available(BlockBackend *blk);
void blk_lock_medium(BlockBackend *blk, bool locked);
void blk_eject(BlockBackend *blk, bool eject_flag);
int blk_get_flags(BlockBackend *blk);
uint32_t blk_get_request_alignment(BlockBackend *blk);
uint32_t blk_get_max_transfer(BlockBackend *blk);
uint64_t blk_get_max_hw_transfer(BlockBackend *blk);
int blk_get_max_iov(BlockBackend *blk);
int blk_get_max_hw_iov(BlockBackend *blk);
void blk_set_guest_block_size(BlockBackend *blk, int align);
void *blk_try_blockalign(BlockBackend *blk, size_t size);
void *blk_blockalign(BlockBackend *blk, size_t size);
bool blk_op_is_blocked(BlockBackend *blk, BlockOpType op, Error **errp);
void blk_op_unblock(BlockBackend *blk, BlockOpType op, Error *reason);
void blk_op_block_all(BlockBackend *blk, Error *reason);
void blk_op_unblock_all(BlockBackend *blk, Error *reason);
AioContext *blk_get_aio_context(BlockBackend *blk);
int blk_set_aio_context(BlockBackend *blk, AioContext *new_context,
Error **errp);
void blk_add_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *new_context, void *opaque),
void (*detach_aio_context)(void *opaque), void *opaque);
void blk_remove_aio_context_notifier(BlockBackend *blk,
void (*attached_aio_context)(AioContext *,
void *),
void (*detach_aio_context)(void *),
void *opaque);
void blk_add_remove_bs_notifier(BlockBackend *blk, Notifier *notify);
void blk_add_insert_bs_notifier(BlockBackend *blk, Notifier *notify);
void blk_io_plug(BlockBackend *blk);
void blk_io_unplug(BlockBackend *blk);
BlockAcctStats *blk_get_stats(BlockBackend *blk);
BlockBackendRootState *blk_get_root_state(BlockBackend *blk);
void blk_update_root_state(BlockBackend *blk);
bool blk_get_detect_zeroes_from_root_state(BlockBackend *blk);
int blk_get_open_flags_from_root_state(BlockBackend *blk);
void *blk_aio_get(const AIOCBInfo *aiocb_info, BlockBackend *blk,
BlockCompletionFunc *cb, void *opaque);
int coroutine_fn blk_co_pwrite_zeroes(BlockBackend *blk, int64_t offset,
int64_t bytes, BdrvRequestFlags flags);
int blk_pwrite_compressed(BlockBackend *blk, int64_t offset, const void *buf,
int64_t bytes);
int blk_truncate(BlockBackend *blk, int64_t offset, bool exact,
PreallocMode prealloc, BdrvRequestFlags flags, Error **errp);
int blk_pdiscard(BlockBackend *blk, int64_t offset, int64_t bytes);
int blk_save_vmstate(BlockBackend *blk, const uint8_t *buf,
int64_t pos, int size);
int blk_load_vmstate(BlockBackend *blk, uint8_t *buf, int64_t pos, int size);
int blk_probe_blocksizes(BlockBackend *blk, BlockSizes *bsz);
int blk_probe_geometry(BlockBackend *blk, HDGeometry *geo);
BlockAIOCB *blk_abort_aio_request(BlockBackend *blk,
BlockCompletionFunc *cb,
void *opaque, int ret);
void blk_set_io_limits(BlockBackend *blk, ThrottleConfig *cfg);
void blk_io_limits_disable(BlockBackend *blk);
void blk_io_limits_enable(BlockBackend *blk, const char *group);
void blk_io_limits_update_group(BlockBackend *blk, const char *group);
void blk_set_force_allow_inactivate(BlockBackend *blk);
void blk_register_buf(BlockBackend *blk, void *host, size_t size);
void blk_unregister_buf(BlockBackend *blk, void *host);
int coroutine_fn blk_co_copy_range(BlockBackend *blk_in, int64_t off_in,
BlockBackend *blk_out, int64_t off_out,
int64_t bytes, BdrvRequestFlags read_flags,
BdrvRequestFlags write_flags);
const BdrvChild *blk_root(BlockBackend *blk);
int blk_make_empty(BlockBackend *blk, Error **errp);
/* DO NOT ADD ANYTHING IN HERE. USE ONE OF THE HEADERS INCLUDED ABOVE */
#endif

View File

@ -13,9 +13,6 @@
#include "block/block.h"
#include "qemu/queue.h"
void blockdev_mark_auto_del(BlockBackend *blk);
void blockdev_auto_del(BlockBackend *blk);
typedef enum {
IF_DEFAULT = -1, /* for use with drive_add() only */
/*
@ -38,6 +35,16 @@ struct DriveInfo {
QTAILQ_ENTRY(DriveInfo) next;
};
/*
* Global state (GS) API. These functions run under the BQL.
*
* See include/block/block-global-state.h for more information about
* the GS API.
*/
void blockdev_mark_auto_del(BlockBackend *blk);
void blockdev_auto_del(BlockBackend *blk);
DriveInfo *blk_legacy_dinfo(BlockBackend *blk);
DriveInfo *blk_set_legacy_dinfo(BlockBackend *blk, DriveInfo *dinfo);
BlockBackend *blk_by_legacy_dinfo(DriveInfo *dinfo);

View File

@ -55,6 +55,7 @@ int os_mlock(void);
typedef struct timeval qemu_timeval;
#define qemu_gettimeofday(tp) gettimeofday(tp, NULL)
int os_set_daemonize(bool d);
bool is_daemonized(void);
/**

View File

@ -77,6 +77,14 @@ typedef struct {
} qemu_timeval;
int qemu_gettimeofday(qemu_timeval *tp);
/*
 * Request (@d true) or decline (@d false) daemonization.
 *
 * This variant cannot daemonize: requesting it fails with -ENOTSUP,
 * while declining it is accepted and returns 0.
 */
static inline int os_set_daemonize(bool d)
{
    return d ? -ENOTSUP : 0;
}
static inline bool is_daemonized(void)
{
return false;

10
job.c
View File

@ -381,6 +381,8 @@ void job_ref(Job *job)
void job_unref(Job *job)
{
GLOBAL_STATE_CODE();
if (--job->refcnt == 0) {
assert(job->status == JOB_STATUS_NULL);
assert(!timer_pending(&job->sleep_timer));
@ -602,6 +604,7 @@ bool job_user_paused(Job *job)
void job_user_resume(Job *job, Error **errp)
{
assert(job);
GLOBAL_STATE_CODE();
if (!job->user_paused || job->pause_count <= 0) {
error_setg(errp, "Can't resume a job that was not paused");
return;
@ -672,6 +675,7 @@ static void job_update_rc(Job *job)
static void job_commit(Job *job)
{
assert(!job->ret);
GLOBAL_STATE_CODE();
if (job->driver->commit) {
job->driver->commit(job);
}
@ -680,6 +684,7 @@ static void job_commit(Job *job)
static void job_abort(Job *job)
{
assert(job->ret);
GLOBAL_STATE_CODE();
if (job->driver->abort) {
job->driver->abort(job);
}
@ -687,6 +692,7 @@ static void job_abort(Job *job)
static void job_clean(Job *job)
{
GLOBAL_STATE_CODE();
if (job->driver->clean) {
job->driver->clean(job);
}
@ -726,6 +732,7 @@ static int job_finalize_single(Job *job)
static void job_cancel_async(Job *job, bool force)
{
GLOBAL_STATE_CODE();
if (job->driver->cancel) {
force = job->driver->cancel(job, force);
} else {
@ -825,6 +832,7 @@ static void job_completed_txn_abort(Job *job)
static int job_prepare(Job *job)
{
GLOBAL_STATE_CODE();
if (job->ret == 0 && job->driver->prepare) {
job->ret = job->driver->prepare(job);
job_update_rc(job);
@ -952,6 +960,7 @@ static void coroutine_fn job_co_entry(void *opaque)
Job *job = opaque;
assert(job && job->driver && job->driver->run);
assert(job->aio_context == qemu_get_current_aio_context());
job_pause_point(job);
job->ret = job->driver->run(job, &job->err);
job->deferred_to_main_loop = true;
@ -1054,6 +1063,7 @@ void job_complete(Job *job, Error **errp)
{
/* Should not be reachable via external interface for internal jobs */
assert(job->id);
GLOBAL_STATE_CODE();
if (job_apply_verb(job, JOB_VERB_COMPLETE, errp)) {
return;
}

View File

@ -932,7 +932,7 @@ static int block_load(QEMUFile *f, void *opaque, int version_id)
return -EINVAL;
}
blk_invalidate_cache(blk, &local_err);
blk_activate(blk, &local_err);
if (local_err) {
error_report_err(local_err);
return -EINVAL;

View File

@ -503,9 +503,9 @@ static void process_incoming_migration_bh(void *opaque)
if (!migrate_late_block_activate() ||
(autostart && (!global_state_received() ||
global_state_get_runstate() == RUN_STATE_RUNNING))) {
/* Make sure all file formats flush their mutable metadata.
/* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
bdrv_invalidate_cache_all(&local_err);
bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
local_err = NULL;
@ -591,8 +591,8 @@ static void process_incoming_migration_co(void *opaque)
/* we get COLO info, and know if we are in COLO mode */
if (!ret && migration_incoming_colo_enabled()) {
/* Make sure all file formats flush their mutable metadata */
bdrv_invalidate_cache_all(&local_err);
/* Make sure all file formats throw away their mutable metadata */
bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
goto fail;
@ -1932,7 +1932,7 @@ static void migrate_fd_cancel(MigrationState *s)
if (s->state == MIGRATION_STATUS_CANCELLING && s->block_inactive) {
Error *local_err = NULL;
bdrv_invalidate_cache_all(&local_err);
bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
} else {
@ -3111,7 +3111,7 @@ fail:
*/
Error *local_err = NULL;
bdrv_invalidate_cache_all(&local_err);
bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
}
@ -3256,7 +3256,7 @@ fail_invalidate:
Error *local_err = NULL;
qemu_mutex_lock_iothread();
bdrv_invalidate_cache_all(&local_err);
bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
} else {

View File

@ -1438,7 +1438,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
if (inactivate_disks) {
/* Inactivate before sending QEMU_VM_EOF so that the
* bdrv_invalidate_cache_all() on the other end won't fail. */
* bdrv_activate_all() on the other end won't fail. */
ret = bdrv_inactivate_all();
if (ret) {
error_report("%s: bdrv_inactivate_all() failed (%d)",
@ -2013,9 +2013,9 @@ static void loadvm_postcopy_handle_run_bh(void *opaque)
trace_loadvm_postcopy_handle_run_bh("after announce");
/* Make sure all file formats flush their mutable metadata.
/* Make sure all file formats throw away their mutable metadata.
* If we get an error here, just don't restart the VM yet. */
bdrv_invalidate_cache_all(&local_err);
bdrv_activate_all(&local_err);
if (local_err) {
error_report_err(local_err);
local_err = NULL;
@ -2808,6 +2808,8 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate,
g_autoptr(GDateTime) now = g_date_time_new_now_local();
AioContext *aio_context;
GLOBAL_STATE_CODE();
if (migration_is_blocked(errp)) {
return false;
}

View File

@ -144,7 +144,7 @@ void qmp_cont(Error **errp)
* If there are no inactive block nodes (e.g. because the VM was just
* paused rather than completing a migration), bdrv_activate_all() simply
* doesn't do anything. */
bdrv_invalidate_cache_all(&local_err);
bdrv_activate_all(&local_err);
if (local_err) {
error_propagate(errp, local_err);
return;

View File

@ -317,6 +317,12 @@ bool is_daemonized(void)
return daemonize;
}
/*
 * Record whether the process should daemonize by storing @d in the
 * file-level 'daemonize' flag (the value is_daemonized() reports).
 *
 * Returns 0; this variant has no failure path.
 */
int os_set_daemonize(bool d)
{
daemonize = d;
return 0;
}
int os_mlock(void)
{
#ifdef HAVE_MLOCKALL

View File

@ -25,6 +25,7 @@
#include "qemu/osdep.h"
#include "qemu-common.h"
#include "monitor/monitor.h"
#include "qemu/coroutine-tls.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-machine.h"
#include "qapi/qapi-commands-misc.h"
@ -473,11 +474,16 @@ bool qemu_in_vcpu_thread(void)
return current_cpu && qemu_cpu_is_self(current_cpu);
}
static __thread bool iothread_locked = false;
QEMU_DEFINE_STATIC_CO_TLS(bool, iothread_locked)
bool qemu_mutex_iothread_locked(void)
{
return iothread_locked;
return get_iothread_locked();
}
/*
 * Report whether the caller counts as running in the main thread.
 *
 * This implementation equates that with the result of
 * qemu_mutex_iothread_locked().
 */
bool qemu_in_main_thread(void)
{
return qemu_mutex_iothread_locked();
}
/*
@ -490,13 +496,13 @@ void qemu_mutex_lock_iothread_impl(const char *file, int line)
g_assert(!qemu_mutex_iothread_locked());
bql_lock(&qemu_global_mutex, file, line);
iothread_locked = true;
set_iothread_locked(true);
}
void qemu_mutex_unlock_iothread(void)
{
g_assert(qemu_mutex_iothread_locked());
iothread_locked = false;
set_iothread_locked(false);
qemu_mutex_unlock(&qemu_global_mutex);
}

View File

@ -973,6 +973,8 @@ BlockBackend *blk_by_qdev_id(const char *id, Error **errp)
DeviceState *dev;
BlockBackend *blk;
GLOBAL_STATE_CODE();
dev = find_device_state(id, errp);
if (dev == NULL) {
return NULL;

View File

@ -93,6 +93,9 @@ static void help(void)
" --chardev <options> configure a character device backend\n"
" (see the qemu(1) man page for possible options)\n"
"\n"
" --daemonize daemonize the process, and have the parent exit\n"
" once startup is complete\n"
"\n"
" --export [type=]nbd,id=<id>,node-name=<node-name>[,name=<export-name>]\n"
" [,writable=on|off][,bitmap=<name>]\n"
" export the specified block node over NBD\n"
@ -144,6 +147,7 @@ QEMU_HELP_BOTTOM "\n",
enum {
OPTION_BLOCKDEV = 256,
OPTION_CHARDEV,
OPTION_DAEMONIZE,
OPTION_EXPORT,
OPTION_MONITOR,
OPTION_NBD_SERVER,
@ -177,13 +181,30 @@ static int getopt_set_loc(int argc, char **argv, const char *optstring,
return c;
}
static void process_options(int argc, char *argv[])
/**
* Process QSD command-line arguments.
*
* This is done in two passes:
*
* First (@pre_init_pass is true), we do a pass where all global
* arguments pertaining to the QSD process (like --help or --daemonize)
* are processed. This pass is done before most of the QEMU-specific
* initialization steps (e.g. initializing the block layer or QMP), and
* so must only process arguments that are not really QEMU-specific.
*
* Second (@pre_init_pass is false), we (sequentially) process all
* QEMU/QSD-specific arguments. Many of these arguments are effectively
* translated to QMP commands (like --blockdev for blockdev-add, or
* --export for block-export-add).
*/
static void process_options(int argc, char *argv[], bool pre_init_pass)
{
int c;
static const struct option long_options[] = {
{"blockdev", required_argument, NULL, OPTION_BLOCKDEV},
{"chardev", required_argument, NULL, OPTION_CHARDEV},
{"daemonize", no_argument, NULL, OPTION_DAEMONIZE},
{"export", required_argument, NULL, OPTION_EXPORT},
{"help", no_argument, NULL, 'h'},
{"monitor", required_argument, NULL, OPTION_MONITOR},
@ -196,11 +217,27 @@ static void process_options(int argc, char *argv[])
};
/*
* In contrast to the system emulator, options are processed in the order
* they are given on the command lines. This means that things must be
* defined first before they can be referenced in another option.
* In contrast to the system emulator, QEMU-specific options are processed
* in the order they are given on the command lines. This means that things
* must be defined first before they can be referenced in another option.
*/
optind = 1;
while ((c = getopt_set_loc(argc, argv, "-hT:V", long_options)) != -1) {
bool handle_option_pre_init;
/* Should this argument be processed in the pre-init pass? */
handle_option_pre_init =
c == '?' ||
c == 'h' ||
c == 'V' ||
c == OPTION_DAEMONIZE ||
c == OPTION_PIDFILE;
/* Process every option only in its respective pass */
if (pre_init_pass != handle_option_pre_init) {
continue;
}
switch (c) {
case '?':
exit(EXIT_FAILURE);
@ -246,6 +283,12 @@ static void process_options(int argc, char *argv[])
qemu_opts_del(opts);
break;
}
case OPTION_DAEMONIZE:
if (os_set_daemonize(true) < 0) {
error_report("--daemonize not supported in this build");
exit(EXIT_FAILURE);
}
break;
case OPTION_EXPORT:
{
Visitor *v;
@ -334,6 +377,10 @@ int main(int argc, char *argv[])
qemu_init_exec_dir(argv[0]);
os_setup_signal_handling();
process_options(argc, argv, true);
os_daemonize();
module_call_init(MODULE_INIT_QOM);
module_call_init(MODULE_INIT_TRACE);
qemu_add_opts(&qemu_trace_opts);
@ -348,7 +395,7 @@ int main(int argc, char *argv[])
qemu_set_log(LOG_TRACE);
qemu_init_main_loop(&error_fatal);
process_options(argc, argv);
process_options(argc, argv, false);
/*
* Write the pid file after creating chardevs, exports, and NBD servers but
@ -356,6 +403,7 @@ int main(int argc, char *argv[])
* it.
*/
pid_file_init();
os_setup_post();
while (!exit_requested) {
main_loop_wait(false);

View File

@ -0,0 +1,8 @@
#include "qemu/osdep.h"
#include "qemu/main-loop.h"
/*
 * Report whether the caller counts as running in the main thread.
 *
 * This variant returns true when the current AioContext is the one
 * returned by qemu_get_aio_context() (presumably the main loop's
 * context - confirm).
 */
bool qemu_in_main_thread(void)
{
return qemu_get_current_aio_context() == qemu_get_aio_context();
}

View File

@ -17,6 +17,9 @@ if linux_io_uring.found()
stub_ss.add(files('io_uring.c'))
endif
stub_ss.add(files('iothread-lock.c'))
if have_block
stub_ss.add(files('iothread-lock-block.c'))
endif
stub_ss.add(files('isa-bus.c'))
stub_ss.add(files('is-daemonized.c'))
if libaio.found()

View File

@ -48,18 +48,6 @@ if LANG=C bash --version | grep -q 'GNU bash, version [123]' ; then
skip "bash version too old ==> Not running the qemu-iotests."
fi
if ! (sed --version | grep 'GNU sed') > /dev/null 2>&1 ; then
if ! command -v gsed >/dev/null 2>&1; then
skip "GNU sed not available ==> Not running the qemu-iotests."
fi
else
# Double-check that we're not using BusyBox' sed which says
# that "This is not GNU sed version 4.0" ...
if sed --version | grep -q 'not GNU sed' ; then
skip "BusyBox sed not supported ==> Not running the qemu-iotests."
fi
fi
cd tests/qemu-iotests
# QEMU_CHECK_BLOCK_AUTO is used to disable some unstable sub-tests

View File

@ -33,6 +33,12 @@ _cleanup()
_rm_test_img "${TEST_IMG}.copy"
_cleanup_test_img
_cleanup_qemu
if [ -f "$TEST_DIR/qsd.pid" ]; then
kill -SIGKILL "$(cat "$TEST_DIR/qsd.pid")"
rm -f "$TEST_DIR/qsd.pid"
fi
rm -f "$SOCK_DIR/qsd.sock"
}
trap "_cleanup; exit \$status" 0 1 2 3 15
@ -45,7 +51,7 @@ _supported_fmt qcow2
_supported_proto file
_supported_os Linux
size=64M
size=$((64 * 1048576))
TEST_IMG="${TEST_IMG}.base" _make_test_img $size
echo
@ -216,6 +222,188 @@ wait=1 _cleanup_qemu | grep -v 'JOB_STATUS_CHANGE'
_check_test_img
echo
echo === Start mirror to throttled QSD and exit qemu ===
echo
# Mirror to a throttled QSD instance (so that qemu cannot drain the
# throttling), wait for READY, then write some data to the device,
# and then quit qemu.
# (qemu should force-cancel the job and not wait for the data to be
# written to the target.)
_make_test_img $size
# Will be used by this and the next case
#
# Start a daemonized $QSD instance that exports a throttled null-co node:
#   - node "null": null-co driver, $size bytes
#   - node "throttled": throttle filter on "null", using throttle group
#     "thrgr" (limits.bps-total=1048576)
#   - NBD server on the unix socket $SOCK_DIR/qsd.sock, exporting
#     "throttled" as the writable export "target"
# The PID is written to $TEST_DIR/qsd.pid.
set_up_throttled_qsd() {
$QSD \
--object throttle-group,id=thrgr,limits.bps-total=1048576 \
--blockdev null-co,node-name=null,size=$size \
--blockdev throttle,node-name=throttled,throttle-group=thrgr,file=null \
--nbd-server addr.type=unix,addr.path="$SOCK_DIR/qsd.sock" \
--export nbd,id=exp,node-name=throttled,name=target,writable=true \
--pidfile "$TEST_DIR/qsd.pid" \
--daemonize
}
set_up_throttled_qsd
# Need a virtio-blk device so that qemu-io writes will not block the monitor
_launch_qemu \
--blockdev file,node-name=source-proto,filename="$TEST_IMG" \
--blockdev qcow2,node-name=source-fmt,file=source-proto \
--device virtio-blk,id=vblk,drive=source-fmt \
--blockdev "{\"driver\": \"nbd\",
\"node-name\": \"target\",
\"server\": {
\"type\": \"unix\",
\"path\": \"$SOCK_DIR/qsd.sock\"
},
\"export\": \"target\"}"
h=$QEMU_HANDLE
_send_qemu_cmd $h '{"execute": "qmp_capabilities"}' 'return'
# Use sync=top, so the first pass will not copy the whole image
_send_qemu_cmd $h \
'{"execute": "blockdev-mirror",
"arguments": {
"job-id": "mirror",
"device": "source-fmt",
"target": "target",
"sync": "top"
}}' \
'return' \
| grep -v JOB_STATUS_CHANGE # Ignore these events during creation
# This too will be used by this and the next case
#
# Wait for the block job to become READY, write to every other cluster of
# the virtio-blk device, wait until the job's length reflects those writes,
# then issue "quit" and consume the resulting events in the expected order.
# Finally clean up qemu and terminate the QSD whose PID is in
# $TEST_DIR/qsd.pid.
#
# $1: QEMU handle
# $2: Image size
#
# Note: h and size are assigned without 'local', so they overwrite the
# caller's variables of the same name.
wait_for_job_and_quit() {
h=$1
size=$2
# List of expected events
capture_events='BLOCK_JOB_READY JOB_STATUS_CHANGE'
_wait_event $h 'BLOCK_JOB_READY'
QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before READY
# Write something to the device for post-READY mirroring. Write it in
# blocks matching the cluster size, each spaced one block apart, so
# that the mirror job will have to spawn one request per cluster.
# Because the number of concurrent requests is limited (to 16), this
# limits the number of bytes concurrently in flight, which speeds up
# cancelling the job (in-flight requests still are waited for).
# To limit the number of bytes in flight, we could alternatively pass
# something for blockdev-mirror's @buf-size parameter, but
# block-commit does not have such a parameter, so we need to figure
# something out that works for both.
cluster_size=65536
step=$((cluster_size * 2))
echo '--- Writing data to the virtio-blk device ---'
# One aio_write of $cluster_size bytes at every $step bytes, issued
# through the monitor's qemu-io command
for ofs in $(seq 0 $step $((size - step))); do
qemu_io_cmd="qemu-io -d vblk/virtio-backend "
qemu_io_cmd+="\\\"aio_write $ofs $cluster_size\\\""
# Do not include these requests in the reference output
# (it's just too much)
silent=yes _send_qemu_cmd $h \
"{\"execute\": \"human-monitor-command\",
\"arguments\": {
\"command-line\": \"$qemu_io_cmd\"
}}" \
'return'
done
# Wait until the job's length is updated to reflect the write requests
# We have written to half of the device, so this is the expected job length
final_len=$((size / 2))
timeout=100 # unit: 0.1 seconds
# Poll query-block-jobs, extracting the numeric "len" field from the reply
while true; do
len=$(
_send_qemu_cmd $h \
'{"execute": "query-block-jobs"}' \
'return.*"len": [0-9]\+' \
| grep 'return.*"len": [0-9]\+' \
| sed -e 's/.*"len": \([0-9]\+\).*/\1/'
)
if [ "$len" -eq "$final_len" ]; then
break
fi
timeout=$((timeout - 1))
if [ "$timeout" -eq 0 ]; then
# Printing this line makes the output differ from the reference output
echo "ERROR: Timeout waiting for job to reach len=$final_len"
break
fi
sleep 0.1
done
# NOTE(review): presumably gives the job time to get requests in flight
# before qemu is asked to quit - confirm
sleep 1
_send_qemu_cmd $h \
'{"execute": "quit"}' \
'return'
# List of expected events
capture_events='BLOCK_JOB_CANCELLED JOB_STATUS_CHANGE SHUTDOWN'
_wait_event $h 'SHUTDOWN'
QEMU_EVENTS= # Ignore all JOB_STATUS_CHANGE events that came before SHUTDOWN
_wait_event $h 'JOB_STATUS_CHANGE' # standby
_wait_event $h 'JOB_STATUS_CHANGE' # ready
_wait_event $h 'JOB_STATUS_CHANGE' # aborting
# Filter the offset (depends on when exactly `quit` was issued)
_wait_event $h 'BLOCK_JOB_CANCELLED' \
| sed -e 's/"offset": [0-9]\+/"offset": (filtered)/'
_wait_event $h 'JOB_STATUS_CHANGE' # concluded
_wait_event $h 'JOB_STATUS_CHANGE' # null
wait=yes _cleanup_qemu
# Stop the QSD instance whose PID was recorded in qsd.pid
kill -SIGTERM "$(cat "$TEST_DIR/qsd.pid")"
}
wait_for_job_and_quit $h $size
echo
echo === Start active commit to throttled QSD and exit qemu ===
echo
# Same as the above, but instead of mirroring, do an active commit
_make_test_img $size
set_up_throttled_qsd
_launch_qemu \
--blockdev "{\"driver\": \"nbd\",
\"node-name\": \"target\",
\"server\": {
\"type\": \"unix\",
\"path\": \"$SOCK_DIR/qsd.sock\"
},
\"export\": \"target\"}" \
--blockdev file,node-name=source-proto,filename="$TEST_IMG" \
--blockdev qcow2,node-name=source-fmt,file=source-proto,backing=target \
--device virtio-blk,id=vblk,drive=source-fmt
h=$QEMU_HANDLE
_send_qemu_cmd $h '{"execute": "qmp_capabilities"}' 'return'
_send_qemu_cmd $h \
'{"execute": "block-commit",
"arguments": {
"job-id": "commit",
"device": "source-fmt"
}}' \
'return' \
| grep -v JOB_STATUS_CHANGE # Ignore these events during creation
wait_for_job_and_quit $h $size
# success, all done
echo "*** done"
rm -f $seq.full

View File

@ -116,4 +116,52 @@ Formatting 'TEST_DIR/t.qcow2.copy', fmt=qcow2 cluster_size=65536 extended_l2=off
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "disk", "len": 67108864, "offset": 524288, "speed": 65536, "type": "stream"}}
No errors were found on the image.
=== Start mirror to throttled QSD and exit qemu ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
{"execute": "qmp_capabilities"}
{"return": {}}
{"execute": "blockdev-mirror",
"arguments": {
"job-id": "mirror",
"device": "source-fmt",
"target": "target",
"sync": "top"
}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "mirror", "len": 0, "offset": 0, "speed": 0, "type": "mirror"}}
--- Writing data to the virtio-blk device ---
{"execute": "quit"}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "mirror"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "mirror"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "mirror"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "mirror", "len": 33554432, "offset": (filtered), "speed": 0, "type": "mirror"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "mirror"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "mirror"}}
=== Start active commit to throttled QSD and exit qemu ===
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864
{"execute": "qmp_capabilities"}
{"return": {}}
{"execute": "block-commit",
"arguments": {
"job-id": "commit",
"device": "source-fmt"
}}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_READY", "data": {"device": "commit", "len": 0, "offset": 0, "speed": 0, "type": "commit"}}
--- Writing data to the virtio-blk device ---
{"execute": "quit"}
{"return": {}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "SHUTDOWN", "data": {"guest": false, "reason": "host-qmp-quit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "standby", "id": "commit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "ready", "id": "commit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "aborting", "id": "commit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "BLOCK_JOB_CANCELLED", "data": {"device": "commit", "len": 33554432, "offset": (filtered), "speed": 0, "type": "commit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "concluded", "id": "commit"}}
{"timestamp": {"seconds": TIMESTAMP, "microseconds": TIMESTAMP}, "event": "JOB_STATUS_CHANGE", "data": {"status": "null", "id": "commit"}}
*** done

View File

@ -896,7 +896,7 @@ _make_test_img -o extended_l2=on 1M
# Second and third writes in _concurrent_io() are independent and may finish in
# different order. So, filter offset out to match both possible variants.
_concurrent_io | $QEMU_IO | _filter_qemu_io | \
$SED -e 's/\(20480\|40960\)/OFFSET/'
sed -e 's/\(20480\|40960\)/OFFSET/'
_concurrent_verify | $QEMU_IO | _filter_qemu_io
# success, all done

View File

@ -174,8 +174,12 @@ class EncryptionSetupTestCase(iotests.QMPTestCase):
}
result = vm.qmp('x-blockdev-amend', **args)
assert result['return'] == {}
vm.run_job('job0')
iotests.log(result)
# Run the job only if it was created
event = ('JOB_STATUS_CHANGE',
{'data': {'id': 'job0', 'status': 'created'}})
if vm.events_wait([event], timeout=0.0) is not None:
vm.run_job('job0')
# test that when the image is opened by two qemu processes,
# neither of them can update the encryption keys

View File

@ -1,11 +1,9 @@
{"return": {}}
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
Job failed: Failed to get shared "consistent read" lock
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
Job failed: Failed to get shared "consistent read" lock
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
{"return": {}}
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
@ -13,14 +11,9 @@ qemu-img: Failed to get shared "consistent read" lock
Is another process using the image [TEST_DIR/test.img]?
.
Job failed: Block node is read-only
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
Job failed: Failed to get shared "consistent read" lock
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}
Job failed: Failed to get shared "consistent read" lock
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"error": {"class": "GenericError", "desc": "Block node is read-only"}}
{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
{"error": {"class": "GenericError", "desc": "Failed to get shared \"consistent read\" lock"}}
{"return": {}}
{"execute": "job-dismiss", "arguments": {"id": "job0"}}
{"return": {}}

View File

@ -21,44 +21,44 @@
_filter_date()
{
$SED -re 's/[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/yyyy-mm-dd hh:mm:ss/'
sed -Ee 's/[0-9]{4}-[0-9]{2}-[0-9]{2} [0-9]{2}:[0-9]{2}:[0-9]{2}/yyyy-mm-dd hh:mm:ss/'
}
_filter_vmstate_size()
{
$SED -r -e 's/[0-9. ]{5} [KMGT]iB/ SIZE/' \
-e 's/[0-9. ]{5} B/ SIZE/'
sed -E -e 's/[0-9. ]{5} [KMGT]iB/ SIZE/' \
-e 's/[0-9. ]{5} B/ SIZE/'
}
_filter_generated_node_ids()
{
$SED -re 's/\#block[0-9]{3,}/NODE_NAME/'
sed -Ee 's/\#block[0-9]{3,}/NODE_NAME/'
}
_filter_qom_path()
{
$SED -e '/Attached to:/s/\device[[0-9]\+\]/device[N]/g'
gsed -e '/Attached to:/s/\device[[0-9]\+\]/device[N]/g'
}
# replace occurrences of the actual TEST_DIR value with TEST_DIR
_filter_testdir()
{
$SED -e "s#$TEST_DIR/#TEST_DIR/#g" \
-e "s#$SOCK_DIR/#SOCK_DIR/#g" \
-e "s#SOCK_DIR/fuse-#TEST_DIR/#g"
sed -e "s#$TEST_DIR/#TEST_DIR/#g" \
-e "s#$SOCK_DIR/#SOCK_DIR/#g" \
-e "s#SOCK_DIR/fuse-#TEST_DIR/#g"
}
# replace occurrences of the actual IMGFMT value with IMGFMT
_filter_imgfmt()
{
$SED -e "s#$IMGFMT#IMGFMT#g"
sed -e "s#$IMGFMT#IMGFMT#g"
}
# Replace error message when the format is not supported and delete
# the output lines after the first one
_filter_qemu_img_check()
{
$SED -e '/allocated.*fragmented.*compressed clusters/d' \
gsed -e '/allocated.*fragmented.*compressed clusters/d' \
-e 's/qemu-img: This image format does not support checks/No errors were found on the image./' \
-e '/Image end offset: [0-9]\+/d'
}
@ -66,13 +66,14 @@ _filter_qemu_img_check()
# Removes \r from messages
_filter_win32()
{
$SED -e 's/\r//g'
gsed -e 's/\r//g'
}
# sanitize qemu-io output
_filter_qemu_io()
{
_filter_win32 | $SED -e "s/[0-9]* ops\; [0-9/:. sec]* ([0-9/.inf]* [EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX YYY\/sec and XXX ops\/sec)/" \
_filter_win32 | \
gsed -e "s/[0-9]* ops\; [0-9/:. sec]* ([0-9/.inf]* [EPTGMKiBbytes]*\/sec and [0-9/.inf]* ops\/sec)/X ops\; XX:XX:XX.X (XXX YYY\/sec and XXX ops\/sec)/" \
-e "s/: line [0-9][0-9]*: *[0-9][0-9]*\( Aborted\| Killed\)/:\1/" \
-e "s/qemu-io> //g"
}
@ -80,7 +81,7 @@ _filter_qemu_io()
# replace occurrences of QEMU_PROG with "qemu"
_filter_qemu()
{
$SED -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
gsed -e "s#\\(^\\|(qemu) \\)$(basename $QEMU_PROG):#\1QEMU_PROG:#" \
-e 's#^QEMU [0-9]\+\.[0-9]\+\.[0-9]\+ monitor#QEMU X.Y.Z monitor#' \
-e $'s#\r##' # QEMU monitor uses \r\n line endings
}
@ -89,7 +90,7 @@ _filter_qemu()
_filter_qmp()
{
_filter_win32 | \
$SED -e 's#\("\(micro\)\?seconds": \)[0-9]\+#\1 TIMESTAMP#g' \
gsed -e 's#\("\(micro\)\?seconds": \)[0-9]\+#\1 TIMESTAMP#g' \
-e 's#^{"QMP":.*}$#QMP_VERSION#' \
-e '/^ "QMP": {\s*$/, /^ }\s*$/ c\' \
-e ' QMP_VERSION'
@ -98,32 +99,32 @@ _filter_qmp()
# readline makes HMP command strings so long that git complains
_filter_hmp()
{
$SED -e $'s/^\\((qemu) \\)\\?.*\e\\[D/\\1/g' \
gsed -e $'s/^\\((qemu) \\)\\?.*\e\\[D/\\1/g' \
-e $'s/\e\\[K//g'
}
# replace block job offset
_filter_block_job_offset()
{
$SED -e 's/, "offset": [0-9]\+,/, "offset": OFFSET,/'
sed -e 's/, "offset": [0-9]\+,/, "offset": OFFSET,/'
}
# replace block job len
_filter_block_job_len()
{
$SED -e 's/, "len": [0-9]\+,/, "len": LEN,/g'
sed -e 's/, "len": [0-9]\+,/, "len": LEN,/g'
}
# replace actual image size (depends on the host filesystem)
_filter_actual_image_size()
{
$SED -s 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
gsed -s 's/\("actual-size":\s*\)[0-9]\+/\1SIZE/g'
}
# Filename filters for qemu-img create
_filter_img_create_filenames()
{
$SED \
sed \
-e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
-e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
-e "s#$TEST_DIR#TEST_DIR#g" \
@ -141,7 +142,7 @@ _do_filter_img_create()
# precedes ", fmt=") and the options part ($options, which starts
# with "fmt=")
# (And just echo everything before the first "^Formatting")
readarray formatting_line < <($SED -e 's/, fmt=/\n/')
readarray formatting_line < <(gsed -e 's/, fmt=/\n/')
filename_part=${formatting_line[0]}
unset formatting_line[0]
@ -168,11 +169,11 @@ _do_filter_img_create()
options=$(
echo "$options" \
| tr '\n' '\0' \
| $SED -e 's/ \([a-z0-9_.-]*\)=/\n\1=/g' \
| gsed -e 's/ \([a-z0-9_.-]*\)=/\n\1=/g' \
| grep -a -e '^fmt' -e '^size' -e '^backing' -e '^preallocation' \
-e '^encryption' "${grep_data_file[@]}" \
| _filter_img_create_filenames \
| $SED \
| sed \
-e 's/^\(fmt\)/0-\1/' \
-e 's/^\(size\)/1-\1/' \
-e 's/^\(backing\)/2-\1/' \
@ -180,9 +181,9 @@ _do_filter_img_create()
-e 's/^\(encryption\)/4-\1/' \
-e 's/^\(preallocation\)/8-\1/' \
| LC_ALL=C sort \
| $SED -e 's/^[0-9]-//' \
| sed -e 's/^[0-9]-//' \
| tr '\n\0' ' \n' \
| $SED -e 's/^ *$//' -e 's/ *$//'
| sed -e 's/^ *$//' -e 's/ *$//'
)
if [ -n "$options" ]; then
@ -208,7 +209,7 @@ _filter_img_create()
_filter_img_create_size()
{
$SED -e "s# size=[0-9]\\+# size=SIZE#g"
gsed -e "s# size=[0-9]\\+# size=SIZE#g"
}
_filter_img_info()
@ -222,7 +223,7 @@ _filter_img_info()
discard=0
regex_json_spec_start='^ *"format-specific": \{'
$SED -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
gsed -e "s#$REMOTE_TEST_DIR#TEST_DIR#g" \
-e "s#$IMGPROTO:$TEST_DIR#TEST_DIR#g" \
-e "s#$TEST_DIR#TEST_DIR#g" \
-e "s#$SOCK_DIR#SOCK_DIR#g" \
@ -284,7 +285,7 @@ _filter_qemu_img_map()
data_file_filter=(-e "s#$data_file_pattern#\\1#")
fi
$SED -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \
sed -e 's/\([0-9a-fx]* *[0-9a-fx]* *\)[0-9a-fx]* */\1/g' \
-e 's/"offset": [0-9]\+/"offset": OFFSET/g' \
-e 's/Mapped to *//' \
"${data_file_filter[@]}" \
@ -298,7 +299,7 @@ _filter_nbd()
# receive callbacks sometimes, making them unreliable.
#
# Filter out the TCP port number since this changes between runs.
$SED -e '/nbd\/.*\.c:/d' \
sed -e '/nbd\/.*\.c:/d' \
-e 's#127\.0\.0\.1:[0-9]*#127.0.0.1:PORT#g' \
-e "s#?socket=$SOCK_DIR#?socket=SOCK_DIR#g" \
-e 's#\(foo\|PORT/\?\|.sock\): Failed to .*$#\1#'
@ -335,14 +336,14 @@ sys.stdout.write(result)'
_filter_authz_check_tls()
{
$SED -e 's/TLS x509 authz check for .* is denied/TLS x509 authz check for DISTINGUISHED-NAME is denied/'
sed -e 's/TLS x509 authz check for .* is denied/TLS x509 authz check for DISTINGUISHED-NAME is denied/'
}
_filter_qcow2_compression_type_bit()
{
$SED -e 's/\(incompatible_features\s\+\)\[3\(, \)\?/\1[/' \
-e 's/\(incompatible_features.*\), 3\]/\1]/' \
-e 's/\(incompatible_features.*\), 3\(,.*\)/\1\2/'
gsed -e 's/\(incompatible_features\s\+\)\[3\(, \)\?/\1[/' \
-e 's/\(incompatible_features.*\), 3\]/\1]/' \
-e 's/\(incompatible_features.*\), 3\(,.*\)/\1\2/'
}
# make sure this script returns success

View File

@ -17,17 +17,28 @@
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
SED=
for sed in sed gsed; do
($sed --version | grep 'GNU sed') > /dev/null 2>&1
if [ "$?" -eq 0 ]; then
SED=$sed
break
# bail out, setting up .notrun file
_notrun()
{
echo "$*" >"$OUTPUT_DIR/$seq.notrun"
echo "$seq not run: $*"
status=0
exit
}
if ! command -v gsed >/dev/null 2>&1; then
if sed --version 2>&1 | grep -v 'not GNU sed' | grep 'GNU sed' > /dev/null;
then
gsed()
{
sed "$@"
}
else
gsed()
{
_notrun "GNU sed not available"
}
fi
done
if [ -z "$SED" ]; then
echo "$0: GNU sed not found"
exit 1
fi
dd()
@ -722,16 +733,6 @@ _img_info()
done
}
# bail out, setting up .notrun file
#
_notrun()
{
echo "$*" >"$OUTPUT_DIR/$seq.notrun"
echo "$seq not run: $*"
status=0
exit
}
# bail out, setting up .casenotrun file
# The function _casenotrun() is used as a notifier. It is the
# caller's responsibility to actually skip the particular test.
@ -920,7 +921,7 @@ _require_working_luks()
IMGFMT='luks' _rm_test_img "$file"
if [ $status != 0 ]; then
reason=$(echo "$output" | grep "$file:" | $SED -e "s#.*$file: *##")
reason=$(echo "$output" | grep "$file:" | sed -e "s#.*$file: *##")
if [ -z "$reason" ]; then
reason="Failed to create a LUKS image"
fi

View File

@ -39,6 +39,7 @@ from contextlib import contextmanager
from qemu.machine import qtest
from qemu.qmp import QMPMessage
from qemu.aqmp.legacy import QEMUMonitorProtocol
# Use this logger for logging messages directly from the iotests module
logger = logging.getLogger('qemu.iotests')
@ -348,14 +349,30 @@ class QemuIoInteractive:
class QemuStorageDaemon:
def __init__(self, *args: str, instance_id: str = 'a'):
_qmp: Optional[QEMUMonitorProtocol] = None
_qmpsock: Optional[str] = None
# Python < 3.8 would complain if this type were not a string literal
# (importing `annotations` from `__future__` would work; but not on <= 3.6)
_p: 'Optional[subprocess.Popen[bytes]]' = None
def __init__(self, *args: str, instance_id: str = 'a', qmp: bool = False):
assert '--pidfile' not in args
self.pidfile = os.path.join(test_dir, f'qsd-{instance_id}-pid')
all_args = [qsd_prog] + list(args) + ['--pidfile', self.pidfile]
if qmp:
self._qmpsock = os.path.join(sock_dir, f'qsd-{instance_id}.sock')
all_args += ['--chardev',
f'socket,id=qmp-sock,path={self._qmpsock}',
'--monitor', 'qmp-sock']
self._qmp = QEMUMonitorProtocol(self._qmpsock, server=True)
# Cannot use with here, we want the subprocess to stay around
# pylint: disable=consider-using-with
self._p = subprocess.Popen(all_args)
if self._qmp is not None:
self._qmp.accept()
while not os.path.exists(self.pidfile):
if self._p.poll() is not None:
cmd = ' '.join(all_args)
@ -370,11 +387,24 @@ class QemuStorageDaemon:
assert self._pid == self._p.pid
def qmp(self, cmd: str, args: Optional[Dict[str, object]] = None) \
-> QMPMessage:
assert self._qmp is not None
return self._qmp.cmd(cmd, args)
def stop(self, kill_signal=15):
self._p.send_signal(kill_signal)
self._p.wait()
self._p = None
if self._qmp:
self._qmp.close()
if self._qmpsock is not None:
try:
os.remove(self._qmpsock)
except OSError:
pass
try:
os.remove(self.pidfile)
except OSError:

View File

@ -0,0 +1,91 @@
#!/usr/bin/env python3
# group: rw
#
# Test graph changes while I/O is happening
#
# Copyright (C) 2022 Red Hat, Inc.
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program. If not, see <http://www.gnu.org/licenses/>.
#
import os
from threading import Thread
import iotests
from iotests import imgfmt, qemu_img, qemu_img_create, QMPTestCase, \
QemuStorageDaemon
# Overlay image path inside the iotests test directory; setUp() creates the
# image and the test attaches it on top of node0 with blockdev-add.
top = os.path.join(iotests.test_dir, 'top.img')
# UNIX socket path inside the iotests socket directory; used as the address of
# the storage daemon's NBD server and by qemu-img bench to connect to it.
nbd_sock = os.path.join(iotests.sock_dir, 'nbd.sock')
def do_qemu_img_bench() -> None:
    """
    Do some I/O requests on `nbd_sock`.

    Runs `qemu-img bench -f raw -c 2000000` against the "node0" export
    reachable through the `nbd_sock` UNIX socket and asserts that
    qemu_img() returns 0.
    """
    # Keep the call outside of the assert statement: assert statements are
    # removed when Python runs with -O, which would silently skip the I/O
    # and let the caller's loop finish without ever racing against requests.
    ret = qemu_img('bench', '-f', 'raw', '-c', '2000000',
                   f'nbd+unix:///node0?socket={nbd_sock}')
    assert ret == 0
class TestGraphChangesWhileIO(QMPTestCase):
    """
    Repeatedly add and remove an overlay on top of a block node while that
    node is receiving I/O through an NBD export that runs in an I/O thread
    of a QemuStorageDaemon instance.
    """

    def setUp(self) -> None:
        # Create an overlay that can be added at runtime on top of the
        # null-co block node that will receive I/O.
        # The call is kept outside of the assert statement so the image is
        # still created when Python strips assert statements (-O).
        ret = qemu_img_create('-f', imgfmt, '-F', 'raw', '-b', 'null-co://',
                              top)
        assert ret == 0

        # QSD instance with a null-co block node in an I/O thread,
        # exported over NBD (on `nbd_sock`, export name "node0")
        self.qsd = QemuStorageDaemon(
            '--object', 'iothread,id=iothread0',
            '--blockdev', 'null-co,node-name=node0,read-zeroes=true',
            '--nbd-server', f'addr.type=unix,addr.path={nbd_sock}',
            '--export', 'nbd,id=exp0,node-name=node0,iothread=iothread0,' +
                        'fixed-iothread=true,writable=true',
            qmp=True
        )

    def tearDown(self) -> None:
        # Stop the storage daemon started in setUp()
        self.qsd.stop()

    def test_blockdev_add_while_io(self) -> None:
        # Run qemu-img bench in the background
        bench_thr = Thread(target=do_qemu_img_bench)
        bench_thr.start()

        # While qemu-img bench is running, repeatedly add and remove an
        # overlay to/from node0
        while bench_thr.is_alive():
            result = self.qsd.qmp('blockdev-add', {
                'driver': imgfmt,
                'node-name': 'overlay',
                'backing': 'node0',
                'file': {
                    'driver': 'file',
                    'filename': top
                }
            })
            self.assert_qmp(result, 'return', {})

            result = self.qsd.qmp('blockdev-del', {
                'node-name': 'overlay'
            })
            self.assert_qmp(result, 'return', {})

        bench_thr.join()
if __name__ == '__main__':
    # The overlay is created with a raw backing file (null-co://), so only
    # image formats that support raw backing files can run this test.
    iotests.main(
        supported_protocols=['file'],
        supported_fmts=['qcow', 'qcow2', 'qed'],
    )

View File

@ -0,0 +1,5 @@
.
----------------------------------------------------------------------
Ran 1 tests
OK

View File

@ -122,7 +122,7 @@ static void *rcu_read_perf_test(void *arg)
rcu_register_thread();
*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
@ -148,7 +148,7 @@ static void *rcu_update_perf_test(void *arg)
rcu_register_thread();
*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
@ -253,7 +253,7 @@ static void *rcu_read_stress_test(void *arg)
rcu_register_thread();
*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
}
@ -304,7 +304,7 @@ static void *rcu_update_stress_test(void *arg)
struct rcu_stress *cp = qatomic_read(&rcu_stress_current);
rcu_register_thread();
*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
@ -347,7 +347,7 @@ static void *rcu_fake_update_stress_test(void *arg)
{
rcu_register_thread();
*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
while (goflag == GOFLAG_INIT) {
g_usleep(1000);
}

View File

@ -279,10 +279,10 @@ static void test_sync_op_check(BdrvChild *c)
g_assert_cmpint(ret, ==, -ENOTSUP);
}
static void test_sync_op_invalidate_cache(BdrvChild *c)
static void test_sync_op_activate(BdrvChild *c)
{
/* Early success: Image is not inactive */
bdrv_invalidate_cache(c->bs, NULL);
bdrv_activate(c->bs, NULL);
}
@ -325,8 +325,8 @@ const SyncOpTest sync_op_tests[] = {
.name = "/sync-op/check",
.fn = test_sync_op_check,
}, {
.name = "/sync-op/invalidate_cache",
.fn = test_sync_op_invalidate_cache,
.name = "/sync-op/activate",
.fn = test_sync_op_activate,
},
};

View File

@ -171,7 +171,7 @@ static void *rcu_q_reader(void *arg)
rcu_register_thread();
*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (qatomic_read(&goflag) == GOFLAG_INIT) {
g_usleep(1000);
@ -206,7 +206,7 @@ static void *rcu_q_updater(void *arg)
long long n_removed_local = 0;
struct list_element *el, *prev_el;
*(struct rcu_reader_data **)arg = &rcu_reader;
*(struct rcu_reader_data **)arg = get_ptr_rcu_reader();
qatomic_inc(&nthreadsrunning);
while (qatomic_read(&goflag) == GOFLAG_INIT) {
g_usleep(1000);

View File

@ -32,6 +32,7 @@
#include "qemu/rcu_queue.h"
#include "block/raw-aio.h"
#include "qemu/coroutine_int.h"
#include "qemu/coroutine-tls.h"
#include "trace.h"
/***********************************************************/
@ -675,12 +676,13 @@ void aio_context_release(AioContext *ctx)
qemu_rec_mutex_unlock(&ctx->lock);
}
static __thread AioContext *my_aiocontext;
QEMU_DEFINE_STATIC_CO_TLS(AioContext *, my_aiocontext)
AioContext *qemu_get_current_aio_context(void)
{
if (my_aiocontext) {
return my_aiocontext;
AioContext *ctx = get_my_aiocontext();
if (ctx) {
return ctx;
}
if (qemu_mutex_iothread_locked()) {
/* Possibly in a vCPU thread. */
@ -691,6 +693,6 @@ AioContext *qemu_get_current_aio_context(void)
void qemu_set_current_aio_context(AioContext *ctx)
{
assert(!my_aiocontext);
my_aiocontext = ctx;
assert(!get_my_aiocontext());
set_my_aiocontext(ctx);
}

View File

@ -65,7 +65,7 @@ static inline int rcu_gp_ongoing(unsigned long *ctr)
/* Written to only by each individual reader. Read by both the reader and the
* writers.
*/
__thread struct rcu_reader_data rcu_reader;
QEMU_DEFINE_CO_TLS(struct rcu_reader_data, rcu_reader)
/* Protected by rcu_registry_lock. */
typedef QLIST_HEAD(, rcu_reader_data) ThreadList;
@ -355,23 +355,23 @@ void drain_call_rcu(void)
void rcu_register_thread(void)
{
assert(rcu_reader.ctr == 0);
assert(get_ptr_rcu_reader()->ctr == 0);
qemu_mutex_lock(&rcu_registry_lock);
QLIST_INSERT_HEAD(&registry, &rcu_reader, node);
QLIST_INSERT_HEAD(&registry, get_ptr_rcu_reader(), node);
qemu_mutex_unlock(&rcu_registry_lock);
}
void rcu_unregister_thread(void)
{
qemu_mutex_lock(&rcu_registry_lock);
QLIST_REMOVE(&rcu_reader, node);
QLIST_REMOVE(get_ptr_rcu_reader(), node);
qemu_mutex_unlock(&rcu_registry_lock);
}
void rcu_add_force_rcu_notifier(Notifier *n)
{
qemu_mutex_lock(&rcu_registry_lock);
notifier_list_add(&rcu_reader.force_rcu, n);
notifier_list_add(&get_ptr_rcu_reader()->force_rcu, n);
qemu_mutex_unlock(&rcu_registry_lock);
}