Block layer patches:

- qemu-img create: Fail gracefully when backing file is an empty string
 - Fixes related to filter block nodes ("Deal with filters" series)
 - block/nvme: Various cleanups required to use multiple queues
 - block/nvme: Use NvmeBar structure from "block/nvme.h"
 - file-win32: Fix "locking" option
 - iotests: Allow running from different directory
 -----BEGIN PGP SIGNATURE-----
 
 iQJFBAABCAAvFiEE3D3rFZqa+V09dFb+fwmycsiPL9YFAl9Z7bcRHGt3b2xmQHJl
 ZGhhdC5jb20ACgkQfwmycsiPL9ZS6w//bos+A0RfRRF0YFWkIBLQWxqzKcGvMJ8W
 XWv3mFzd47UaDgRYwVnCC3CR6bLYEINISngZ3geA4jI1+w7AtYKDOO0HN32dUg+D
 ZrNMn02701CA6qkmpxJ+yjsrl9ltR3jYe0me4Wr39Pvdexa2pl/e+M4Vas6FhkYL
 ghAwNThypscGCrFjAlz3ru2Sc/K+sPWrGoqkzr+SWvsm9wy4vb8aLxr8Yy50x/zc
 CqALS9SQ/YA93BCVi9CzPkVyV3ioA0kg/y38WvLtAQ9GZ3m/ekMro3WvdYsRsFCN
 LGXsuwFig+U7Kd7lJrCS9TLnlTJstNGqPq9jEoV5cThPvGknFfMvVOzRmmP7tzqT
 YRcPRy39z44OoLKa3kyg3aF38BTxt+9gPqBnivKMr9j9EecMvPsXXHRvF+lP+LsP
 j753Ih561hX6FurcjX8pc9GOM2cQA0GjlyL77UTTAmLZyFXP/8e55oQbBuYTylc/
 Xlvmc/T+yEGiEGTnK+FxgDAiUaxbCCM9cDVStJjTvsIq43dwXb48g1onDsGZ5eDf
 j9lmAD6TJxHNOB5ErNsDPODf4/D1wJ9t9WVF8UZp9ArfPHRdxMzT7Q4LvetaDmVl
 +hQC9cgTq8Qd8LwSqbKEYua4L6iGbmLAT7/N6htq5L1eVLg76/tLg/tKSwh/vKAY
 yzPmyHaVK84=
 =gaaW
 -----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/kevin/tags/for-upstream' into staging

Block layer patches:

- qemu-img create: Fail gracefully when backing file is an empty string
- Fixes related to filter block nodes ("Deal with filters" series)
- block/nvme: Various cleanups required to use multiple queues
- block/nvme: Use NvmeBar structure from "block/nvme.h"
- file-win32: Fix "locking" option
- iotests: Allow running from different directory

# gpg: Signature made Thu 10 Sep 2020 10:11:19 BST
# gpg:                using RSA key DC3DEB159A9AF95D3D7456FE7F09B272C88F2FD6
# gpg:                issuer "kwolf@redhat.com"
# gpg: Good signature from "Kevin Wolf <kwolf@redhat.com>" [full]
# Primary key fingerprint: DC3D EB15 9A9A F95D 3D74  56FE 7F09 B272 C88F 2FD6

* remotes/kevin/tags/for-upstream: (65 commits)
  block/qcow2-cluster: Add missing "fallthrough" annotation
  block/nvme: Pair doorbell registers
  block/nvme: Use generic NvmeBar structure
  block/nvme: Group controller registers in NVMeRegs structure
  file-win32: Fix "locking" option
  iotests: Allow running from different directory
  iotests: Test committing to overridden backing
  iotests: Add test for commit in sub directory
  iotests: Add filter mirror test cases
  iotests: Add filter commit test cases
  iotests: Let complete_and_wait() work with commit
  iotests: Test that qcow2's data-file is flushed
  block: Leave BDS.backing_{file,format} constant
  block: Inline bdrv_co_block_status_from_*()
  blockdev: Fix active commit choice
  block: Drop backing_bs()
  qemu-img: Use child access functions
  nbd: Use CAF when looking for dirty bitmap
  commit: Deal with filters
  backup: Deal with filters
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2020-09-11 14:47:49 +01:00
commit 2499453eb1
49 changed files with 1764 additions and 555 deletions

492
block.c
View File

@ -712,11 +712,12 @@ int coroutine_fn bdrv_co_delete_file(BlockDriverState *bs, Error **errp)
int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz) int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BlockDriverState *filtered = bdrv_filter_bs(bs);
if (drv && drv->bdrv_probe_blocksizes) { if (drv && drv->bdrv_probe_blocksizes) {
return drv->bdrv_probe_blocksizes(bs, bsz); return drv->bdrv_probe_blocksizes(bs, bsz);
} else if (drv && drv->is_filter && bs->file) { } else if (filtered) {
return bdrv_probe_blocksizes(bs->file->bs, bsz); return bdrv_probe_blocksizes(filtered, bsz);
} }
return -ENOTSUP; return -ENOTSUP;
@ -731,11 +732,12 @@ int bdrv_probe_blocksizes(BlockDriverState *bs, BlockSizes *bsz)
int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo) int bdrv_probe_geometry(BlockDriverState *bs, HDGeometry *geo)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BlockDriverState *filtered = bdrv_filter_bs(bs);
if (drv && drv->bdrv_probe_geometry) { if (drv && drv->bdrv_probe_geometry) {
return drv->bdrv_probe_geometry(bs, geo); return drv->bdrv_probe_geometry(bs, geo);
} else if (drv && drv->is_filter && bs->file) { } else if (filtered) {
return bdrv_probe_geometry(bs->file->bs, geo); return bdrv_probe_geometry(filtered, geo);
} }
return -ENOTSUP; return -ENOTSUP;
@ -1153,10 +1155,6 @@ static void bdrv_backing_attach(BdrvChild *c)
bdrv_refresh_filename(backing_hd); bdrv_refresh_filename(backing_hd);
parent->open_flags &= ~BDRV_O_NO_BACKING; parent->open_flags &= ~BDRV_O_NO_BACKING;
pstrcpy(parent->backing_file, sizeof(parent->backing_file),
backing_hd->filename);
pstrcpy(parent->backing_format, sizeof(parent->backing_format),
backing_hd->drv ? backing_hd->drv->format_name : "");
bdrv_op_block_all(backing_hd, parent->backing_blocker); bdrv_op_block_all(backing_hd, parent->backing_blocker);
/* Otherwise we won't be able to commit or stream */ /* Otherwise we won't be able to commit or stream */
@ -2612,12 +2610,15 @@ static void bdrv_replace_child_noperm(BdrvChild *child,
* If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this * If @new_bs is not NULL, bdrv_check_perm() must be called beforehand, as this
* function uses bdrv_set_perm() to update the permissions according to the new * function uses bdrv_set_perm() to update the permissions according to the new
* reference that @new_bs gets. * reference that @new_bs gets.
*
* Callers must ensure that child->frozen is false.
*/ */
static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs) static void bdrv_replace_child(BdrvChild *child, BlockDriverState *new_bs)
{ {
BlockDriverState *old_bs = child->bs; BlockDriverState *old_bs = child->bs;
uint64_t perm, shared_perm; uint64_t perm, shared_perm;
/* Asserts that child->frozen == false */
bdrv_replace_child_noperm(child, new_bs); bdrv_replace_child_noperm(child, new_bs);
/* /*
@ -2778,6 +2779,7 @@ static void bdrv_detach_child(BdrvChild *child)
g_free(child); g_free(child);
} }
/* Callers must ensure that child->frozen is false. */
void bdrv_root_unref_child(BdrvChild *child) void bdrv_root_unref_child(BdrvChild *child)
{ {
BlockDriverState *child_bs; BlockDriverState *child_bs;
@ -2815,6 +2817,7 @@ static void bdrv_unset_inherits_from(BlockDriverState *root, BdrvChild *child)
} }
} }
/* Callers must ensure that child->frozen is false. */
void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child) void bdrv_unref_child(BlockDriverState *parent, BdrvChild *child)
{ {
if (child == NULL) { if (child == NULL) {
@ -2863,7 +2866,7 @@ static BdrvChildRole bdrv_backing_role(BlockDriverState *bs)
} }
/* /*
* Sets the backing file link of a BDS. A new reference is created; callers * Sets the bs->backing link of a BDS. A new reference is created; callers
* which don't need their own reference any more must call bdrv_unref(). * which don't need their own reference any more must call bdrv_unref().
*/ */
void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd, void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
@ -2872,7 +2875,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) && bool update_inherits_from = bdrv_chain_contains(bs, backing_hd) &&
bdrv_inherits_from_recursive(backing_hd, bs); bdrv_inherits_from_recursive(backing_hd, bs);
if (bdrv_is_backing_chain_frozen(bs, backing_bs(bs), errp)) { if (bdrv_is_backing_chain_frozen(bs, child_bs(bs->backing), errp)) {
return; return;
} }
@ -2881,6 +2884,7 @@ void bdrv_set_backing_hd(BlockDriverState *bs, BlockDriverState *backing_hd,
} }
if (bs->backing) { if (bs->backing) {
/* Cannot be frozen, we checked that above */
bdrv_unref_child(bs, bs->backing); bdrv_unref_child(bs, bs->backing);
bs->backing = NULL; bs->backing = NULL;
} }
@ -3996,7 +4000,7 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
Error **errp) Error **errp)
{ {
BlockDriverState *bs = reopen_state->bs; BlockDriverState *bs = reopen_state->bs;
BlockDriverState *overlay_bs, *new_backing_bs; BlockDriverState *overlay_bs, *below_bs, *new_backing_bs;
QObject *value; QObject *value;
const char *str; const char *str;
@ -4035,26 +4039,57 @@ static int bdrv_reopen_parse_backing(BDRVReopenState *reopen_state,
} }
} }
/*
* Ensure that @bs can really handle backing files, because we are
* about to give it one (or swap the existing one)
*/
if (bs->drv->is_filter) {
/* Filters always have a file or a backing child */
if (!bs->backing) {
error_setg(errp, "'%s' is a %s filter node that does not support a "
"backing child", bs->node_name, bs->drv->format_name);
return -EINVAL;
}
} else if (!bs->drv->supports_backing) {
error_setg(errp, "Driver '%s' of node '%s' does not support backing "
"files", bs->drv->format_name, bs->node_name);
return -EINVAL;
}
/* /*
* Find the "actual" backing file by skipping all links that point * Find the "actual" backing file by skipping all links that point
* to an implicit node, if any (e.g. a commit filter node). * to an implicit node, if any (e.g. a commit filter node).
* We cannot use any of the bdrv_skip_*() functions here because
* those return the first explicit node, while we are looking for
* its overlay here.
*/ */
overlay_bs = bs; overlay_bs = bs;
while (backing_bs(overlay_bs) && backing_bs(overlay_bs)->implicit) { for (below_bs = bdrv_filter_or_cow_bs(overlay_bs);
overlay_bs = backing_bs(overlay_bs); below_bs && below_bs->implicit;
below_bs = bdrv_filter_or_cow_bs(overlay_bs))
{
overlay_bs = below_bs;
} }
/* If we want to replace the backing file we need some extra checks */ /* If we want to replace the backing file we need some extra checks */
if (new_backing_bs != backing_bs(overlay_bs)) { if (new_backing_bs != bdrv_filter_or_cow_bs(overlay_bs)) {
/* Check for implicit nodes between bs and its backing file */ /* Check for implicit nodes between bs and its backing file */
if (bs != overlay_bs) { if (bs != overlay_bs) {
error_setg(errp, "Cannot change backing link if '%s' has " error_setg(errp, "Cannot change backing link if '%s' has "
"an implicit backing file", bs->node_name); "an implicit backing file", bs->node_name);
return -EPERM; return -EPERM;
} }
/* Check if the backing link that we want to replace is frozen */ /*
if (bdrv_is_backing_chain_frozen(overlay_bs, backing_bs(overlay_bs), * Check if the backing link that we want to replace is frozen.
errp)) { * Note that
* bdrv_filter_or_cow_child(overlay_bs) == overlay_bs->backing,
* because we know that overlay_bs == bs, and that @bs
* either is a filter that uses ->backing or a COW format BDS
* with bs->drv->supports_backing == true.
*/
if (bdrv_is_backing_chain_frozen(overlay_bs,
child_bs(overlay_bs->backing), errp))
{
return -EPERM; return -EPERM;
} }
reopen_state->replace_backing_bs = true; reopen_state->replace_backing_bs = true;
@ -4203,7 +4238,7 @@ int bdrv_reopen_prepare(BDRVReopenState *reopen_state, BlockReopenQueue *queue,
* its metadata. Otherwise the 'backing' option can be omitted. * its metadata. Otherwise the 'backing' option can be omitted.
*/ */
if (drv->supports_backing && reopen_state->backing_missing && if (drv->supports_backing && reopen_state->backing_missing &&
(backing_bs(reopen_state->bs) || reopen_state->bs->backing_file[0])) { (reopen_state->bs->backing || reopen_state->bs->backing_file[0])) {
error_setg(errp, "backing is missing for '%s'", error_setg(errp, "backing is missing for '%s'",
reopen_state->bs->node_name); reopen_state->bs->node_name);
ret = -EINVAL; ret = -EINVAL;
@ -4344,7 +4379,7 @@ void bdrv_reopen_commit(BDRVReopenState *reopen_state)
* from bdrv_set_backing_hd()) has the new values. * from bdrv_set_backing_hd()) has the new values.
*/ */
if (reopen_state->replace_backing_bs) { if (reopen_state->replace_backing_bs) {
BlockDriverState *old_backing_bs = backing_bs(bs); BlockDriverState *old_backing_bs = child_bs(bs->backing);
assert(!old_backing_bs || !old_backing_bs->implicit); assert(!old_backing_bs || !old_backing_bs->implicit);
/* Abort the permission update on the backing bs we're detaching */ /* Abort the permission update on the backing bs we're detaching */
if (old_backing_bs) { if (old_backing_bs) {
@ -4387,6 +4422,7 @@ static void bdrv_close(BlockDriverState *bs)
if (bs->drv) { if (bs->drv) {
if (bs->drv->bdrv_close) { if (bs->drv->bdrv_close) {
/* Must unfreeze all children, so bdrv_unref_child() works */
bs->drv->bdrv_close(bs); bs->drv->bdrv_close(bs);
} }
bs->drv = NULL; bs->drv = NULL;
@ -4736,9 +4772,9 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
} }
/* /*
* Finds the image layer in the chain that has 'bs' as its backing file. * Finds the first non-filter node above bs in the chain between
* * active and bs. The returned node is either an immediate parent of
* active is the current topmost image. * bs, or there are only filter nodes between the two.
* *
* Returns NULL if bs is not found in active's image chain, * Returns NULL if bs is not found in active's image chain,
* or if active == bs. * or if active == bs.
@ -4748,11 +4784,18 @@ int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file,
BlockDriverState *bdrv_find_overlay(BlockDriverState *active, BlockDriverState *bdrv_find_overlay(BlockDriverState *active,
BlockDriverState *bs) BlockDriverState *bs)
{ {
while (active && bs != backing_bs(active)) { bs = bdrv_skip_filters(bs);
active = backing_bs(active); active = bdrv_skip_filters(active);
while (active) {
BlockDriverState *next = bdrv_backing_chain_next(active);
if (bs == next) {
return active;
}
active = next;
} }
return active; return NULL;
} }
/* Given a BDS, searches for the base layer. */ /* Given a BDS, searches for the base layer. */
@ -4762,20 +4805,22 @@ BlockDriverState *bdrv_find_base(BlockDriverState *bs)
} }
/* /*
* Return true if at least one of the backing links between @bs and * Return true if at least one of the COW (backing) and filter links
* @base is frozen. @errp is set if that's the case. * between @bs and @base is frozen. @errp is set if that's the case.
* @base must be reachable from @bs, or NULL. * @base must be reachable from @bs, or NULL.
*/ */
bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base, bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
Error **errp) Error **errp)
{ {
BlockDriverState *i; BlockDriverState *i;
BdrvChild *child;
for (i = bs; i != base; i = backing_bs(i)) { for (i = bs; i != base; i = child_bs(child)) {
if (i->backing && i->backing->frozen) { child = bdrv_filter_or_cow_child(i);
if (child && child->frozen) {
error_setg(errp, "Cannot change '%s' link from '%s' to '%s'", error_setg(errp, "Cannot change '%s' link from '%s' to '%s'",
i->backing->name, i->node_name, child->name, i->node_name, child->bs->node_name);
backing_bs(i)->node_name);
return true; return true;
} }
} }
@ -4784,7 +4829,7 @@ bool bdrv_is_backing_chain_frozen(BlockDriverState *bs, BlockDriverState *base,
} }
/* /*
* Freeze all backing links between @bs and @base. * Freeze all COW (backing) and filter links between @bs and @base.
* If any of the links is already frozen the operation is aborted and * If any of the links is already frozen the operation is aborted and
* none of the links are modified. * none of the links are modified.
* @base must be reachable from @bs, or NULL. * @base must be reachable from @bs, or NULL.
@ -4794,22 +4839,25 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
Error **errp) Error **errp)
{ {
BlockDriverState *i; BlockDriverState *i;
BdrvChild *child;
if (bdrv_is_backing_chain_frozen(bs, base, errp)) { if (bdrv_is_backing_chain_frozen(bs, base, errp)) {
return -EPERM; return -EPERM;
} }
for (i = bs; i != base; i = backing_bs(i)) { for (i = bs; i != base; i = child_bs(child)) {
if (i->backing && backing_bs(i)->never_freeze) { child = bdrv_filter_or_cow_child(i);
if (child && child->bs->never_freeze) {
error_setg(errp, "Cannot freeze '%s' link to '%s'", error_setg(errp, "Cannot freeze '%s' link to '%s'",
i->backing->name, backing_bs(i)->node_name); child->name, child->bs->node_name);
return -EPERM; return -EPERM;
} }
} }
for (i = bs; i != base; i = backing_bs(i)) { for (i = bs; i != base; i = child_bs(child)) {
if (i->backing) { child = bdrv_filter_or_cow_child(i);
i->backing->frozen = true; if (child) {
child->frozen = true;
} }
} }
@ -4817,18 +4865,21 @@ int bdrv_freeze_backing_chain(BlockDriverState *bs, BlockDriverState *base,
} }
/* /*
* Unfreeze all backing links between @bs and @base. The caller must * Unfreeze all COW (backing) and filter links between @bs and @base.
* ensure that all links are frozen before using this function. * The caller must ensure that all links are frozen before using this
* function.
* @base must be reachable from @bs, or NULL. * @base must be reachable from @bs, or NULL.
*/ */
void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base) void bdrv_unfreeze_backing_chain(BlockDriverState *bs, BlockDriverState *base)
{ {
BlockDriverState *i; BlockDriverState *i;
BdrvChild *child;
for (i = bs; i != base; i = backing_bs(i)) { for (i = bs; i != base; i = child_bs(child)) {
if (i->backing) { child = bdrv_filter_or_cow_child(i);
assert(i->backing->frozen); if (child) {
i->backing->frozen = false; assert(child->frozen);
child->frozen = false;
} }
} }
} }
@ -4896,9 +4947,7 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
* other intermediate nodes have been dropped. * other intermediate nodes have been dropped.
* If 'top' is an implicit node (e.g. "commit_top") we should skip * If 'top' is an implicit node (e.g. "commit_top") we should skip
* it because no one inherits from it. We use explicit_top for that. */ * it because no one inherits from it. We use explicit_top for that. */
while (explicit_top && explicit_top->implicit) { explicit_top = bdrv_skip_implicit_filters(explicit_top);
explicit_top = backing_bs(explicit_top);
}
update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top); update_inherits_from = bdrv_inherits_from_recursive(base, explicit_top);
/* success - we can delete the intermediate states, and link top->base */ /* success - we can delete the intermediate states, and link top->base */
@ -4931,8 +4980,11 @@ int bdrv_drop_intermediate(BlockDriverState *top, BlockDriverState *base,
} }
} }
/* Do the actual switch in the in-memory graph. /*
* Completes bdrv_check_update_perm() transaction internally. */ * Do the actual switch in the in-memory graph.
* Completes bdrv_check_update_perm() transaction internally.
* c->frozen is false, we have checked that above.
*/
bdrv_ref(base); bdrv_ref(base);
bdrv_replace_child(c, base); bdrv_replace_child(c, base);
bdrv_unref(top); bdrv_unref(top);
@ -4949,6 +5001,31 @@ exit:
return ret; return ret;
} }
/**
 * Default implementation of BlockDriver.bdrv_get_allocated_file_size()
 * for nodes whose data lives in their children.
 *
 * Walks bs->children and adds up the allocated size of every child whose
 * role has at least one of BDRV_CHILD_DATA, BDRV_CHILD_METADATA or
 * BDRV_CHILD_FILTERED set.  (This excludes backing children.)
 *
 * Returns the accumulated size.  If querying one of the counted children
 * yields a negative value, that value is returned immediately.
 */
static int64_t bdrv_sum_allocated_file_size(BlockDriverState *bs)
{
    int64_t total = 0;
    BdrvChild *c;

    QLIST_FOREACH(c, &bs->children, next) {
        int64_t size;

        /* Skip children that carry none of the counted roles */
        if (!(c->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
                         BDRV_CHILD_FILTERED)))
        {
            continue;
        }

        size = bdrv_get_allocated_file_size(c->bs);
        if (size < 0) {
            /* Propagate the child's negative result unchanged */
            return size;
        }

        total += size;
    }

    return total;
}
/** /**
* Length of an allocated file in bytes. Sparse files are counted by actual * Length of an allocated file in bytes. Sparse files are counted by actual
* allocated space. Return < 0 if error or unknown. * allocated space. Return < 0 if error or unknown.
@ -4962,10 +5039,21 @@ int64_t bdrv_get_allocated_file_size(BlockDriverState *bs)
if (drv->bdrv_get_allocated_file_size) { if (drv->bdrv_get_allocated_file_size) {
return drv->bdrv_get_allocated_file_size(bs); return drv->bdrv_get_allocated_file_size(bs);
} }
if (bs->file) {
return bdrv_get_allocated_file_size(bs->file->bs); if (drv->bdrv_file_open) {
} /*
* Protocol drivers default to -ENOTSUP (most of their data is
* not stored in any of their children (if they even have any),
* so there is no generic way to figure it out).
*/
return -ENOTSUP; return -ENOTSUP;
} else if (drv->is_filter) {
/* Filter drivers default to the size of their filtered child */
return bdrv_get_allocated_file_size(bdrv_filter_bs(bs));
} else {
/* Other drivers default to summing their children's sizes */
return bdrv_sum_allocated_file_size(bs);
}
} }
/* /*
@ -5047,12 +5135,27 @@ bool bdrv_is_sg(BlockDriverState *bs)
return bs->sg; return bs->sg;
} }
bool bdrv_is_encrypted(BlockDriverState *bs) /**
* Return whether the given node supports compressed writes.
*/
bool bdrv_supports_compressed_writes(BlockDriverState *bs)
{ {
if (bs->backing && bs->backing->bs->encrypted) { BlockDriverState *filtered;
return true;
if (!bs->drv || !block_driver_can_compress(bs->drv)) {
return false;
} }
return bs->encrypted;
filtered = bdrv_filter_bs(bs);
if (filtered) {
/*
* Filters can only forward compressed writes, so we have to
* check the child.
*/
return bdrv_supports_compressed_writes(filtered);
}
return true;
} }
const char *bdrv_get_format_name(BlockDriverState *bs) const char *bdrv_get_format_name(BlockDriverState *bs)
@ -5337,7 +5440,7 @@ BlockDriverState *bdrv_lookup_bs(const char *device,
bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base) bool bdrv_chain_contains(BlockDriverState *top, BlockDriverState *base)
{ {
while (top && top != base) { while (top && top != base) {
top = backing_bs(top); top = bdrv_filter_or_cow_bs(top);
} }
return top != NULL; return top != NULL;
@ -5409,20 +5512,24 @@ int bdrv_has_zero_init_1(BlockDriverState *bs)
int bdrv_has_zero_init(BlockDriverState *bs) int bdrv_has_zero_init(BlockDriverState *bs)
{ {
BlockDriverState *filtered;
if (!bs->drv) { if (!bs->drv) {
return 0; return 0;
} }
/* If BS is a copy on write image, it is initialized to /* If BS is a copy on write image, it is initialized to
the contents of the base image, which may not be zeroes. */ the contents of the base image, which may not be zeroes. */
if (bs->backing) { if (bdrv_cow_child(bs)) {
return 0; return 0;
} }
if (bs->drv->bdrv_has_zero_init) { if (bs->drv->bdrv_has_zero_init) {
return bs->drv->bdrv_has_zero_init(bs); return bs->drv->bdrv_has_zero_init(bs);
} }
if (bs->file && bs->drv->is_filter) {
return bdrv_has_zero_init(bs->file->bs); filtered = bdrv_filter_bs(bs);
if (filtered) {
return bdrv_has_zero_init(filtered);
} }
/* safe default */ /* safe default */
@ -5452,8 +5559,9 @@ int bdrv_get_info(BlockDriverState *bs, BlockDriverInfo *bdi)
return -ENOMEDIUM; return -ENOMEDIUM;
} }
if (!drv->bdrv_get_info) { if (!drv->bdrv_get_info) {
if (bs->file && drv->is_filter) { BlockDriverState *filtered = bdrv_filter_bs(bs);
return bdrv_get_info(bs->file->bs, bdi); if (filtered) {
return bdrv_get_info(filtered, bdi);
} }
return -ENOTSUP; return -ENOTSUP;
} }
@ -5492,17 +5600,7 @@ void bdrv_debug_event(BlockDriverState *bs, BlkdebugEvent event)
static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs) static BlockDriverState *bdrv_find_debug_node(BlockDriverState *bs)
{ {
while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) { while (bs && bs->drv && !bs->drv->bdrv_debug_breakpoint) {
if (bs->file) { bs = bdrv_primary_bs(bs);
bs = bs->file->bs;
continue;
}
if (bs->drv->is_filter && bs->backing) {
bs = bs->backing->bs;
continue;
}
break;
} }
if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) { if (bs && bs->drv && bs->drv->bdrv_debug_breakpoint) {
@ -5537,7 +5635,7 @@ int bdrv_debug_remove_breakpoint(BlockDriverState *bs, const char *tag)
int bdrv_debug_resume(BlockDriverState *bs, const char *tag) int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
{ {
while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) { while (bs && (!bs->drv || !bs->drv->bdrv_debug_resume)) {
bs = bs->file ? bs->file->bs : NULL; bs = bdrv_primary_bs(bs);
} }
if (bs && bs->drv && bs->drv->bdrv_debug_resume) { if (bs && bs->drv && bs->drv->bdrv_debug_resume) {
@ -5550,7 +5648,7 @@ int bdrv_debug_resume(BlockDriverState *bs, const char *tag)
bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag) bool bdrv_debug_is_suspended(BlockDriverState *bs, const char *tag)
{ {
while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) { while (bs && bs->drv && !bs->drv->bdrv_debug_is_suspended) {
bs = bs->file ? bs->file->bs : NULL; bs = bdrv_primary_bs(bs);
} }
if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) { if (bs && bs->drv && bs->drv->bdrv_debug_is_suspended) {
@ -5571,8 +5669,10 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
char *backing_file_full = NULL; char *backing_file_full = NULL;
char *filename_tmp = NULL; char *filename_tmp = NULL;
int is_protocol = 0; int is_protocol = 0;
bool filenames_refreshed = false;
BlockDriverState *curr_bs = NULL; BlockDriverState *curr_bs = NULL;
BlockDriverState *retval = NULL; BlockDriverState *retval = NULL;
BlockDriverState *bs_below;
if (!bs || !bs->drv || !backing_file) { if (!bs || !bs->drv || !backing_file) {
return NULL; return NULL;
@ -5583,15 +5683,47 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
is_protocol = path_has_protocol(backing_file); is_protocol = path_has_protocol(backing_file);
for (curr_bs = bs; curr_bs->backing; curr_bs = curr_bs->backing->bs) { /*
* Being largely a legacy function, skip any filters here
* (because filters do not have normal filenames, so they cannot
* match anyway; and allowing json:{} filenames is a bit out of
* scope).
*/
for (curr_bs = bdrv_skip_filters(bs);
bdrv_cow_child(curr_bs) != NULL;
curr_bs = bs_below)
{
bs_below = bdrv_backing_chain_next(curr_bs);
/* If either of the filename paths is actually a protocol, then if (bdrv_backing_overridden(curr_bs)) {
* compare unmodified paths; otherwise make paths relative */ /*
if (is_protocol || path_has_protocol(curr_bs->backing_file)) { * If the backing file was overridden, we can only compare
* directly against the backing node's filename.
*/
if (!filenames_refreshed) {
/*
* This will automatically refresh all of the
* filenames in the rest of the backing chain, so we
* only need to do this once.
*/
bdrv_refresh_filename(bs_below);
filenames_refreshed = true;
}
if (strcmp(backing_file, bs_below->filename) == 0) {
retval = bs_below;
break;
}
} else if (is_protocol || path_has_protocol(curr_bs->backing_file)) {
/*
* If either of the filename paths is actually a protocol, then
* compare unmodified paths; otherwise make paths relative.
*/
char *backing_file_full_ret; char *backing_file_full_ret;
if (strcmp(backing_file, curr_bs->backing_file) == 0) { if (strcmp(backing_file, curr_bs->backing_file) == 0) {
retval = curr_bs->backing->bs; retval = bs_below;
break; break;
} }
/* Also check against the full backing filename for the image */ /* Also check against the full backing filename for the image */
@ -5601,7 +5733,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
bool equal = strcmp(backing_file, backing_file_full_ret) == 0; bool equal = strcmp(backing_file, backing_file_full_ret) == 0;
g_free(backing_file_full_ret); g_free(backing_file_full_ret);
if (equal) { if (equal) {
retval = curr_bs->backing->bs; retval = bs_below;
break; break;
} }
} }
@ -5627,7 +5759,7 @@ BlockDriverState *bdrv_find_backing_image(BlockDriverState *bs,
g_free(filename_tmp); g_free(filename_tmp);
if (strcmp(backing_file_full, filename_full) == 0) { if (strcmp(backing_file_full, filename_full) == 0) {
retval = curr_bs->backing->bs; retval = bs_below;
break; break;
} }
} }
@ -6119,6 +6251,10 @@ void bdrv_img_create(const char *filename, const char *fmt,
"same filename as the backing file"); "same filename as the backing file");
goto out; goto out;
} }
if (backing_file[0] == '\0') {
error_setg(errp, "Expected backing file name, got empty string");
goto out;
}
} }
backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT); backing_fmt = qemu_opt_get(opts, BLOCK_OPT_BACKING_FMT);
@ -6534,6 +6670,8 @@ int bdrv_amend_options(BlockDriverState *bs, QemuOpts *opts,
bool bdrv_recurse_can_replace(BlockDriverState *bs, bool bdrv_recurse_can_replace(BlockDriverState *bs,
BlockDriverState *to_replace) BlockDriverState *to_replace)
{ {
BlockDriverState *filtered;
if (!bs || !bs->drv) { if (!bs || !bs->drv) {
return false; return false;
} }
@ -6548,9 +6686,9 @@ bool bdrv_recurse_can_replace(BlockDriverState *bs,
} }
/* For filters without an own implementation, we can recurse on our own */ /* For filters without an own implementation, we can recurse on our own */
if (bs->drv->is_filter) { filtered = bdrv_filter_bs(bs);
BdrvChild *child = bs->file ?: bs->backing; if (filtered) {
return bdrv_recurse_can_replace(child->bs, to_replace); return bdrv_recurse_can_replace(filtered, to_replace);
} }
/* Safe default */ /* Safe default */
@ -6701,7 +6839,7 @@ static bool append_strong_runtime_options(QDict *d, BlockDriverState *bs)
/* Note: This function may return false positives; it may return true /* Note: This function may return false positives; it may return true
* even if opening the backing file specified by bs's image header * even if opening the backing file specified by bs's image header
* would result in exactly bs->backing. */ * would result in exactly bs->backing. */
static bool bdrv_backing_overridden(BlockDriverState *bs) bool bdrv_backing_overridden(BlockDriverState *bs)
{ {
if (bs->backing) { if (bs->backing) {
return strcmp(bs->auto_backing_file, return strcmp(bs->auto_backing_file,
@ -6729,6 +6867,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BdrvChild *child; BdrvChild *child;
BlockDriverState *primary_child_bs;
QDict *opts; QDict *opts;
bool backing_overridden; bool backing_overridden;
bool generate_json_filename; /* Whether our default implementation should bool generate_json_filename; /* Whether our default implementation should
@ -6798,20 +6937,30 @@ void bdrv_refresh_filename(BlockDriverState *bs)
qobject_unref(bs->full_open_options); qobject_unref(bs->full_open_options);
bs->full_open_options = opts; bs->full_open_options = opts;
primary_child_bs = bdrv_primary_bs(bs);
if (drv->bdrv_refresh_filename) { if (drv->bdrv_refresh_filename) {
/* Obsolete information is of no use here, so drop the old file name /* Obsolete information is of no use here, so drop the old file name
* information before refreshing it */ * information before refreshing it */
bs->exact_filename[0] = '\0'; bs->exact_filename[0] = '\0';
drv->bdrv_refresh_filename(bs); drv->bdrv_refresh_filename(bs);
} else if (bs->file) { } else if (primary_child_bs) {
/* Try to reconstruct valid information from the underlying file */ /*
* Try to reconstruct valid information from the underlying
* file -- this only works for format nodes (filter nodes
* cannot be probed and as such must be selected by the user
* either through an options dict, or through a special
* filename which the filter driver must construct in its
* .bdrv_refresh_filename() implementation).
*/
bs->exact_filename[0] = '\0'; bs->exact_filename[0] = '\0';
/* /*
* We can use the underlying file's filename if: * We can use the underlying file's filename if:
* - it has a filename, * - it has a filename,
* - the current BDS is not a filter,
* - the file is a protocol BDS, and * - the file is a protocol BDS, and
* - opening that file (as this BDS's format) will automatically create * - opening that file (as this BDS's format) will automatically create
* the BDS tree we have right now, that is: * the BDS tree we have right now, that is:
@ -6820,11 +6969,11 @@ void bdrv_refresh_filename(BlockDriverState *bs)
* - no non-file child of this BDS has been overridden by the user * - no non-file child of this BDS has been overridden by the user
* Both of these conditions are represented by generate_json_filename. * Both of these conditions are represented by generate_json_filename.
*/ */
if (bs->file->bs->exact_filename[0] && if (primary_child_bs->exact_filename[0] &&
bs->file->bs->drv->bdrv_file_open && primary_child_bs->drv->bdrv_file_open &&
!generate_json_filename) !drv->is_filter && !generate_json_filename)
{ {
strcpy(bs->exact_filename, bs->file->bs->exact_filename); strcpy(bs->exact_filename, primary_child_bs->exact_filename);
} }
} }
@ -6844,6 +6993,7 @@ void bdrv_refresh_filename(BlockDriverState *bs)
char *bdrv_dirname(BlockDriverState *bs, Error **errp) char *bdrv_dirname(BlockDriverState *bs, Error **errp)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BlockDriverState *child_bs;
if (!drv) { if (!drv) {
error_setg(errp, "Node '%s' is ejected", bs->node_name); error_setg(errp, "Node '%s' is ejected", bs->node_name);
@ -6854,8 +7004,9 @@ char *bdrv_dirname(BlockDriverState *bs, Error **errp)
return drv->bdrv_dirname(bs, errp); return drv->bdrv_dirname(bs, errp);
} }
if (bs->file) { child_bs = bdrv_primary_bs(bs);
return bdrv_dirname(bs->file->bs, errp); if (child_bs) {
return bdrv_dirname(child_bs, errp);
} }
bdrv_refresh_filename(bs); bdrv_refresh_filename(bs);
@ -6939,3 +7090,156 @@ int bdrv_make_empty(BdrvChild *c, Error **errp)
return 0; return 0;
} }
/*
 * Look up the copy-on-write child of @bs: the child that @bs is an
 * overlay for, and whose data may be copied during COW or COR
 * operations.  In the usual case this is the backing file.
 *
 * Returns NULL if @bs is NULL, has no driver, is a filter node, or has
 * no backing child.
 */
BdrvChild *bdrv_cow_child(BlockDriverState *bs)
{
    BdrvChild *cow;

    /* Missing nodes, driverless nodes and filters have no COW child */
    if (!bs || !bs->drv || bs->drv->is_filter) {
        return NULL;
    }

    cow = bs->backing;
    if (cow) {
        /* The backing child of a non-filter node must carry the COW role */
        assert(cow->role & BDRV_CHILD_COW);
    }

    return cow;
}
/*
 * If @bs is a filter node, return the single child it filters.
 *
 * Returns NULL if @bs is NULL, has no driver, is not a filter, or has
 * neither a backing nor a file child.
 */
BdrvChild *bdrv_filter_child(BlockDriverState *bs)
{
    BdrvChild *filtered;

    if (!bs || !bs->drv || !bs->drv->is_filter) {
        return NULL;
    }

    /* A filter may use @backing or @file, but never both at once */
    assert(!bs->backing || !bs->file);

    /* Prefer @backing; fall back to @file when there is no backing child */
    filtered = bs->backing ? bs->backing : bs->file;
    if (filtered) {
        assert(filtered->role & BDRV_CHILD_FILTERED);
    }

    return filtered;
}
/*
 * Return whichever of bdrv_cow_child(@bs) and bdrv_filter_child(@bs)
 * is non-NULL.
 *
 * Returns NULL when @bs has neither a COW child nor a filtered child.
 */
BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs)
{
    BdrvChild *cow = bdrv_cow_child(bs);
    BdrvChild *filtered = bdrv_filter_child(bs);

    /* The two are mutually exclusive: filters have no COW backing file */
    assert(!cow || !filtered);

    if (cow) {
        return cow;
    }
    return filtered;
}
/*
 * Find the primary child of @bs, i.e. the one child whose role includes
 * BDRV_CHILD_PRIMARY.  For filters that is the filtered child; for
 * other nodes it is usually the child storing metadata.
 * (Put differently, it is usually the child that shares its filename
 * with @bs.)
 *
 * Not every driver has a primary child (quorum, for example, does
 * not); NULL is returned in that case.
 */
BdrvChild *bdrv_primary_child(BlockDriverState *bs)
{
    BdrvChild *primary = NULL;
    BdrvChild *child;

    QLIST_FOREACH(child, &bs->children, next) {
        if (!(child->role & BDRV_CHILD_PRIMARY)) {
            continue;
        }

        /* At most one child may carry the PRIMARY role */
        assert(!primary);
        primary = child;
    }

    return primary;
}
/*
 * Walk down the chain of filtered children starting at @bs and return
 * the node at which the walk stops.
 *
 * The walk stops at the first node that has no filtered child.  If
 * @stop_on_explicit_filter is true, it also stops at the first node
 * that is not marked implicit, even if that node is a filter.
 *
 * Returns NULL only when @bs itself is NULL.
 */
static BlockDriverState *bdrv_do_skip_filters(BlockDriverState *bs,
                                              bool stop_on_explicit_filter)
{
    if (!bs) {
        return NULL;
    }

    for (;;) {
        BdrvChild *filtered;

        /* An explicitly created node ends the walk when requested */
        if (stop_on_explicit_filter && !bs->implicit) {
            break;
        }

        filtered = bdrv_filter_child(bs);
        if (!filtered) {
            /*
             * A filter embedded in a working block graph must have a
             * child.  Checking that here ensures we never hand back a
             * filter node the caller does not expect.
             */
            assert(!bs->drv || !bs->drv->is_filter);
            break;
        }

        bs = filtered->bs;
    }

    /*
     * Nodes with bs->drv == NULL are treated as non-filters here
     * (bs->drv == NULL should be replaced by something else anyway).
     * As a consequence, a non-NULL @bs always yields a non-NULL result.
     */
    return bs;
}
/*
 * Starting from @bs (inclusive) and following filtered children, return
 * the first node that either was not added implicitly or has no
 * filtered child.
 */
BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs)
{
    const bool stop_on_explicit_filter = true;

    return bdrv_do_skip_filters(bs, stop_on_explicit_filter);
}
/*
 * Starting from @bs (inclusive) and following filtered children, return
 * the first node that has no filtered child.
 */
BlockDriverState *bdrv_skip_filters(BlockDriverState *bs)
{
    const bool stop_on_explicit_filter = false;

    return bdrv_do_skip_filters(bs, stop_on_explicit_filter);
}
/*
 * Step to the next image in a backing chain: skip any filters on top
 * of @bs to reach the first non-filter image, take that image's COW
 * (backing) node, and skip any filters on top of that as well.
 */
BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs)
{
    BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
    BlockDriverState *cow_bs = bdrv_cow_bs(unfiltered_bs);

    return bdrv_skip_filters(cow_bs);
}

View File

@ -175,8 +175,6 @@ BlockDriver bdrv_backup_top_filter = {
.bdrv_co_pdiscard = backup_top_co_pdiscard, .bdrv_co_pdiscard = backup_top_co_pdiscard,
.bdrv_co_flush = backup_top_co_flush, .bdrv_co_flush = backup_top_co_flush,
.bdrv_co_block_status = bdrv_co_block_status_from_backing,
.bdrv_refresh_filename = backup_top_refresh_filename, .bdrv_refresh_filename = backup_top_refresh_filename,
.bdrv_child_perm = backup_top_child_perm, .bdrv_child_perm = backup_top_child_perm,
@ -281,7 +279,7 @@ void bdrv_backup_top_drop(BlockDriverState *bs)
s->active = false; s->active = false;
bdrv_child_refresh_perms(bs, bs->backing, &error_abort); bdrv_child_refresh_perms(bs, bs->backing, &error_abort);
bdrv_replace_node(bs, backing_bs(bs), &error_abort); bdrv_replace_node(bs, bs->backing->bs, &error_abort);
bdrv_set_backing_hd(bs, NULL, &error_abort); bdrv_set_backing_hd(bs, NULL, &error_abort);
bdrv_drained_end(bs); bdrv_drained_end(bs);

View File

@ -297,6 +297,7 @@ static int64_t backup_calculate_cluster_size(BlockDriverState *target,
{ {
int ret; int ret;
BlockDriverInfo bdi; BlockDriverInfo bdi;
bool target_does_cow = bdrv_backing_chain_next(target);
/* /*
* If there is no backing file on the target, we cannot rely on COW if our * If there is no backing file on the target, we cannot rely on COW if our
@ -304,7 +305,7 @@ static int64_t backup_calculate_cluster_size(BlockDriverState *target,
* targets with a backing file, try to avoid COW if possible. * targets with a backing file, try to avoid COW if possible.
*/ */
ret = bdrv_get_info(target, &bdi); ret = bdrv_get_info(target, &bdi);
if (ret == -ENOTSUP && !target->backing) { if (ret == -ENOTSUP && !target_does_cow) {
/* Cluster size is not defined */ /* Cluster size is not defined */
warn_report("The target block device doesn't provide " warn_report("The target block device doesn't provide "
"information about the block size and it doesn't have a " "information about the block size and it doesn't have a "
@ -313,14 +314,14 @@ static int64_t backup_calculate_cluster_size(BlockDriverState *target,
"this default, the backup may be unusable", "this default, the backup may be unusable",
BACKUP_CLUSTER_SIZE_DEFAULT); BACKUP_CLUSTER_SIZE_DEFAULT);
return BACKUP_CLUSTER_SIZE_DEFAULT; return BACKUP_CLUSTER_SIZE_DEFAULT;
} else if (ret < 0 && !target->backing) { } else if (ret < 0 && !target_does_cow) {
error_setg_errno(errp, -ret, error_setg_errno(errp, -ret,
"Couldn't determine the cluster size of the target image, " "Couldn't determine the cluster size of the target image, "
"which has no backing file"); "which has no backing file");
error_append_hint(errp, error_append_hint(errp,
"Aborting, since this may create an unusable destination image\n"); "Aborting, since this may create an unusable destination image\n");
return ret; return ret;
} else if (ret < 0 && target->backing) { } else if (ret < 0 && target_does_cow) {
/* Not fatal; just trudge on ahead. */ /* Not fatal; just trudge on ahead. */
return BACKUP_CLUSTER_SIZE_DEFAULT; return BACKUP_CLUSTER_SIZE_DEFAULT;
} }
@ -371,7 +372,7 @@ BlockJob *backup_job_create(const char *job_id, BlockDriverState *bs,
return NULL; return NULL;
} }
if (compress && !block_driver_can_compress(target->drv)) { if (compress && !bdrv_supports_compressed_writes(target)) {
error_setg(errp, "Compression is not supported for this drive %s", error_setg(errp, "Compression is not supported for this drive %s",
bdrv_get_device_name(target)); bdrv_get_device_name(target));
return NULL; return NULL;

View File

@ -752,8 +752,11 @@ static int coroutine_fn blkdebug_co_block_status(BlockDriverState *bs,
return err; return err;
} }
return bdrv_co_block_status_from_file(bs, want_zero, offset, bytes, assert(bs->file && bs->file->bs);
pnum, map, file); *pnum = bytes;
*map = offset;
*file = bs->file->bs;
return BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
} }
static void blkdebug_close(BlockDriverState *bs) static void blkdebug_close(BlockDriverState *bs)

View File

@ -515,7 +515,6 @@ static BlockDriver bdrv_blk_log_writes = {
.bdrv_co_pwrite_zeroes = blk_log_writes_co_pwrite_zeroes, .bdrv_co_pwrite_zeroes = blk_log_writes_co_pwrite_zeroes,
.bdrv_co_flush_to_disk = blk_log_writes_co_flush_to_disk, .bdrv_co_flush_to_disk = blk_log_writes_co_flush_to_disk,
.bdrv_co_pdiscard = blk_log_writes_co_pdiscard, .bdrv_co_pdiscard = blk_log_writes_co_pdiscard,
.bdrv_co_block_status = bdrv_co_block_status_from_file,
.is_filter = true, .is_filter = true,
.strong_runtime_opts = blk_log_writes_strong_runtime_opts, .strong_runtime_opts = blk_log_writes_strong_runtime_opts,

View File

@ -2279,10 +2279,13 @@ int blk_commit_all(void)
while ((blk = blk_all_next(blk)) != NULL) { while ((blk = blk_all_next(blk)) != NULL) {
AioContext *aio_context = blk_get_aio_context(blk); AioContext *aio_context = blk_get_aio_context(blk);
BlockDriverState *unfiltered_bs = bdrv_skip_filters(blk_bs(blk));
aio_context_acquire(aio_context); aio_context_acquire(aio_context);
if (blk_is_inserted(blk) && blk->root->bs->backing) { if (blk_is_inserted(blk) && bdrv_cow_child(unfiltered_bs)) {
int ret = bdrv_commit(blk->root->bs); int ret;
ret = bdrv_commit(unfiltered_bs);
if (ret < 0) { if (ret < 0) {
aio_context_release(aio_context); aio_context_release(aio_context);
return ret; return ret;

View File

@ -440,8 +440,8 @@ static int block_copy_block_status(BlockCopyState *s, int64_t offset,
BlockDriverState *base; BlockDriverState *base;
int ret; int ret;
if (s->skip_unallocated && s->source->bs->backing) { if (s->skip_unallocated) {
base = s->source->bs->backing->bs; base = bdrv_backing_chain_next(s->source->bs);
} else { } else {
base = NULL; base = NULL;
} }

View File

@ -37,6 +37,7 @@ typedef struct CommitBlockJob {
BlockBackend *top; BlockBackend *top;
BlockBackend *base; BlockBackend *base;
BlockDriverState *base_bs; BlockDriverState *base_bs;
BlockDriverState *base_overlay;
BlockdevOnError on_error; BlockdevOnError on_error;
bool base_read_only; bool base_read_only;
bool chain_frozen; bool chain_frozen;
@ -89,7 +90,7 @@ static void commit_abort(Job *job)
* XXX Can (or should) we somehow keep 'consistent read' blocked even * XXX Can (or should) we somehow keep 'consistent read' blocked even
* after the failed/cancelled commit job is gone? If we already wrote * after the failed/cancelled commit job is gone? If we already wrote
* something to base, the intermediate images aren't valid any more. */ * something to base, the intermediate images aren't valid any more. */
bdrv_replace_node(s->commit_top_bs, backing_bs(s->commit_top_bs), bdrv_replace_node(s->commit_top_bs, s->commit_top_bs->backing->bs,
&error_abort); &error_abort);
bdrv_unref(s->commit_top_bs); bdrv_unref(s->commit_top_bs);
@ -153,7 +154,7 @@ static int coroutine_fn commit_run(Job *job, Error **errp)
break; break;
} }
/* Copy if allocated above the base */ /* Copy if allocated above the base */
ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base), false, ret = bdrv_is_allocated_above(blk_bs(s->top), s->base_overlay, true,
offset, COMMIT_BUFFER_SIZE, &n); offset, COMMIT_BUFFER_SIZE, &n);
copy = (ret == 1); copy = (ret == 1);
trace_commit_one_iteration(s, offset, n, ret); trace_commit_one_iteration(s, offset, n, ret);
@ -237,7 +238,6 @@ static void bdrv_commit_top_child_perm(BlockDriverState *bs, BdrvChild *c,
static BlockDriver bdrv_commit_top = { static BlockDriver bdrv_commit_top = {
.format_name = "commit_top", .format_name = "commit_top",
.bdrv_co_preadv = bdrv_commit_top_preadv, .bdrv_co_preadv = bdrv_commit_top_preadv,
.bdrv_co_block_status = bdrv_co_block_status_from_backing,
.bdrv_refresh_filename = bdrv_commit_top_refresh_filename, .bdrv_refresh_filename = bdrv_commit_top_refresh_filename,
.bdrv_child_perm = bdrv_commit_top_child_perm, .bdrv_child_perm = bdrv_commit_top_child_perm,
@ -253,15 +253,35 @@ void commit_start(const char *job_id, BlockDriverState *bs,
CommitBlockJob *s; CommitBlockJob *s;
BlockDriverState *iter; BlockDriverState *iter;
BlockDriverState *commit_top_bs = NULL; BlockDriverState *commit_top_bs = NULL;
BlockDriverState *filtered_base;
Error *local_err = NULL; Error *local_err = NULL;
int64_t base_size, top_size;
uint64_t base_perms, iter_shared_perms;
int ret; int ret;
assert(top != bs); assert(top != bs);
if (top == base) { if (bdrv_skip_filters(top) == bdrv_skip_filters(base)) {
error_setg(errp, "Invalid files for merge: top and base are the same"); error_setg(errp, "Invalid files for merge: top and base are the same");
return; return;
} }
base_size = bdrv_getlength(base);
if (base_size < 0) {
error_setg_errno(errp, -base_size, "Could not inquire base image size");
return;
}
top_size = bdrv_getlength(top);
if (top_size < 0) {
error_setg_errno(errp, -top_size, "Could not inquire top image size");
return;
}
base_perms = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE;
if (base_size < top_size) {
base_perms |= BLK_PERM_RESIZE;
}
s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL, s = block_job_create(job_id, &commit_job_driver, NULL, bs, 0, BLK_PERM_ALL,
speed, creation_flags, NULL, NULL, errp); speed, creation_flags, NULL, NULL, errp);
if (!s) { if (!s) {
@ -301,17 +321,43 @@ void commit_start(const char *job_id, BlockDriverState *bs,
s->commit_top_bs = commit_top_bs; s->commit_top_bs = commit_top_bs;
/* Block all nodes between top and base, because they will /*
* disappear from the chain after this operation. */ * Block all nodes between top and base, because they will
assert(bdrv_chain_contains(top, base)); * disappear from the chain after this operation.
for (iter = top; iter != base; iter = backing_bs(iter)) { * Note that this assumes that the user is fine with removing all
/* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves * nodes (including R/W filters) between top and base. Assuring
* this is the responsibility of the interface (i.e. whoever calls
* commit_start()).
*/
s->base_overlay = bdrv_find_overlay(top, base);
assert(s->base_overlay);
/*
* The topmost node with
* bdrv_skip_filters(filtered_base) == bdrv_skip_filters(base)
*/
filtered_base = bdrv_cow_bs(s->base_overlay);
assert(bdrv_skip_filters(filtered_base) == bdrv_skip_filters(base));
/*
* XXX BLK_PERM_WRITE needs to be allowed so we don't block ourselves
* at s->base (if writes are blocked for a node, they are also blocked * at s->base (if writes are blocked for a node, they are also blocked
* for its backing file). The other options would be a second filter * for its backing file). The other options would be a second filter
* driver above s->base. */ * driver above s->base.
*/
iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
for (iter = top; iter != base; iter = bdrv_filter_or_cow_bs(iter)) {
if (iter == filtered_base) {
/*
* From here on, all nodes are filters on the base. This
* allows us to share BLK_PERM_CONSISTENT_READ.
*/
iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
}
ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, iter_shared_perms, errp);
errp);
if (ret < 0) { if (ret < 0) {
goto fail; goto fail;
} }
@ -328,9 +374,7 @@ void commit_start(const char *job_id, BlockDriverState *bs,
} }
s->base = blk_new(s->common.job.aio_context, s->base = blk_new(s->common.job.aio_context,
BLK_PERM_CONSISTENT_READ base_perms,
| BLK_PERM_WRITE
| BLK_PERM_RESIZE,
BLK_PERM_CONSISTENT_READ BLK_PERM_CONSISTENT_READ
| BLK_PERM_GRAPH_MOD | BLK_PERM_GRAPH_MOD
| BLK_PERM_WRITE_UNCHANGED); | BLK_PERM_WRITE_UNCHANGED);
@ -398,19 +442,22 @@ int bdrv_commit(BlockDriverState *bs)
if (!drv) if (!drv)
return -ENOMEDIUM; return -ENOMEDIUM;
if (!bs->backing) { backing_file_bs = bdrv_cow_bs(bs);
if (!backing_file_bs) {
return -ENOTSUP; return -ENOTSUP;
} }
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) || if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) { bdrv_op_is_blocked(backing_file_bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL))
{
return -EBUSY; return -EBUSY;
} }
ro = bs->backing->bs->read_only; ro = backing_file_bs->read_only;
if (ro) { if (ro) {
if (bdrv_reopen_set_read_only(bs->backing->bs, false, NULL)) { if (bdrv_reopen_set_read_only(backing_file_bs, false, NULL)) {
return -EACCES; return -EACCES;
} }
} }
@ -428,8 +475,6 @@ int bdrv_commit(BlockDriverState *bs)
} }
/* Insert commit_top block node above backing, so we can write to it */ /* Insert commit_top block node above backing, so we can write to it */
backing_file_bs = backing_bs(bs);
commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR, commit_top_bs = bdrv_new_open_driver(&bdrv_commit_top, NULL, BDRV_O_RDWR,
&local_err); &local_err);
if (commit_top_bs == NULL) { if (commit_top_bs == NULL) {
@ -515,7 +560,7 @@ ro_cleanup:
qemu_vfree(buf); qemu_vfree(buf);
blk_unref(backing); blk_unref(backing);
if (backing_file_bs) { if (bdrv_cow_bs(bs) != backing_file_bs) {
bdrv_set_backing_hd(bs, backing_file_bs, &error_abort); bdrv_set_backing_hd(bs, backing_file_bs, &error_abort);
} }
bdrv_unref(commit_top_bs); bdrv_unref(commit_top_bs);
@ -523,7 +568,7 @@ ro_cleanup:
if (ro) { if (ro) {
/* ignoring error return here */ /* ignoring error return here */
bdrv_reopen_set_read_only(bs->backing->bs, true, NULL); bdrv_reopen_set_read_only(backing_file_bs, true, NULL);
} }
return ret; return ret;

View File

@ -107,6 +107,16 @@ static int coroutine_fn cor_co_pdiscard(BlockDriverState *bs,
} }
/*
 * Compressed-write callback of the copy-on-read driver: pass the
 * request on unchanged to the node's file child, with
 * BDRV_REQ_WRITE_COMPRESSED as the only request flag.
 *
 * Returns whatever bdrv_co_pwritev() returns.
 */
static int coroutine_fn cor_co_pwritev_compressed(BlockDriverState *bs,
                                                  uint64_t offset,
                                                  uint64_t bytes,
                                                  QEMUIOVector *qiov)
{
    BdrvChild *target = bs->file;
    int ret;

    ret = bdrv_co_pwritev(target, offset, bytes, qiov,
                          BDRV_REQ_WRITE_COMPRESSED);
    return ret;
}
static void cor_eject(BlockDriverState *bs, bool eject_flag) static void cor_eject(BlockDriverState *bs, bool eject_flag)
{ {
bdrv_eject(bs->file->bs, eject_flag); bdrv_eject(bs->file->bs, eject_flag);
@ -131,12 +141,11 @@ static BlockDriver bdrv_copy_on_read = {
.bdrv_co_pwritev = cor_co_pwritev, .bdrv_co_pwritev = cor_co_pwritev,
.bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes, .bdrv_co_pwrite_zeroes = cor_co_pwrite_zeroes,
.bdrv_co_pdiscard = cor_co_pdiscard, .bdrv_co_pdiscard = cor_co_pdiscard,
.bdrv_co_pwritev_compressed = cor_co_pwritev_compressed,
.bdrv_eject = cor_eject, .bdrv_eject = cor_eject,
.bdrv_lock_medium = cor_lock_medium, .bdrv_lock_medium = cor_lock_medium,
.bdrv_co_block_status = bdrv_co_block_status_from_file,
.has_variable_length = true, .has_variable_length = true,
.is_filter = true, .is_filter = true,
}; };

View File

@ -299,6 +299,11 @@ static QemuOptsList raw_runtime_opts = {
.type = QEMU_OPT_STRING, .type = QEMU_OPT_STRING,
.help = "host AIO implementation (threads, native)", .help = "host AIO implementation (threads, native)",
}, },
{
.name = "locking",
.type = QEMU_OPT_STRING,
.help = "file locking mode (on/off/auto, default: auto)",
},
{ /* end of list */ } { /* end of list */ }
}, },
}; };
@ -333,6 +338,7 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
Error *local_err = NULL; Error *local_err = NULL;
const char *filename; const char *filename;
bool use_aio; bool use_aio;
OnOffAuto locking;
int ret; int ret;
s->type = FTYPE_FILE; s->type = FTYPE_FILE;
@ -343,10 +349,24 @@ static int raw_open(BlockDriverState *bs, QDict *options, int flags,
goto fail; goto fail;
} }
if (qdict_get_try_bool(options, "locking", false)) { locking = qapi_enum_parse(&OnOffAuto_lookup,
qemu_opt_get(opts, "locking"),
ON_OFF_AUTO_AUTO, &local_err);
if (local_err) {
error_propagate(errp, local_err);
ret = -EINVAL;
goto fail;
}
switch (locking) {
case ON_OFF_AUTO_ON:
error_setg(errp, "locking=on is not supported on Windows"); error_setg(errp, "locking=on is not supported on Windows");
ret = -EINVAL; ret = -EINVAL;
goto fail; goto fail;
case ON_OFF_AUTO_OFF:
case ON_OFF_AUTO_AUTO:
break;
default:
g_assert_not_reached();
} }
filename = qemu_opt_get(opts, "filename"); filename = qemu_opt_get(opts, "filename");

View File

@ -146,8 +146,6 @@ static BlockDriver bdrv_compress = {
.bdrv_eject = compress_eject, .bdrv_eject = compress_eject,
.bdrv_lock_medium = compress_lock_medium, .bdrv_lock_medium = compress_lock_medium,
.bdrv_co_block_status = bdrv_co_block_status_from_file,
.has_variable_length = true, .has_variable_length = true,
.is_filter = true, .is_filter = true,
}; };

View File

@ -135,6 +135,8 @@ static void bdrv_merge_limits(BlockLimits *dst, const BlockLimits *src)
void bdrv_refresh_limits(BlockDriverState *bs, Error **errp) void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BdrvChild *c;
bool have_limits;
Error *local_err = NULL; Error *local_err = NULL;
memset(&bs->bl, 0, sizeof(bs->bl)); memset(&bs->bl, 0, sizeof(bs->bl));
@ -149,14 +151,21 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
drv->bdrv_co_preadv_part) ? 1 : 512; drv->bdrv_co_preadv_part) ? 1 : 512;
/* Take some limits from the children as a default */ /* Take some limits from the children as a default */
if (bs->file) { have_limits = false;
bdrv_refresh_limits(bs->file->bs, &local_err); QLIST_FOREACH(c, &bs->children, next) {
if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED | BDRV_CHILD_COW))
{
bdrv_refresh_limits(c->bs, &local_err);
if (local_err) { if (local_err) {
error_propagate(errp, local_err); error_propagate(errp, local_err);
return; return;
} }
bdrv_merge_limits(&bs->bl, &bs->file->bs->bl); bdrv_merge_limits(&bs->bl, &c->bs->bl);
} else { have_limits = true;
}
}
if (!have_limits) {
bs->bl.min_mem_alignment = 512; bs->bl.min_mem_alignment = 512;
bs->bl.opt_mem_alignment = qemu_real_host_page_size; bs->bl.opt_mem_alignment = qemu_real_host_page_size;
@ -164,15 +173,6 @@ void bdrv_refresh_limits(BlockDriverState *bs, Error **errp)
bs->bl.max_iov = IOV_MAX; bs->bl.max_iov = IOV_MAX;
} }
if (bs->backing) {
bdrv_refresh_limits(bs->backing->bs, &local_err);
if (local_err) {
error_propagate(errp, local_err);
return;
}
bdrv_merge_limits(&bs->bl, &bs->backing->bs->bl);
}
/* Then let the driver override it */ /* Then let the driver override it */
if (drv->bdrv_refresh_limits) { if (drv->bdrv_refresh_limits) {
drv->bdrv_refresh_limits(bs, errp); drv->bdrv_refresh_limits(bs, errp);
@ -2255,36 +2255,6 @@ typedef struct BdrvCoBlockStatusData {
BlockDriverState **file; BlockDriverState **file;
} BdrvCoBlockStatusData; } BdrvCoBlockStatusData;
/*
 * Block-status helper that redirects the whole query to the file
 * child: the full requested range [@offset, @offset + @bytes) is
 * reported as mapping 1:1 onto bs->file->bs.
 *
 * @want_zero is not used.  On return *@pnum is @bytes, *@map is
 * @offset and *@file is the file child's node.  @bs must have a file
 * child.
 *
 * Returns BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID.
 */
int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
                                                bool want_zero,
                                                int64_t offset,
                                                int64_t bytes,
                                                int64_t *pnum,
                                                int64_t *map,
                                                BlockDriverState **file)
{
    BdrvChild *child = bs->file;
    int status = BDRV_BLOCK_RAW;

    assert(child && child->bs);

    /* Same offset, same length, just one level further down */
    *pnum = bytes;
    *map = offset;
    *file = child->bs;

    status |= BDRV_BLOCK_OFFSET_VALID;
    return status;
}
/*
 * Block-status helper that redirects the whole query to the backing
 * child: the full requested range [@offset, @offset + @bytes) is
 * reported as mapping 1:1 onto bs->backing->bs.
 *
 * @want_zero is not used.  On return *@pnum is @bytes, *@map is
 * @offset and *@file is the backing child's node.  @bs must have a
 * backing child.
 *
 * Returns BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID.
 */
int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
                                                   bool want_zero,
                                                   int64_t offset,
                                                   int64_t bytes,
                                                   int64_t *pnum,
                                                   int64_t *map,
                                                   BlockDriverState **file)
{
    BdrvChild *child = bs->backing;
    int status = BDRV_BLOCK_RAW;

    assert(child && child->bs);

    /* Same offset, same length, just one level further down */
    *pnum = bytes;
    *map = offset;
    *file = child->bs;

    status |= BDRV_BLOCK_OFFSET_VALID;
    return status;
}
/* /*
* Returns the allocation status of the specified sectors. * Returns the allocation status of the specified sectors.
* Drivers not implementing the functionality are assumed to not support * Drivers not implementing the functionality are assumed to not support
@ -2325,6 +2295,7 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
BlockDriverState *local_file = NULL; BlockDriverState *local_file = NULL;
int64_t aligned_offset, aligned_bytes; int64_t aligned_offset, aligned_bytes;
uint32_t align; uint32_t align;
bool has_filtered_child;
assert(pnum); assert(pnum);
*pnum = 0; *pnum = 0;
@ -2350,7 +2321,8 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
/* Must be non-NULL or bdrv_getlength() would have failed */ /* Must be non-NULL or bdrv_getlength() would have failed */
assert(bs->drv); assert(bs->drv);
if (!bs->drv->bdrv_co_block_status) { has_filtered_child = bdrv_filter_child(bs);
if (!bs->drv->bdrv_co_block_status && !has_filtered_child) {
*pnum = bytes; *pnum = bytes;
ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED; ret = BDRV_BLOCK_DATA | BDRV_BLOCK_ALLOCATED;
if (offset + bytes == total_size) { if (offset + bytes == total_size) {
@ -2371,9 +2343,20 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
aligned_offset = QEMU_ALIGN_DOWN(offset, align); aligned_offset = QEMU_ALIGN_DOWN(offset, align);
aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset; aligned_bytes = ROUND_UP(offset + bytes, align) - aligned_offset;
if (bs->drv->bdrv_co_block_status) {
ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset, ret = bs->drv->bdrv_co_block_status(bs, want_zero, aligned_offset,
aligned_bytes, pnum, &local_map, aligned_bytes, pnum, &local_map,
&local_file); &local_file);
} else {
/* Default code for filters */
local_file = bdrv_filter_bs(bs);
assert(local_file);
*pnum = aligned_bytes;
local_map = aligned_offset;
ret = BDRV_BLOCK_RAW | BDRV_BLOCK_OFFSET_VALID;
}
if (ret < 0) { if (ret < 0) {
*pnum = 0; *pnum = 0;
goto out; goto out;
@ -2409,9 +2392,10 @@ static int coroutine_fn bdrv_co_block_status(BlockDriverState *bs,
if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) { if (ret & (BDRV_BLOCK_DATA | BDRV_BLOCK_ZERO)) {
ret |= BDRV_BLOCK_ALLOCATED; ret |= BDRV_BLOCK_ALLOCATED;
} else if (want_zero && bs->drv->supports_backing) { } else if (want_zero && bs->drv->supports_backing) {
if (bs->backing) { BlockDriverState *cow_bs = bdrv_cow_bs(bs);
BlockDriverState *bs2 = bs->backing->bs;
int64_t size2 = bdrv_getlength(bs2); if (cow_bs) {
int64_t size2 = bdrv_getlength(cow_bs);
if (size2 >= 0 && offset >= size2) { if (size2 >= 0 && offset >= size2) {
ret |= BDRV_BLOCK_ZERO; ret |= BDRV_BLOCK_ZERO;
@ -2479,7 +2463,7 @@ static int coroutine_fn bdrv_co_block_status_above(BlockDriverState *bs,
bool first = true; bool first = true;
assert(bs != base); assert(bs != base);
for (p = bs; p != base; p = backing_bs(p)) { for (p = bs; p != base; p = bdrv_filter_or_cow_bs(p)) {
ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map, ret = bdrv_co_block_status(p, want_zero, offset, bytes, pnum, map,
file); file);
if (ret < 0) { if (ret < 0) {
@ -2553,7 +2537,7 @@ int bdrv_block_status_above(BlockDriverState *bs, BlockDriverState *base,
int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes, int bdrv_block_status(BlockDriverState *bs, int64_t offset, int64_t bytes,
int64_t *pnum, int64_t *map, BlockDriverState **file) int64_t *pnum, int64_t *map, BlockDriverState **file)
{ {
return bdrv_block_status_above(bs, backing_bs(bs), return bdrv_block_status_above(bs, bdrv_filter_or_cow_bs(bs),
offset, bytes, pnum, map, file); offset, bytes, pnum, map, file);
} }
@ -2563,9 +2547,9 @@ int coroutine_fn bdrv_is_allocated(BlockDriverState *bs, int64_t offset,
int ret; int ret;
int64_t dummy; int64_t dummy;
ret = bdrv_common_block_status_above(bs, backing_bs(bs), false, offset, ret = bdrv_common_block_status_above(bs, bdrv_filter_or_cow_bs(bs), false,
bytes, pnum ? pnum : &dummy, NULL, offset, bytes, pnum ? pnum : &dummy,
NULL); NULL, NULL);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
@ -2628,7 +2612,7 @@ int bdrv_is_allocated_above(BlockDriverState *top,
break; break;
} }
intermediate = backing_bs(intermediate); intermediate = bdrv_filter_or_cow_bs(intermediate);
} }
*pnum = n; *pnum = n;
@ -2647,6 +2631,7 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
bool is_read) bool is_read)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BlockDriverState *child_bs = bdrv_primary_bs(bs);
int ret = -ENOTSUP; int ret = -ENOTSUP;
bdrv_inc_in_flight(bs); bdrv_inc_in_flight(bs);
@ -2659,8 +2644,8 @@ bdrv_co_rw_vmstate(BlockDriverState *bs, QEMUIOVector *qiov, int64_t pos,
} else { } else {
ret = drv->bdrv_save_vmstate(bs, qiov, pos); ret = drv->bdrv_save_vmstate(bs, qiov, pos);
} }
} else if (bs->file) { } else if (child_bs) {
ret = bdrv_co_rw_vmstate(bs->file->bs, qiov, pos, is_read); ret = bdrv_co_rw_vmstate(child_bs, qiov, pos, is_read);
} }
bdrv_dec_in_flight(bs); bdrv_dec_in_flight(bs);
@ -2770,6 +2755,8 @@ static int coroutine_fn bdrv_flush_co_entry(void *opaque)
int coroutine_fn bdrv_co_flush(BlockDriverState *bs) int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
{ {
BdrvChild *primary_child = bdrv_primary_child(bs);
BdrvChild *child;
int current_gen; int current_gen;
int ret = 0; int ret = 0;
@ -2799,7 +2786,7 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
} }
/* Write back cached data to the OS even with cache=unsafe */ /* Write back cached data to the OS even with cache=unsafe */
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_OS); BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_OS);
if (bs->drv->bdrv_co_flush_to_os) { if (bs->drv->bdrv_co_flush_to_os) {
ret = bs->drv->bdrv_co_flush_to_os(bs); ret = bs->drv->bdrv_co_flush_to_os(bs);
if (ret < 0) { if (ret < 0) {
@ -2809,15 +2796,15 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
/* But don't actually force it to the disk with cache=unsafe */ /* But don't actually force it to the disk with cache=unsafe */
if (bs->open_flags & BDRV_O_NO_FLUSH) { if (bs->open_flags & BDRV_O_NO_FLUSH) {
goto flush_parent; goto flush_children;
} }
/* Check if we really need to flush anything */ /* Check if we really need to flush anything */
if (bs->flushed_gen == current_gen) { if (bs->flushed_gen == current_gen) {
goto flush_parent; goto flush_children;
} }
BLKDBG_EVENT(bs->file, BLKDBG_FLUSH_TO_DISK); BLKDBG_EVENT(primary_child, BLKDBG_FLUSH_TO_DISK);
if (!bs->drv) { if (!bs->drv) {
/* bs->drv->bdrv_co_flush() might have ejected the BDS /* bs->drv->bdrv_co_flush() might have ejected the BDS
* (even in case of apparent success) */ * (even in case of apparent success) */
@ -2861,8 +2848,17 @@ int coroutine_fn bdrv_co_flush(BlockDriverState *bs)
/* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH /* Now flush the underlying protocol. It will also have BDRV_O_NO_FLUSH
* in the case of cache=unsafe, so there are no useless flushes. * in the case of cache=unsafe, so there are no useless flushes.
*/ */
flush_parent: flush_children:
ret = bs->file ? bdrv_co_flush(bs->file->bs) : 0; ret = 0;
QLIST_FOREACH(child, &bs->children, next) {
if (child->perm & (BLK_PERM_WRITE | BLK_PERM_WRITE_UNCHANGED)) {
int this_child_ret = bdrv_co_flush(child->bs);
if (!ret) {
ret = this_child_ret;
}
}
}
out: out:
/* Notify any pending flushes that we have completed */ /* Notify any pending flushes that we have completed */
if (ret == 0) { if (ret == 0) {
@ -3309,6 +3305,7 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
Error **errp) Error **errp)
{ {
BlockDriverState *bs = child->bs; BlockDriverState *bs = child->bs;
BdrvChild *filtered, *backing;
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BdrvTrackedRequest req; BdrvTrackedRequest req;
int64_t old_size, new_bytes; int64_t old_size, new_bytes;
@ -3360,6 +3357,9 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
goto out; goto out;
} }
filtered = bdrv_filter_child(bs);
backing = bdrv_cow_child(bs);
/* /*
* If the image has a backing file that is large enough that it would * If the image has a backing file that is large enough that it would
* provide data for the new area, we cannot leave it unallocated because * provide data for the new area, we cannot leave it unallocated because
@ -3370,10 +3370,10 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
* backing file, taking care of keeping things consistent with that backing * backing file, taking care of keeping things consistent with that backing
* file is the user's responsibility. * file is the user's responsibility.
*/ */
if (new_bytes && bs->backing) { if (new_bytes && backing) {
int64_t backing_len; int64_t backing_len;
backing_len = bdrv_getlength(backing_bs(bs)); backing_len = bdrv_getlength(backing->bs);
if (backing_len < 0) { if (backing_len < 0) {
ret = backing_len; ret = backing_len;
error_setg_errno(errp, -ret, "Could not get backing file size"); error_setg_errno(errp, -ret, "Could not get backing file size");
@ -3392,8 +3392,8 @@ int coroutine_fn bdrv_co_truncate(BdrvChild *child, int64_t offset, bool exact,
goto out; goto out;
} }
ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp); ret = drv->bdrv_co_truncate(bs, offset, exact, prealloc, flags, errp);
} else if (bs->file && drv->is_filter) { } else if (filtered) {
ret = bdrv_co_truncate(bs->file, offset, exact, prealloc, flags, errp); ret = bdrv_co_truncate(filtered, offset, exact, prealloc, flags, errp);
} else { } else {
error_setg(errp, "Image format driver does not support resize"); error_setg(errp, "Image format driver does not support resize");
ret = -ENOTSUP; ret = -ENOTSUP;

View File

@ -42,6 +42,7 @@ typedef struct MirrorBlockJob {
BlockBackend *target; BlockBackend *target;
BlockDriverState *mirror_top_bs; BlockDriverState *mirror_top_bs;
BlockDriverState *base; BlockDriverState *base;
BlockDriverState *base_overlay;
/* The name of the graph node to replace */ /* The name of the graph node to replace */
char *replaces; char *replaces;
@ -677,8 +678,10 @@ static int mirror_exit_common(Job *job)
&error_abort); &error_abort);
if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) { if (!abort && s->backing_mode == MIRROR_SOURCE_BACKING_CHAIN) {
BlockDriverState *backing = s->is_none_mode ? src : s->base; BlockDriverState *backing = s->is_none_mode ? src : s->base;
if (backing_bs(target_bs) != backing) { BlockDriverState *unfiltered_target = bdrv_skip_filters(target_bs);
bdrv_set_backing_hd(target_bs, backing, &local_err);
if (bdrv_cow_bs(unfiltered_target) != backing) {
bdrv_set_backing_hd(unfiltered_target, backing, &local_err);
if (local_err) { if (local_err) {
error_report_err(local_err); error_report_err(local_err);
local_err = NULL; local_err = NULL;
@ -740,7 +743,7 @@ static int mirror_exit_common(Job *job)
* valid. * valid.
*/ */
block_job_remove_all_bdrv(bjob); block_job_remove_all_bdrv(bjob);
bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort); bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
/* We just changed the BDS the job BB refers to (with either or both of the /* We just changed the BDS the job BB refers to (with either or both of the
* bdrv_replace_node() calls), so switch the BB back so the cleanup does * bdrv_replace_node() calls), so switch the BB back so the cleanup does
@ -786,7 +789,6 @@ static void coroutine_fn mirror_throttle(MirrorBlockJob *s)
static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s) static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
{ {
int64_t offset; int64_t offset;
BlockDriverState *base = s->base;
BlockDriverState *bs = s->mirror_top_bs->backing->bs; BlockDriverState *bs = s->mirror_top_bs->backing->bs;
BlockDriverState *target_bs = blk_bs(s->target); BlockDriverState *target_bs = blk_bs(s->target);
int ret; int ret;
@ -837,7 +839,8 @@ static int coroutine_fn mirror_dirty_init(MirrorBlockJob *s)
return 0; return 0;
} }
ret = bdrv_is_allocated_above(bs, base, false, offset, bytes, &count); ret = bdrv_is_allocated_above(bs, s->base_overlay, true, offset, bytes,
&count);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
} }
@ -936,7 +939,7 @@ static int coroutine_fn mirror_run(Job *job, Error **errp)
} else { } else {
s->target_cluster_size = BDRV_SECTOR_SIZE; s->target_cluster_size = BDRV_SECTOR_SIZE;
} }
if (backing_filename[0] && !target_bs->backing && if (backing_filename[0] && !bdrv_backing_chain_next(target_bs) &&
s->granularity < s->target_cluster_size) { s->granularity < s->target_cluster_size) {
s->buf_size = MAX(s->buf_size, s->target_cluster_size); s->buf_size = MAX(s->buf_size, s->target_cluster_size);
s->cow_bitmap = bitmap_new(length); s->cow_bitmap = bitmap_new(length);
@ -1116,8 +1119,9 @@ static void mirror_complete(Job *job, Error **errp)
if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) { if (s->backing_mode == MIRROR_OPEN_BACKING_CHAIN) {
int ret; int ret;
assert(!target->backing); assert(!bdrv_backing_chain_next(target));
ret = bdrv_open_backing_file(target, NULL, "backing", errp); ret = bdrv_open_backing_file(bdrv_skip_filters(target), NULL,
"backing", errp);
if (ret < 0) { if (ret < 0) {
return; return;
} }
@ -1527,7 +1531,6 @@ static BlockDriver bdrv_mirror_top = {
.bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes, .bdrv_co_pwrite_zeroes = bdrv_mirror_top_pwrite_zeroes,
.bdrv_co_pdiscard = bdrv_mirror_top_pdiscard, .bdrv_co_pdiscard = bdrv_mirror_top_pdiscard,
.bdrv_co_flush = bdrv_mirror_top_flush, .bdrv_co_flush = bdrv_mirror_top_flush,
.bdrv_co_block_status = bdrv_co_block_status_from_backing,
.bdrv_refresh_filename = bdrv_mirror_top_refresh_filename, .bdrv_refresh_filename = bdrv_mirror_top_refresh_filename,
.bdrv_child_perm = bdrv_mirror_top_child_perm, .bdrv_child_perm = bdrv_mirror_top_child_perm,
@ -1555,8 +1558,8 @@ static BlockJob *mirror_start_job(
MirrorBlockJob *s; MirrorBlockJob *s;
MirrorBDSOpaque *bs_opaque; MirrorBDSOpaque *bs_opaque;
BlockDriverState *mirror_top_bs; BlockDriverState *mirror_top_bs;
bool target_graph_mod;
bool target_is_backing; bool target_is_backing;
uint64_t target_perms, target_shared_perms;
Error *local_err = NULL; Error *local_err = NULL;
int ret; int ret;
@ -1575,7 +1578,7 @@ static BlockJob *mirror_start_job(
buf_size = DEFAULT_MIRROR_BUF_SIZE; buf_size = DEFAULT_MIRROR_BUF_SIZE;
} }
if (bs == target) { if (bdrv_skip_filters(bs) == bdrv_skip_filters(target)) {
error_setg(errp, "Can't mirror node into itself"); error_setg(errp, "Can't mirror node into itself");
return NULL; return NULL;
} }
@ -1639,15 +1642,50 @@ static BlockJob *mirror_start_job(
* In the case of active commit, things look a bit different, though, * In the case of active commit, things look a bit different, though,
* because the target is an already populated backing file in active use. * because the target is an already populated backing file in active use.
* We can allow anything except resize there.*/ * We can allow anything except resize there.*/
target_perms = BLK_PERM_WRITE;
target_shared_perms = BLK_PERM_WRITE_UNCHANGED;
target_is_backing = bdrv_chain_contains(bs, target); target_is_backing = bdrv_chain_contains(bs, target);
target_graph_mod = (backing_mode != MIRROR_LEAVE_BACKING_CHAIN); if (target_is_backing) {
int64_t bs_size, target_size;
bs_size = bdrv_getlength(bs);
if (bs_size < 0) {
error_setg_errno(errp, -bs_size,
"Could not inquire top image size");
goto fail;
}
target_size = bdrv_getlength(target);
if (target_size < 0) {
error_setg_errno(errp, -target_size,
"Could not inquire base image size");
goto fail;
}
if (target_size < bs_size) {
target_perms |= BLK_PERM_RESIZE;
}
target_shared_perms |= BLK_PERM_CONSISTENT_READ
| BLK_PERM_WRITE
| BLK_PERM_GRAPH_MOD;
} else if (bdrv_chain_contains(bs, bdrv_skip_filters(target))) {
/*
* We may want to allow this in the future, but it would
* require taking some extra care.
*/
error_setg(errp, "Cannot mirror to a filter on top of a node in the "
"source's backing chain");
goto fail;
}
if (backing_mode != MIRROR_LEAVE_BACKING_CHAIN) {
target_perms |= BLK_PERM_GRAPH_MOD;
}
s->target = blk_new(s->common.job.aio_context, s->target = blk_new(s->common.job.aio_context,
BLK_PERM_WRITE | BLK_PERM_RESIZE | target_perms, target_shared_perms);
(target_graph_mod ? BLK_PERM_GRAPH_MOD : 0),
BLK_PERM_WRITE_UNCHANGED |
(target_is_backing ? BLK_PERM_CONSISTENT_READ |
BLK_PERM_WRITE |
BLK_PERM_GRAPH_MOD : 0));
ret = blk_insert_bs(s->target, target, errp); ret = blk_insert_bs(s->target, target, errp);
if (ret < 0) { if (ret < 0) {
goto fail; goto fail;
@ -1672,6 +1710,7 @@ static BlockJob *mirror_start_job(
s->zero_target = zero_target; s->zero_target = zero_target;
s->copy_mode = copy_mode; s->copy_mode = copy_mode;
s->base = base; s->base = base;
s->base_overlay = bdrv_find_overlay(bs, base);
s->granularity = granularity; s->granularity = granularity;
s->buf_size = ROUND_UP(buf_size, granularity); s->buf_size = ROUND_UP(buf_size, granularity);
s->unmap = unmap; s->unmap = unmap;
@ -1702,15 +1741,39 @@ static BlockJob *mirror_start_job(
/* In commit_active_start() all intermediate nodes disappear, so /* In commit_active_start() all intermediate nodes disappear, so
* any jobs in them must be blocked */ * any jobs in them must be blocked */
if (target_is_backing) { if (target_is_backing) {
BlockDriverState *iter; BlockDriverState *iter, *filtered_target;
for (iter = backing_bs(bs); iter != target; iter = backing_bs(iter)) { uint64_t iter_shared_perms;
/* XXX BLK_PERM_WRITE needs to be allowed so we don't block
/*
* The topmost node with
* bdrv_skip_filters(filtered_target) == bdrv_skip_filters(target)
*/
filtered_target = bdrv_cow_bs(bdrv_find_overlay(bs, target));
assert(bdrv_skip_filters(filtered_target) ==
bdrv_skip_filters(target));
/*
* XXX BLK_PERM_WRITE needs to be allowed so we don't block
* ourselves at s->base (if writes are blocked for a node, they are * ourselves at s->base (if writes are blocked for a node, they are
* also blocked for its backing file). The other options would be a * also blocked for its backing file). The other options would be a
* second filter driver above s->base (== target). */ * second filter driver above s->base (== target).
*/
iter_shared_perms = BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE;
for (iter = bdrv_filter_or_cow_bs(bs); iter != target;
iter = bdrv_filter_or_cow_bs(iter))
{
if (iter == filtered_target) {
/*
* From here on, all nodes are filters on the base.
* This allows us to share BLK_PERM_CONSISTENT_READ.
*/
iter_shared_perms |= BLK_PERM_CONSISTENT_READ;
}
ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0, ret = block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
BLK_PERM_WRITE_UNCHANGED | BLK_PERM_WRITE, iter_shared_perms, errp);
errp);
if (ret < 0) { if (ret < 0) {
goto fail; goto fail;
} }
@ -1746,7 +1809,7 @@ fail:
bs_opaque->stop = true; bs_opaque->stop = true;
bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing, bdrv_child_refresh_perms(mirror_top_bs, mirror_top_bs->backing,
&error_abort); &error_abort);
bdrv_replace_node(mirror_top_bs, backing_bs(mirror_top_bs), &error_abort); bdrv_replace_node(mirror_top_bs, mirror_top_bs->backing->bs, &error_abort);
bdrv_unref(mirror_top_bs); bdrv_unref(mirror_top_bs);
@ -1774,7 +1837,7 @@ void mirror_start(const char *job_id, BlockDriverState *bs,
return; return;
} }
is_none_mode = mode == MIRROR_SYNC_MODE_NONE; is_none_mode = mode == MIRROR_SYNC_MODE_NONE;
base = mode == MIRROR_SYNC_MODE_TOP ? backing_bs(bs) : NULL; base = mode == MIRROR_SYNC_MODE_TOP ? bdrv_backing_chain_next(bs) : NULL;
mirror_start_job(job_id, bs, creation_flags, target, replaces, mirror_start_job(job_id, bs, creation_flags, target, replaces,
speed, granularity, buf_size, backing_mode, zero_target, speed, granularity, buf_size, backing_mode, zero_target,
on_source_error, on_target_error, unmap, NULL, NULL, on_source_error, on_target_error, unmap, NULL, NULL,

View File

@ -217,7 +217,7 @@ void hmp_commit(Monitor *mon, const QDict *qdict)
return; return;
} }
bs = blk_bs(blk); bs = bdrv_skip_implicit_filters(blk_bs(blk));
aio_context = bdrv_get_aio_context(bs); aio_context = bdrv_get_aio_context(bs);
aio_context_acquire(aio_context); aio_context_acquire(aio_context);

View File

@ -262,6 +262,11 @@ static void null_refresh_filename(BlockDriverState *bs)
bs->drv->format_name); bs->drv->format_name);
} }
static int64_t null_allocated_file_size(BlockDriverState *bs)
{
return 0;
}
static const char *const null_strong_runtime_opts[] = { static const char *const null_strong_runtime_opts[] = {
BLOCK_OPT_SIZE, BLOCK_OPT_SIZE,
NULL_OPT_ZEROES, NULL_OPT_ZEROES,
@ -277,6 +282,7 @@ static BlockDriver bdrv_null_co = {
.bdrv_file_open = null_file_open, .bdrv_file_open = null_file_open,
.bdrv_parse_filename = null_co_parse_filename, .bdrv_parse_filename = null_co_parse_filename,
.bdrv_getlength = null_getlength, .bdrv_getlength = null_getlength,
.bdrv_get_allocated_file_size = null_allocated_file_size,
.bdrv_co_preadv = null_co_preadv, .bdrv_co_preadv = null_co_preadv,
.bdrv_co_pwritev = null_co_pwritev, .bdrv_co_pwritev = null_co_pwritev,
@ -297,6 +303,7 @@ static BlockDriver bdrv_null_aio = {
.bdrv_file_open = null_file_open, .bdrv_file_open = null_file_open,
.bdrv_parse_filename = null_aio_parse_filename, .bdrv_parse_filename = null_aio_parse_filename,
.bdrv_getlength = null_getlength, .bdrv_getlength = null_getlength,
.bdrv_get_allocated_file_size = null_allocated_file_size,
.bdrv_aio_preadv = null_aio_preadv, .bdrv_aio_preadv = null_aio_preadv,
.bdrv_aio_pwritev = null_aio_pwritev, .bdrv_aio_pwritev = null_aio_pwritev,

View File

@ -83,25 +83,21 @@ typedef struct {
/* Memory mapped registers */ /* Memory mapped registers */
typedef volatile struct { typedef volatile struct {
uint64_t cap; NvmeBar ctrl;
uint32_t vs; struct {
uint32_t intms; uint32_t sq_tail;
uint32_t intmc; uint32_t cq_head;
uint32_t cc; } doorbells[];
uint32_t reserved0;
uint32_t csts;
uint32_t nssr;
uint32_t aqa;
uint64_t asq;
uint64_t acq;
uint32_t cmbloc;
uint32_t cmbsz;
uint8_t reserved1[0xec0];
uint8_t cmd_set_specfic[0x100];
uint32_t doorbells[];
} NVMeRegs; } NVMeRegs;
QEMU_BUILD_BUG_ON(offsetof(NVMeRegs, doorbells) != 0x1000); #define INDEX_ADMIN 0
#define INDEX_IO(n) (1 + n)
/* This driver shares a single MSIX IRQ for the admin and I/O queues */
enum {
MSIX_SHARED_IRQ_IDX = 0,
MSIX_IRQ_COUNT = 1
};
struct BDRVNVMeState { struct BDRVNVMeState {
AioContext *aio_context; AioContext *aio_context;
@ -117,7 +113,7 @@ struct BDRVNVMeState {
/* How many uint32_t elements does each doorbell entry take. */ /* How many uint32_t elements does each doorbell entry take. */
size_t doorbell_scale; size_t doorbell_scale;
bool write_cache_supported; bool write_cache_supported;
EventNotifier irq_notifier; EventNotifier irq_notifier[MSIX_IRQ_COUNT];
uint64_t nsze; /* Namespace size reported by identify command */ uint64_t nsze; /* Namespace size reported by identify command */
int nsid; /* The namespace id to read/write data. */ int nsid; /* The namespace id to read/write data. */
@ -162,21 +158,20 @@ static QemuOptsList runtime_opts = {
}, },
}; };
static void nvme_init_queue(BlockDriverState *bs, NVMeQueue *q, static void nvme_init_queue(BDRVNVMeState *s, NVMeQueue *q,
int nentries, int entry_bytes, Error **errp) int nentries, int entry_bytes, Error **errp)
{ {
BDRVNVMeState *s = bs->opaque;
size_t bytes; size_t bytes;
int r; int r;
bytes = ROUND_UP(nentries * entry_bytes, s->page_size); bytes = ROUND_UP(nentries * entry_bytes, s->page_size);
q->head = q->tail = 0; q->head = q->tail = 0;
q->queue = qemu_try_blockalign0(bs, bytes); q->queue = qemu_try_memalign(s->page_size, bytes);
if (!q->queue) { if (!q->queue) {
error_setg(errp, "Cannot allocate queue"); error_setg(errp, "Cannot allocate queue");
return; return;
} }
memset(q->queue, 0, bytes);
r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova); r = qemu_vfio_dma_map(s->vfio, q->queue, bytes, false, &q->iova);
if (r) { if (r) {
error_setg(errp, "Cannot map queue"); error_setg(errp, "Cannot map queue");
@ -206,23 +201,31 @@ static void nvme_free_req_queue_cb(void *opaque)
qemu_mutex_unlock(&q->lock); qemu_mutex_unlock(&q->lock);
} }
static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs, static NVMeQueuePair *nvme_create_queue_pair(BDRVNVMeState *s,
AioContext *aio_context,
int idx, int size, int idx, int size,
Error **errp) Error **errp)
{ {
int i, r; int i, r;
BDRVNVMeState *s = bs->opaque;
Error *local_err = NULL; Error *local_err = NULL;
NVMeQueuePair *q = g_new0(NVMeQueuePair, 1); NVMeQueuePair *q;
uint64_t prp_list_iova; uint64_t prp_list_iova;
q = g_try_new0(NVMeQueuePair, 1);
if (!q) {
return NULL;
}
q->prp_list_pages = qemu_try_memalign(s->page_size,
s->page_size * NVME_NUM_REQS);
if (!q->prp_list_pages) {
goto fail;
}
memset(q->prp_list_pages, 0, s->page_size * NVME_NUM_REQS);
qemu_mutex_init(&q->lock); qemu_mutex_init(&q->lock);
q->s = s; q->s = s;
q->index = idx; q->index = idx;
qemu_co_queue_init(&q->free_req_queue); qemu_co_queue_init(&q->free_req_queue);
q->prp_list_pages = qemu_blockalign0(bs, s->page_size * NVME_NUM_REQS); q->completion_bh = aio_bh_new(aio_context, nvme_process_completion_bh, q);
q->completion_bh = aio_bh_new(bdrv_get_aio_context(bs),
nvme_process_completion_bh, q);
r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages, r = qemu_vfio_dma_map(s->vfio, q->prp_list_pages,
s->page_size * NVME_NUM_REQS, s->page_size * NVME_NUM_REQS,
false, &prp_list_iova); false, &prp_list_iova);
@ -239,19 +242,19 @@ static NVMeQueuePair *nvme_create_queue_pair(BlockDriverState *bs,
req->prp_list_iova = prp_list_iova + i * s->page_size; req->prp_list_iova = prp_list_iova + i * s->page_size;
} }
nvme_init_queue(bs, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err); nvme_init_queue(s, &q->sq, size, NVME_SQ_ENTRY_BYTES, &local_err);
if (local_err) { if (local_err) {
error_propagate(errp, local_err); error_propagate(errp, local_err);
goto fail; goto fail;
} }
q->sq.doorbell = &s->regs->doorbells[idx * 2 * s->doorbell_scale]; q->sq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].sq_tail;
nvme_init_queue(bs, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err); nvme_init_queue(s, &q->cq, size, NVME_CQ_ENTRY_BYTES, &local_err);
if (local_err) { if (local_err) {
error_propagate(errp, local_err); error_propagate(errp, local_err);
goto fail; goto fail;
} }
q->cq.doorbell = &s->regs->doorbells[(idx * 2 + 1) * s->doorbell_scale]; q->cq.doorbell = &s->regs->doorbells[idx * s->doorbell_scale].cq_head;
return q; return q;
fail: fail:
@ -441,6 +444,9 @@ static void nvme_trace_command(const NvmeCmd *cmd)
{ {
int i; int i;
if (!trace_event_get_state_backends(TRACE_NVME_SUBMIT_COMMAND_RAW)) {
return;
}
for (i = 0; i < 8; ++i) { for (i = 0; i < 8; ++i) {
uint8_t *cmdp = (uint8_t *)cmd + i * 8; uint8_t *cmdp = (uint8_t *)cmd + i * 8;
trace_nvme_submit_command_raw(cmdp[0], cmdp[1], cmdp[2], cmdp[3], trace_nvme_submit_command_raw(cmdp[0], cmdp[1], cmdp[2], cmdp[3],
@ -479,6 +485,7 @@ static void nvme_cmd_sync_cb(void *opaque, int ret)
static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q, static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
NvmeCmd *cmd) NvmeCmd *cmd)
{ {
AioContext *aio_context = bdrv_get_aio_context(bs);
NVMeRequest *req; NVMeRequest *req;
int ret = -EINPROGRESS; int ret = -EINPROGRESS;
req = nvme_get_free_req(q); req = nvme_get_free_req(q);
@ -487,17 +494,18 @@ static int nvme_cmd_sync(BlockDriverState *bs, NVMeQueuePair *q,
} }
nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret); nvme_submit_command(q, req, cmd, nvme_cmd_sync_cb, &ret);
BDRV_POLL_WHILE(bs, ret == -EINPROGRESS); AIO_WAIT_WHILE(aio_context, ret == -EINPROGRESS);
return ret; return ret;
} }
static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp) static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
{ {
BDRVNVMeState *s = bs->opaque; BDRVNVMeState *s = bs->opaque;
NvmeIdCtrl *idctrl; union {
NvmeIdNs *idns; NvmeIdCtrl ctrl;
NvmeIdNs ns;
} *id;
NvmeLBAF *lbaf; NvmeLBAF *lbaf;
uint8_t *resp;
uint16_t oncs; uint16_t oncs;
int r; int r;
uint64_t iova; uint64_t iova;
@ -506,54 +514,52 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
.cdw10 = cpu_to_le32(0x1), .cdw10 = cpu_to_le32(0x1),
}; };
resp = qemu_try_blockalign0(bs, sizeof(NvmeIdCtrl)); id = qemu_try_memalign(s->page_size, sizeof(*id));
if (!resp) { if (!id) {
error_setg(errp, "Cannot allocate buffer for identify response"); error_setg(errp, "Cannot allocate buffer for identify response");
goto out; goto out;
} }
idctrl = (NvmeIdCtrl *)resp; r = qemu_vfio_dma_map(s->vfio, id, sizeof(*id), true, &iova);
idns = (NvmeIdNs *)resp;
r = qemu_vfio_dma_map(s->vfio, resp, sizeof(NvmeIdCtrl), true, &iova);
if (r) { if (r) {
error_setg(errp, "Cannot map buffer for DMA"); error_setg(errp, "Cannot map buffer for DMA");
goto out; goto out;
} }
cmd.dptr.prp1 = cpu_to_le64(iova);
if (nvme_cmd_sync(bs, s->queues[0], &cmd)) { memset(id, 0, sizeof(*id));
cmd.dptr.prp1 = cpu_to_le64(iova);
if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
error_setg(errp, "Failed to identify controller"); error_setg(errp, "Failed to identify controller");
goto out; goto out;
} }
if (le32_to_cpu(idctrl->nn) < namespace) { if (le32_to_cpu(id->ctrl.nn) < namespace) {
error_setg(errp, "Invalid namespace"); error_setg(errp, "Invalid namespace");
goto out; goto out;
} }
s->write_cache_supported = le32_to_cpu(idctrl->vwc) & 0x1; s->write_cache_supported = le32_to_cpu(id->ctrl.vwc) & 0x1;
s->max_transfer = (idctrl->mdts ? 1 << idctrl->mdts : 0) * s->page_size; s->max_transfer = (id->ctrl.mdts ? 1 << id->ctrl.mdts : 0) * s->page_size;
/* For now the page list buffer per command is one page, to hold at most /* For now the page list buffer per command is one page, to hold at most
* s->page_size / sizeof(uint64_t) entries. */ * s->page_size / sizeof(uint64_t) entries. */
s->max_transfer = MIN_NON_ZERO(s->max_transfer, s->max_transfer = MIN_NON_ZERO(s->max_transfer,
s->page_size / sizeof(uint64_t) * s->page_size); s->page_size / sizeof(uint64_t) * s->page_size);
oncs = le16_to_cpu(idctrl->oncs); oncs = le16_to_cpu(id->ctrl.oncs);
s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROES); s->supports_write_zeroes = !!(oncs & NVME_ONCS_WRITE_ZEROES);
s->supports_discard = !!(oncs & NVME_ONCS_DSM); s->supports_discard = !!(oncs & NVME_ONCS_DSM);
memset(resp, 0, 4096); memset(id, 0, sizeof(*id));
cmd.cdw10 = 0; cmd.cdw10 = 0;
cmd.nsid = cpu_to_le32(namespace); cmd.nsid = cpu_to_le32(namespace);
if (nvme_cmd_sync(bs, s->queues[0], &cmd)) { if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
error_setg(errp, "Failed to identify namespace"); error_setg(errp, "Failed to identify namespace");
goto out; goto out;
} }
s->nsze = le64_to_cpu(idns->nsze); s->nsze = le64_to_cpu(id->ns.nsze);
lbaf = &idns->lbaf[NVME_ID_NS_FLBAS_INDEX(idns->flbas)]; lbaf = &id->ns.lbaf[NVME_ID_NS_FLBAS_INDEX(id->ns.flbas)];
if (NVME_ID_NS_DLFEAT_WRITE_ZEROES(idns->dlfeat) && if (NVME_ID_NS_DLFEAT_WRITE_ZEROES(id->ns.dlfeat) &&
NVME_ID_NS_DLFEAT_READ_BEHAVIOR(idns->dlfeat) == NVME_ID_NS_DLFEAT_READ_BEHAVIOR(id->ns.dlfeat) ==
NVME_ID_NS_DLFEAT_READ_BEHAVIOR_ZEROES) { NVME_ID_NS_DLFEAT_READ_BEHAVIOR_ZEROES) {
bs->supported_write_flags |= BDRV_REQ_MAY_UNMAP; bs->supported_write_flags |= BDRV_REQ_MAY_UNMAP;
} }
@ -573,17 +579,14 @@ static void nvme_identify(BlockDriverState *bs, int namespace, Error **errp)
s->blkshift = lbaf->ds; s->blkshift = lbaf->ds;
out: out:
qemu_vfio_dma_unmap(s->vfio, resp); qemu_vfio_dma_unmap(s->vfio, id);
qemu_vfree(resp); qemu_vfree(id);
} }
static bool nvme_poll_queues(BDRVNVMeState *s) static bool nvme_poll_queue(NVMeQueuePair *q)
{ {
bool progress = false; bool progress = false;
int i;
for (i = 0; i < s->nr_queues; i++) {
NVMeQueuePair *q = s->queues[i];
const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES; const size_t cqe_offset = q->cq.head * NVME_CQ_ENTRY_BYTES;
NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset]; NvmeCqe *cqe = (NvmeCqe *)&q->cq.queue[cqe_offset];
@ -593,7 +596,7 @@ static bool nvme_poll_queues(BDRVNVMeState *s)
* cannot race with itself. * cannot race with itself.
*/ */
if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) { if ((le16_to_cpu(cqe->status) & 0x1) == q->cq_phase) {
continue; return false;
} }
qemu_mutex_lock(&q->lock); qemu_mutex_lock(&q->lock);
@ -602,13 +605,27 @@ static bool nvme_poll_queues(BDRVNVMeState *s)
progress = true; progress = true;
} }
qemu_mutex_unlock(&q->lock); qemu_mutex_unlock(&q->lock);
return progress;
}
static bool nvme_poll_queues(BDRVNVMeState *s)
{
bool progress = false;
int i;
for (i = 0; i < s->nr_queues; i++) {
if (nvme_poll_queue(s->queues[i])) {
progress = true;
}
} }
return progress; return progress;
} }
static void nvme_handle_event(EventNotifier *n) static void nvme_handle_event(EventNotifier *n)
{ {
BDRVNVMeState *s = container_of(n, BDRVNVMeState, irq_notifier); BDRVNVMeState *s = container_of(n, BDRVNVMeState,
irq_notifier[MSIX_SHARED_IRQ_IDX]);
trace_nvme_handle_event(s); trace_nvme_handle_event(s);
event_notifier_test_and_clear(n); event_notifier_test_and_clear(n);
@ -623,7 +640,8 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
NvmeCmd cmd; NvmeCmd cmd;
int queue_size = NVME_QUEUE_SIZE; int queue_size = NVME_QUEUE_SIZE;
q = nvme_create_queue_pair(bs, n, queue_size, errp); q = nvme_create_queue_pair(s, bdrv_get_aio_context(bs),
n, queue_size, errp);
if (!q) { if (!q) {
return false; return false;
} }
@ -633,10 +651,9 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
.cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)), .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)),
.cdw11 = cpu_to_le32(0x3), .cdw11 = cpu_to_le32(0x3),
}; };
if (nvme_cmd_sync(bs, s->queues[0], &cmd)) { if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
error_setg(errp, "Failed to create io queue [%d]", n); error_setg(errp, "Failed to create CQ io queue [%d]", n);
nvme_free_queue_pair(q); goto out_error;
return false;
} }
cmd = (NvmeCmd) { cmd = (NvmeCmd) {
.opcode = NVME_ADM_CMD_CREATE_SQ, .opcode = NVME_ADM_CMD_CREATE_SQ,
@ -644,21 +661,24 @@ static bool nvme_add_io_queue(BlockDriverState *bs, Error **errp)
.cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)), .cdw10 = cpu_to_le32(((queue_size - 1) << 16) | (n & 0xFFFF)),
.cdw11 = cpu_to_le32(0x1 | (n << 16)), .cdw11 = cpu_to_le32(0x1 | (n << 16)),
}; };
if (nvme_cmd_sync(bs, s->queues[0], &cmd)) { if (nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd)) {
error_setg(errp, "Failed to create io queue [%d]", n); error_setg(errp, "Failed to create SQ io queue [%d]", n);
nvme_free_queue_pair(q); goto out_error;
return false;
} }
s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1); s->queues = g_renew(NVMeQueuePair *, s->queues, n + 1);
s->queues[n] = q; s->queues[n] = q;
s->nr_queues++; s->nr_queues++;
return true; return true;
out_error:
nvme_free_queue_pair(q);
return false;
} }
static bool nvme_poll_cb(void *opaque) static bool nvme_poll_cb(void *opaque)
{ {
EventNotifier *e = opaque; EventNotifier *e = opaque;
BDRVNVMeState *s = container_of(e, BDRVNVMeState, irq_notifier); BDRVNVMeState *s = container_of(e, BDRVNVMeState,
irq_notifier[MSIX_SHARED_IRQ_IDX]);
trace_nvme_poll_cb(s); trace_nvme_poll_cb(s);
return nvme_poll_queues(s); return nvme_poll_queues(s);
@ -668,6 +688,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
Error **errp) Error **errp)
{ {
BDRVNVMeState *s = bs->opaque; BDRVNVMeState *s = bs->opaque;
AioContext *aio_context = bdrv_get_aio_context(bs);
int ret; int ret;
uint64_t cap; uint64_t cap;
uint64_t timeout_ms; uint64_t timeout_ms;
@ -679,7 +700,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
s->device = g_strdup(device); s->device = g_strdup(device);
s->nsid = namespace; s->nsid = namespace;
s->aio_context = bdrv_get_aio_context(bs); s->aio_context = bdrv_get_aio_context(bs);
ret = event_notifier_init(&s->irq_notifier, 0); ret = event_notifier_init(&s->irq_notifier[MSIX_SHARED_IRQ_IDX], 0);
if (ret) { if (ret) {
error_setg(errp, "Failed to init event notifier"); error_setg(errp, "Failed to init event notifier");
return ret; return ret;
@ -700,7 +721,7 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
/* Perform initialize sequence as described in NVMe spec "7.6.1 /* Perform initialize sequence as described in NVMe spec "7.6.1
* Initialization". */ * Initialization". */
cap = le64_to_cpu(s->regs->cap); cap = le64_to_cpu(s->regs->ctrl.cap);
if (!(cap & (1ULL << 37))) { if (!(cap & (1ULL << 37))) {
error_setg(errp, "Device doesn't support NVMe command set"); error_setg(errp, "Device doesn't support NVMe command set");
ret = -EINVAL; ret = -EINVAL;
@ -713,10 +734,10 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000); timeout_ms = MIN(500 * ((cap >> 24) & 0xFF), 30000);
/* Reset device to get a clean state. */ /* Reset device to get a clean state. */
s->regs->cc = cpu_to_le32(le32_to_cpu(s->regs->cc) & 0xFE); s->regs->ctrl.cc = cpu_to_le32(le32_to_cpu(s->regs->ctrl.cc) & 0xFE);
/* Wait for CSTS.RDY = 0. */ /* Wait for CSTS.RDY = 0. */
deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * 1000000ULL; deadline = qemu_clock_get_ns(QEMU_CLOCK_REALTIME) + timeout_ms * SCALE_MS;
while (le32_to_cpu(s->regs->csts) & 0x1) { while (le32_to_cpu(s->regs->ctrl.csts) & 0x1) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to reset (%" error_setg(errp, "Timeout while waiting for device to reset (%"
PRId64 " ms)", PRId64 " ms)",
@ -728,25 +749,27 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
/* Set up admin queue. */ /* Set up admin queue. */
s->queues = g_new(NVMeQueuePair *, 1); s->queues = g_new(NVMeQueuePair *, 1);
s->queues[0] = nvme_create_queue_pair(bs, 0, NVME_QUEUE_SIZE, errp); s->queues[INDEX_ADMIN] = nvme_create_queue_pair(s, aio_context, 0,
if (!s->queues[0]) { NVME_QUEUE_SIZE,
errp);
if (!s->queues[INDEX_ADMIN]) {
ret = -EINVAL; ret = -EINVAL;
goto out; goto out;
} }
s->nr_queues = 1; s->nr_queues = 1;
QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000); QEMU_BUILD_BUG_ON(NVME_QUEUE_SIZE & 0xF000);
s->regs->aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE); s->regs->ctrl.aqa = cpu_to_le32((NVME_QUEUE_SIZE << 16) | NVME_QUEUE_SIZE);
s->regs->asq = cpu_to_le64(s->queues[0]->sq.iova); s->regs->ctrl.asq = cpu_to_le64(s->queues[INDEX_ADMIN]->sq.iova);
s->regs->acq = cpu_to_le64(s->queues[0]->cq.iova); s->regs->ctrl.acq = cpu_to_le64(s->queues[INDEX_ADMIN]->cq.iova);
/* After setting up all control registers we can enable device now. */ /* After setting up all control registers we can enable device now. */
s->regs->cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) | s->regs->ctrl.cc = cpu_to_le32((ctz32(NVME_CQ_ENTRY_BYTES) << 20) |
(ctz32(NVME_SQ_ENTRY_BYTES) << 16) | (ctz32(NVME_SQ_ENTRY_BYTES) << 16) |
0x1); 0x1);
/* Wait for CSTS.RDY = 1. */ /* Wait for CSTS.RDY = 1. */
now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME); now = qemu_clock_get_ns(QEMU_CLOCK_REALTIME);
deadline = now + timeout_ms * 1000000; deadline = now + timeout_ms * 1000000;
while (!(le32_to_cpu(s->regs->csts) & 0x1)) { while (!(le32_to_cpu(s->regs->ctrl.csts) & 0x1)) {
if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) { if (qemu_clock_get_ns(QEMU_CLOCK_REALTIME) > deadline) {
error_setg(errp, "Timeout while waiting for device to start (%" error_setg(errp, "Timeout while waiting for device to start (%"
PRId64 " ms)", PRId64 " ms)",
@ -756,12 +779,13 @@ static int nvme_init(BlockDriverState *bs, const char *device, int namespace,
} }
} }
ret = qemu_vfio_pci_init_irq(s->vfio, &s->irq_notifier, ret = qemu_vfio_pci_init_irq(s->vfio, s->irq_notifier,
VFIO_PCI_MSIX_IRQ_INDEX, errp); VFIO_PCI_MSIX_IRQ_INDEX, errp);
if (ret) { if (ret) {
goto out; goto out;
} }
aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier, aio_set_event_notifier(bdrv_get_aio_context(bs),
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, nvme_handle_event, nvme_poll_cb); false, nvme_handle_event, nvme_poll_cb);
nvme_identify(bs, namespace, &local_err); nvme_identify(bs, namespace, &local_err);
@ -828,7 +852,7 @@ static int nvme_enable_disable_write_cache(BlockDriverState *bs, bool enable,
.cdw11 = cpu_to_le32(enable ? 0x01 : 0x00), .cdw11 = cpu_to_le32(enable ? 0x01 : 0x00),
}; };
ret = nvme_cmd_sync(bs, s->queues[0], &cmd); ret = nvme_cmd_sync(bs, s->queues[INDEX_ADMIN], &cmd);
if (ret) { if (ret) {
error_setg(errp, "Failed to configure NVMe write cache"); error_setg(errp, "Failed to configure NVMe write cache");
} }
@ -844,9 +868,10 @@ static void nvme_close(BlockDriverState *bs)
nvme_free_queue_pair(s->queues[i]); nvme_free_queue_pair(s->queues[i]);
} }
g_free(s->queues); g_free(s->queues);
aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier, aio_set_event_notifier(bdrv_get_aio_context(bs),
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, NULL, NULL); false, NULL, NULL);
event_notifier_cleanup(&s->irq_notifier); event_notifier_cleanup(&s->irq_notifier[MSIX_SHARED_IRQ_IDX]);
qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE); qemu_vfio_pci_unmap_bar(s->vfio, 0, (void *)s->regs, 0, NVME_BAR_SIZE);
qemu_vfio_close(s->vfio); qemu_vfio_close(s->vfio);
@ -1045,7 +1070,7 @@ static coroutine_fn int nvme_co_prw_aligned(BlockDriverState *bs,
{ {
int r; int r;
BDRVNVMeState *s = bs->opaque; BDRVNVMeState *s = bs->opaque;
NVMeQueuePair *ioq = s->queues[1]; NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
NVMeRequest *req; NVMeRequest *req;
uint32_t cdw12 = (((bytes >> s->blkshift) - 1) & 0xFFFF) | uint32_t cdw12 = (((bytes >> s->blkshift) - 1) & 0xFFFF) |
@ -1124,7 +1149,7 @@ static int nvme_co_prw(BlockDriverState *bs, uint64_t offset, uint64_t bytes,
return nvme_co_prw_aligned(bs, offset, bytes, qiov, is_write, flags); return nvme_co_prw_aligned(bs, offset, bytes, qiov, is_write, flags);
} }
trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write); trace_nvme_prw_buffered(s, offset, bytes, qiov->niov, is_write);
buf = qemu_try_blockalign(bs, bytes); buf = qemu_try_memalign(s->page_size, bytes);
if (!buf) { if (!buf) {
return -ENOMEM; return -ENOMEM;
@ -1160,7 +1185,7 @@ static coroutine_fn int nvme_co_pwritev(BlockDriverState *bs,
static coroutine_fn int nvme_co_flush(BlockDriverState *bs) static coroutine_fn int nvme_co_flush(BlockDriverState *bs)
{ {
BDRVNVMeState *s = bs->opaque; BDRVNVMeState *s = bs->opaque;
NVMeQueuePair *ioq = s->queues[1]; NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
NVMeRequest *req; NVMeRequest *req;
NvmeCmd cmd = { NvmeCmd cmd = {
.opcode = NVME_CMD_FLUSH, .opcode = NVME_CMD_FLUSH,
@ -1191,7 +1216,7 @@ static coroutine_fn int nvme_co_pwrite_zeroes(BlockDriverState *bs,
BdrvRequestFlags flags) BdrvRequestFlags flags)
{ {
BDRVNVMeState *s = bs->opaque; BDRVNVMeState *s = bs->opaque;
NVMeQueuePair *ioq = s->queues[1]; NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
NVMeRequest *req; NVMeRequest *req;
uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF; uint32_t cdw12 = ((bytes >> s->blkshift) - 1) & 0xFFFF;
@ -1244,7 +1269,7 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
int bytes) int bytes)
{ {
BDRVNVMeState *s = bs->opaque; BDRVNVMeState *s = bs->opaque;
NVMeQueuePair *ioq = s->queues[1]; NVMeQueuePair *ioq = s->queues[INDEX_IO(0)];
NVMeRequest *req; NVMeRequest *req;
NvmeDsmRange *buf; NvmeDsmRange *buf;
QEMUIOVector local_qiov; QEMUIOVector local_qiov;
@ -1268,11 +1293,11 @@ static int coroutine_fn nvme_co_pdiscard(BlockDriverState *bs,
assert(s->nr_queues > 1); assert(s->nr_queues > 1);
buf = qemu_try_blockalign0(bs, s->page_size); buf = qemu_try_memalign(s->page_size, s->page_size);
if (!buf) { if (!buf) {
return -ENOMEM; return -ENOMEM;
} }
memset(buf, 0, s->page_size);
buf->nlb = cpu_to_le32(bytes >> s->blkshift); buf->nlb = cpu_to_le32(bytes >> s->blkshift);
buf->slba = cpu_to_le64(offset >> s->blkshift); buf->slba = cpu_to_le64(offset >> s->blkshift);
buf->cattr = 0; buf->cattr = 0;
@ -1353,7 +1378,8 @@ static void nvme_detach_aio_context(BlockDriverState *bs)
q->completion_bh = NULL; q->completion_bh = NULL;
} }
aio_set_event_notifier(bdrv_get_aio_context(bs), &s->irq_notifier, aio_set_event_notifier(bdrv_get_aio_context(bs),
&s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, NULL, NULL); false, NULL, NULL);
} }
@ -1363,7 +1389,7 @@ static void nvme_attach_aio_context(BlockDriverState *bs,
BDRVNVMeState *s = bs->opaque; BDRVNVMeState *s = bs->opaque;
s->aio_context = new_context; s->aio_context = new_context;
aio_set_event_notifier(new_context, &s->irq_notifier, aio_set_event_notifier(new_context, &s->irq_notifier[MSIX_SHARED_IRQ_IDX],
false, nvme_handle_event, nvme_poll_cb); false, nvme_handle_event, nvme_poll_cb);
for (int i = 0; i < s->nr_queues; i++) { for (int i = 0; i < s->nr_queues; i++) {
@ -1387,7 +1413,7 @@ static void nvme_aio_unplug(BlockDriverState *bs)
BDRVNVMeState *s = bs->opaque; BDRVNVMeState *s = bs->opaque;
assert(s->plugged); assert(s->plugged);
s->plugged = false; s->plugged = false;
for (i = 1; i < s->nr_queues; i++) { for (i = INDEX_IO(0); i < s->nr_queues; i++) {
NVMeQueuePair *q = s->queues[i]; NVMeQueuePair *q = s->queues[i];
qemu_mutex_lock(&q->lock); qemu_mutex_lock(&q->lock);
nvme_kick(q); nvme_kick(q);

View File

@ -47,7 +47,7 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
Error **errp) Error **errp)
{ {
ImageInfo **p_image_info; ImageInfo **p_image_info;
BlockDriverState *bs0; BlockDriverState *bs0, *backing;
BlockDeviceInfo *info; BlockDeviceInfo *info;
if (!bs->drv) { if (!bs->drv) {
@ -76,9 +76,10 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
info->node_name = g_strdup(bs->node_name); info->node_name = g_strdup(bs->node_name);
} }
if (bs->backing_file[0]) { backing = bdrv_cow_bs(bs);
if (backing) {
info->has_backing_file = true; info->has_backing_file = true;
info->backing_file = g_strdup(bs->backing_file); info->backing_file = g_strdup(backing->filename);
} }
if (!QLIST_EMPTY(&bs->dirty_bitmaps)) { if (!QLIST_EMPTY(&bs->dirty_bitmaps)) {
@ -163,9 +164,13 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
break; break;
} }
if (bs0->drv && bs0->backing) { if (bs0->drv && bdrv_filter_or_cow_child(bs0)) {
/*
* Put any filtered child here (for backwards compatibility to when
* we put bs0->backing here, which might be any filtered child).
*/
info->backing_file_depth++; info->backing_file_depth++;
bs0 = bs0->backing->bs; bs0 = bdrv_filter_or_cow_bs(bs0);
(*p_image_info)->has_backing_image = true; (*p_image_info)->has_backing_image = true;
p_image_info = &((*p_image_info)->backing_image); p_image_info = &((*p_image_info)->backing_image);
} else { } else {
@ -174,9 +179,8 @@ BlockDeviceInfo *bdrv_block_device_info(BlockBackend *blk,
/* Skip automatically inserted nodes that the user isn't aware of for /* Skip automatically inserted nodes that the user isn't aware of for
* query-block (blk != NULL), but not for query-named-block-nodes */ * query-block (blk != NULL), but not for query-named-block-nodes */
while (blk && bs0->drv && bs0->implicit) { if (blk) {
bs0 = backing_bs(bs0); bs0 = bdrv_skip_implicit_filters(bs0);
assert(bs0);
} }
} }
@ -288,7 +292,7 @@ void bdrv_query_image_info(BlockDriverState *bs,
info->virtual_size = size; info->virtual_size = size;
info->actual_size = bdrv_get_allocated_file_size(bs); info->actual_size = bdrv_get_allocated_file_size(bs);
info->has_actual_size = info->actual_size >= 0; info->has_actual_size = info->actual_size >= 0;
if (bdrv_is_encrypted(bs)) { if (bs->encrypted) {
info->encrypted = true; info->encrypted = true;
info->has_encrypted = true; info->has_encrypted = true;
} }
@ -311,6 +315,7 @@ void bdrv_query_image_info(BlockDriverState *bs,
backing_filename = bs->backing_file; backing_filename = bs->backing_file;
if (backing_filename[0] != '\0') { if (backing_filename[0] != '\0') {
char *backing_filename2; char *backing_filename2;
info->backing_filename = g_strdup(backing_filename); info->backing_filename = g_strdup(backing_filename);
info->has_backing_filename = true; info->has_backing_filename = true;
backing_filename2 = bdrv_get_full_backing_filename(bs, NULL); backing_filename2 = bdrv_get_full_backing_filename(bs, NULL);
@ -362,9 +367,7 @@ static void bdrv_query_info(BlockBackend *blk, BlockInfo **p_info,
char *qdev; char *qdev;
/* Skip automatically inserted nodes that the user isn't aware of */ /* Skip automatically inserted nodes that the user isn't aware of */
while (bs && bs->drv && bs->implicit) { bs = bdrv_skip_implicit_filters(bs);
bs = backing_bs(bs);
}
info->device = g_strdup(blk_name(blk)); info->device = g_strdup(blk_name(blk));
info->type = g_strdup("unknown"); info->type = g_strdup("unknown");
@ -526,6 +529,8 @@ static void bdrv_query_blk_stats(BlockDeviceStats *ds, BlockBackend *blk)
static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs, static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
bool blk_level) bool blk_level)
{ {
BdrvChild *parent_child;
BlockDriverState *filter_or_cow_bs;
BlockStats *s = NULL; BlockStats *s = NULL;
s = g_malloc0(sizeof(*s)); s = g_malloc0(sizeof(*s));
@ -538,9 +543,8 @@ static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
/* Skip automatically inserted nodes that the user isn't aware of in /* Skip automatically inserted nodes that the user isn't aware of in
* a BlockBackend-level command. Stay at the exact node for a node-level * a BlockBackend-level command. Stay at the exact node for a node-level
* command. */ * command. */
while (blk_level && bs->drv && bs->implicit) { if (blk_level) {
bs = backing_bs(bs); bs = bdrv_skip_implicit_filters(bs);
assert(bs);
} }
if (bdrv_get_node_name(bs)[0]) { if (bdrv_get_node_name(bs)[0]) {
@ -555,14 +559,46 @@ static BlockStats *bdrv_query_bds_stats(BlockDriverState *bs,
s->has_driver_specific = true; s->has_driver_specific = true;
} }
if (bs->file) { parent_child = bdrv_primary_child(bs);
if (!parent_child ||
!(parent_child->role & (BDRV_CHILD_DATA | BDRV_CHILD_FILTERED)))
{
BdrvChild *c;
/*
* Look for a unique data-storing child. We do not need to look for
* filtered children, as there would be only one and it would have been
* the primary child.
*/
parent_child = NULL;
QLIST_FOREACH(c, &bs->children, next) {
if (c->role & BDRV_CHILD_DATA) {
if (parent_child) {
/*
* There are multiple data-storing children and we cannot
* choose between them.
*/
parent_child = NULL;
break;
}
parent_child = c;
}
}
}
if (parent_child) {
s->has_parent = true; s->has_parent = true;
s->parent = bdrv_query_bds_stats(bs->file->bs, blk_level); s->parent = bdrv_query_bds_stats(parent_child->bs, blk_level);
} }
if (blk_level && bs->backing) { filter_or_cow_bs = bdrv_filter_or_cow_bs(bs);
if (blk_level && filter_or_cow_bs) {
/*
* Put any filtered or COW child here (for backwards
* compatibility to when we put bs0->backing here, which might
* be either)
*/
s->has_backing = true; s->has_backing = true;
s->backing = bdrv_query_bds_stats(bs->backing->bs, blk_level); s->backing = bdrv_query_bds_stats(filter_or_cow_bs, blk_level);
} }
return s; return s;

View File

@ -1320,6 +1320,7 @@ static bool cluster_needs_new_alloc(BlockDriverState *bs, uint64_t l2_entry)
if (l2_entry & QCOW_OFLAG_COPIED) { if (l2_entry & QCOW_OFLAG_COPIED) {
return false; return false;
} }
/* fallthrough */
case QCOW2_CLUSTER_UNALLOCATED: case QCOW2_CLUSTER_UNALLOCATED:
case QCOW2_CLUSTER_COMPRESSED: case QCOW2_CLUSTER_COMPRESSED:
case QCOW2_CLUSTER_ZERO_PLAIN: case QCOW2_CLUSTER_ZERO_PLAIN:

View File

@ -147,6 +147,56 @@ bool bdrv_snapshot_find_by_id_and_name(BlockDriverState *bs,
return ret; return ret;
} }
/**
 * Locate the child link that snapshot operations may be forwarded to
 * when @bs itself has no snapshot support.
 *
 * The result is the address of the BdrvChild pointer (not the child
 * itself), because bdrv_snapshot_goto() temporarily detaches the child
 * and therefore has to overwrite that pointer.
 *
 * Returns NULL when there is no child that can (safely) serve as a
 * fallback.
 */
static BdrvChild **bdrv_snapshot_fallback_ptr(BlockDriverState *bs)
{
    BdrvChild **candidate;
    BdrvChild *c;

    /*
     * Only bs->file and bs->backing are BdrvChild pointers that may be
     * modified safely, so they are the only ones a reference can be
     * handed out for.  bs->file is preferred; bs->backing is considered
     * only for filter drivers that have no bs->file.
     */
    candidate = &bs->file;
    if (*candidate == NULL && bs->drv && bs->drv->is_filter) {
        candidate = &bs->backing;
    }

    if (*candidate == NULL) {
        return NULL;
    }

    /*
     * Any other child that stores data, metadata, or is filtered would
     * have to be snapshotted as well; if one exists, falling back to
     * *candidate alone is not safe.
     */
    QLIST_FOREACH(c, &bs->children, next) {
        if (c == *candidate) {
            continue;
        }
        if (c->role & (BDRV_CHILD_DATA | BDRV_CHILD_METADATA |
                       BDRV_CHILD_FILTERED)) {
            return NULL;
        }
    }

    return candidate;
}
/*
 * Convenience wrapper around bdrv_snapshot_fallback_ptr(): return the
 * node behind the fallback child link, or NULL if there is no fallback.
 */
static BlockDriverState *bdrv_snapshot_fallback(BlockDriverState *bs)
{
    BdrvChild **slot = bdrv_snapshot_fallback_ptr(bs);

    if (!slot) {
        return NULL;
    }
    return (*slot)->bs;
}
int bdrv_can_snapshot(BlockDriverState *bs) int bdrv_can_snapshot(BlockDriverState *bs)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
@ -155,8 +205,9 @@ int bdrv_can_snapshot(BlockDriverState *bs)
} }
if (!drv->bdrv_snapshot_create) { if (!drv->bdrv_snapshot_create) {
if (bs->file != NULL) { BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
return bdrv_can_snapshot(bs->file->bs); if (fallback_bs) {
return bdrv_can_snapshot(fallback_bs);
} }
return 0; return 0;
} }
@ -168,14 +219,15 @@ int bdrv_snapshot_create(BlockDriverState *bs,
QEMUSnapshotInfo *sn_info) QEMUSnapshotInfo *sn_info)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
if (!drv) { if (!drv) {
return -ENOMEDIUM; return -ENOMEDIUM;
} }
if (drv->bdrv_snapshot_create) { if (drv->bdrv_snapshot_create) {
return drv->bdrv_snapshot_create(bs, sn_info); return drv->bdrv_snapshot_create(bs, sn_info);
} }
if (bs->file) { if (fallback_bs) {
return bdrv_snapshot_create(bs->file->bs, sn_info); return bdrv_snapshot_create(fallback_bs, sn_info);
} }
return -ENOTSUP; return -ENOTSUP;
} }
@ -185,6 +237,7 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
Error **errp) Error **errp)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BdrvChild **fallback_ptr;
int ret, open_ret; int ret, open_ret;
if (!drv) { if (!drv) {
@ -205,39 +258,46 @@ int bdrv_snapshot_goto(BlockDriverState *bs,
return ret; return ret;
} }
if (bs->file) { fallback_ptr = bdrv_snapshot_fallback_ptr(bs);
BlockDriverState *file; if (fallback_ptr) {
QDict *options = qdict_clone_shallow(bs->options); QDict *options;
QDict *file_options; QDict *file_options;
Error *local_err = NULL; Error *local_err = NULL;
BlockDriverState *fallback_bs = (*fallback_ptr)->bs;
char *subqdict_prefix = g_strdup_printf("%s.", (*fallback_ptr)->name);
options = qdict_clone_shallow(bs->options);
file = bs->file->bs;
/* Prevent it from getting deleted when detached from bs */ /* Prevent it from getting deleted when detached from bs */
bdrv_ref(file); bdrv_ref(fallback_bs);
qdict_extract_subqdict(options, &file_options, "file."); qdict_extract_subqdict(options, &file_options, subqdict_prefix);
qobject_unref(file_options); qobject_unref(file_options);
qdict_put_str(options, "file", bdrv_get_node_name(file)); g_free(subqdict_prefix);
qdict_put_str(options, (*fallback_ptr)->name,
bdrv_get_node_name(fallback_bs));
if (drv->bdrv_close) { if (drv->bdrv_close) {
drv->bdrv_close(bs); drv->bdrv_close(bs);
} }
bdrv_unref_child(bs, bs->file);
bs->file = NULL;
ret = bdrv_snapshot_goto(file, snapshot_id, errp); bdrv_unref_child(bs, *fallback_ptr);
*fallback_ptr = NULL;
ret = bdrv_snapshot_goto(fallback_bs, snapshot_id, errp);
open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err); open_ret = drv->bdrv_open(bs, options, bs->open_flags, &local_err);
qobject_unref(options); qobject_unref(options);
if (open_ret < 0) { if (open_ret < 0) {
bdrv_unref(file); bdrv_unref(fallback_bs);
bs->drv = NULL; bs->drv = NULL;
/* A bdrv_snapshot_goto() error takes precedence */ /* A bdrv_snapshot_goto() error takes precedence */
error_propagate(errp, local_err); error_propagate(errp, local_err);
return ret < 0 ? ret : open_ret; return ret < 0 ? ret : open_ret;
} }
assert(bs->file->bs == file); assert(fallback_bs == (*fallback_ptr)->bs);
bdrv_unref(file); bdrv_unref(fallback_bs);
return ret; return ret;
} }
@ -273,6 +333,7 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
Error **errp) Error **errp)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
int ret; int ret;
if (!drv) { if (!drv) {
@ -289,8 +350,8 @@ int bdrv_snapshot_delete(BlockDriverState *bs,
if (drv->bdrv_snapshot_delete) { if (drv->bdrv_snapshot_delete) {
ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp); ret = drv->bdrv_snapshot_delete(bs, snapshot_id, name, errp);
} else if (bs->file) { } else if (fallback_bs) {
ret = bdrv_snapshot_delete(bs->file->bs, snapshot_id, name, errp); ret = bdrv_snapshot_delete(fallback_bs, snapshot_id, name, errp);
} else { } else {
error_setg(errp, "Block format '%s' used by device '%s' " error_setg(errp, "Block format '%s' used by device '%s' "
"does not support internal snapshot deletion", "does not support internal snapshot deletion",
@ -306,14 +367,15 @@ int bdrv_snapshot_list(BlockDriverState *bs,
QEMUSnapshotInfo **psn_info) QEMUSnapshotInfo **psn_info)
{ {
BlockDriver *drv = bs->drv; BlockDriver *drv = bs->drv;
BlockDriverState *fallback_bs = bdrv_snapshot_fallback(bs);
if (!drv) { if (!drv) {
return -ENOMEDIUM; return -ENOMEDIUM;
} }
if (drv->bdrv_snapshot_list) { if (drv->bdrv_snapshot_list) {
return drv->bdrv_snapshot_list(bs, psn_info); return drv->bdrv_snapshot_list(bs, psn_info);
} }
if (bs->file) { if (fallback_bs) {
return bdrv_snapshot_list(bs->file->bs, psn_info); return bdrv_snapshot_list(fallback_bs, psn_info);
} }
return -ENOTSUP; return -ENOTSUP;
} }

View File

@ -31,7 +31,8 @@ enum {
typedef struct StreamBlockJob { typedef struct StreamBlockJob {
BlockJob common; BlockJob common;
BlockDriverState *bottom; BlockDriverState *base_overlay; /* COW overlay (stream from this) */
BlockDriverState *above_base; /* Node directly above the base */
BlockdevOnError on_error; BlockdevOnError on_error;
char *backing_file_str; char *backing_file_str;
bool bs_read_only; bool bs_read_only;
@ -53,7 +54,7 @@ static void stream_abort(Job *job)
if (s->chain_frozen) { if (s->chain_frozen) {
BlockJob *bjob = &s->common; BlockJob *bjob = &s->common;
bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->bottom); bdrv_unfreeze_backing_chain(blk_bs(bjob->blk), s->above_base);
} }
} }
@ -62,14 +63,15 @@ static int stream_prepare(Job *job)
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
BlockJob *bjob = &s->common; BlockJob *bjob = &s->common;
BlockDriverState *bs = blk_bs(bjob->blk); BlockDriverState *bs = blk_bs(bjob->blk);
BlockDriverState *base = backing_bs(s->bottom); BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
BlockDriverState *base = bdrv_filter_or_cow_bs(s->above_base);
Error *local_err = NULL; Error *local_err = NULL;
int ret = 0; int ret = 0;
bdrv_unfreeze_backing_chain(bs, s->bottom); bdrv_unfreeze_backing_chain(bs, s->above_base);
s->chain_frozen = false; s->chain_frozen = false;
if (bs->backing) { if (bdrv_cow_child(unfiltered_bs)) {
const char *base_id = NULL, *base_fmt = NULL; const char *base_id = NULL, *base_fmt = NULL;
if (base) { if (base) {
base_id = s->backing_file_str; base_id = s->backing_file_str;
@ -77,8 +79,8 @@ static int stream_prepare(Job *job)
base_fmt = base->drv->format_name; base_fmt = base->drv->format_name;
} }
} }
bdrv_set_backing_hd(bs, base, &local_err); bdrv_set_backing_hd(unfiltered_bs, base, &local_err);
ret = bdrv_change_backing_file(bs, base_id, base_fmt, false); ret = bdrv_change_backing_file(unfiltered_bs, base_id, base_fmt, false);
if (local_err) { if (local_err) {
error_report_err(local_err); error_report_err(local_err);
return -EPERM; return -EPERM;
@ -109,14 +111,15 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
StreamBlockJob *s = container_of(job, StreamBlockJob, common.job); StreamBlockJob *s = container_of(job, StreamBlockJob, common.job);
BlockBackend *blk = s->common.blk; BlockBackend *blk = s->common.blk;
BlockDriverState *bs = blk_bs(blk); BlockDriverState *bs = blk_bs(blk);
bool enable_cor = !backing_bs(s->bottom); BlockDriverState *unfiltered_bs = bdrv_skip_filters(bs);
bool enable_cor = !bdrv_cow_child(s->base_overlay);
int64_t len; int64_t len;
int64_t offset = 0; int64_t offset = 0;
uint64_t delay_ns = 0; uint64_t delay_ns = 0;
int error = 0; int error = 0;
int64_t n = 0; /* bytes */ int64_t n = 0; /* bytes */
if (bs == s->bottom) { if (unfiltered_bs == s->base_overlay) {
/* Nothing to stream */ /* Nothing to stream */
return 0; return 0;
} }
@ -150,13 +153,14 @@ static int coroutine_fn stream_run(Job *job, Error **errp)
copy = false; copy = false;
ret = bdrv_is_allocated(bs, offset, STREAM_CHUNK, &n); ret = bdrv_is_allocated(unfiltered_bs, offset, STREAM_CHUNK, &n);
if (ret == 1) { if (ret == 1) {
/* Allocated in the top, no need to copy. */ /* Allocated in the top, no need to copy. */
} else if (ret >= 0) { } else if (ret >= 0) {
/* Copy if allocated in the intermediate images. Limit to the /* Copy if allocated in the intermediate images. Limit to the
* known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */ * known-unallocated area [offset, offset+n*BDRV_SECTOR_SIZE). */
ret = bdrv_is_allocated_above(backing_bs(bs), s->bottom, true, ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
s->base_overlay, true,
offset, n, &n); offset, n, &n);
/* Finish early if end of backing file has been reached */ /* Finish early if end of backing file has been reached */
if (ret == 0 && n == 0) { if (ret == 0 && n == 0) {
@ -223,9 +227,29 @@ void stream_start(const char *job_id, BlockDriverState *bs,
BlockDriverState *iter; BlockDriverState *iter;
bool bs_read_only; bool bs_read_only;
int basic_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED; int basic_flags = BLK_PERM_CONSISTENT_READ | BLK_PERM_WRITE_UNCHANGED;
BlockDriverState *bottom = bdrv_find_overlay(bs, base); BlockDriverState *base_overlay = bdrv_find_overlay(bs, base);
BlockDriverState *above_base;
if (bdrv_freeze_backing_chain(bs, bottom, errp) < 0) { if (!base_overlay) {
error_setg(errp, "'%s' is not in the backing chain of '%s'",
base->node_name, bs->node_name);
return;
}
/*
* Find the node directly above @base. @base_overlay is a COW overlay, so
* it must have a bdrv_cow_child(), but it is the immediate overlay of
* @base, so between the two there can only be filters.
*/
above_base = base_overlay;
if (bdrv_cow_bs(above_base) != base) {
above_base = bdrv_cow_bs(above_base);
while (bdrv_filter_bs(above_base) != base) {
above_base = bdrv_filter_bs(above_base);
}
}
if (bdrv_freeze_backing_chain(bs, above_base, errp) < 0) {
return; return;
} }
@ -255,14 +279,19 @@ void stream_start(const char *job_id, BlockDriverState *bs,
* and resizes. Reassign the base node pointer because the backing BS of the * and resizes. Reassign the base node pointer because the backing BS of the
* bottom node might change after the call to bdrv_reopen_set_read_only() * bottom node might change after the call to bdrv_reopen_set_read_only()
* due to parallel block jobs running. * due to parallel block jobs running.
* above_base node might change after the call to
* bdrv_reopen_set_read_only() due to parallel block jobs running.
*/ */
base = backing_bs(bottom); base = bdrv_filter_or_cow_bs(above_base);
for (iter = backing_bs(bs); iter && iter != base; iter = backing_bs(iter)) { for (iter = bdrv_filter_or_cow_bs(bs); iter != base;
iter = bdrv_filter_or_cow_bs(iter))
{
block_job_add_bdrv(&s->common, "intermediate node", iter, 0, block_job_add_bdrv(&s->common, "intermediate node", iter, 0,
basic_flags, &error_abort); basic_flags, &error_abort);
} }
s->bottom = bottom; s->base_overlay = base_overlay;
s->above_base = above_base;
s->backing_file_str = g_strdup(backing_file_str); s->backing_file_str = g_strdup(backing_file_str);
s->bs_read_only = bs_read_only; s->bs_read_only = bs_read_only;
s->chain_frozen = true; s->chain_frozen = true;
@ -276,5 +305,5 @@ fail:
if (bs_read_only) { if (bs_read_only) {
bdrv_reopen_set_read_only(bs, true, NULL); bdrv_reopen_set_read_only(bs, true, NULL);
} }
bdrv_unfreeze_backing_chain(bs, bottom); bdrv_unfreeze_backing_chain(bs, above_base);
} }

View File

@ -151,6 +151,15 @@ static int coroutine_fn throttle_co_pdiscard(BlockDriverState *bs,
return bdrv_co_pdiscard(bs->file, offset, bytes); return bdrv_co_pdiscard(bs->file, offset, bytes);
} }
/*
 * Compressed-write entry point of the throttle driver: forwards the
 * request to throttle_co_pwritev() with BDRV_REQ_WRITE_COMPRESSED set
 * as the only request flag.
 */
static int coroutine_fn throttle_co_pwritev_compressed(BlockDriverState *bs,
                                                       uint64_t offset,
                                                       uint64_t bytes,
                                                       QEMUIOVector *qiov)
{
    int ret;

    ret = throttle_co_pwritev(bs, offset, bytes, qiov,
                              BDRV_REQ_WRITE_COMPRESSED);
    return ret;
}
static int throttle_co_flush(BlockDriverState *bs) static int throttle_co_flush(BlockDriverState *bs)
{ {
return bdrv_co_flush(bs->file->bs); return bdrv_co_flush(bs->file->bs);
@ -243,6 +252,7 @@ static BlockDriver bdrv_throttle = {
.bdrv_co_pwrite_zeroes = throttle_co_pwrite_zeroes, .bdrv_co_pwrite_zeroes = throttle_co_pwrite_zeroes,
.bdrv_co_pdiscard = throttle_co_pdiscard, .bdrv_co_pdiscard = throttle_co_pdiscard,
.bdrv_co_pwritev_compressed = throttle_co_pwritev_compressed,
.bdrv_attach_aio_context = throttle_attach_aio_context, .bdrv_attach_aio_context = throttle_attach_aio_context,
.bdrv_detach_aio_context = throttle_detach_aio_context, .bdrv_detach_aio_context = throttle_detach_aio_context,
@ -250,7 +260,6 @@ static BlockDriver bdrv_throttle = {
.bdrv_reopen_prepare = throttle_reopen_prepare, .bdrv_reopen_prepare = throttle_reopen_prepare,
.bdrv_reopen_commit = throttle_reopen_commit, .bdrv_reopen_commit = throttle_reopen_commit,
.bdrv_reopen_abort = throttle_reopen_abort, .bdrv_reopen_abort = throttle_reopen_abort,
.bdrv_co_block_status = bdrv_co_block_status_from_file,
.bdrv_co_drain_begin = throttle_co_drain_begin, .bdrv_co_drain_begin = throttle_co_drain_begin,
.bdrv_co_drain_end = throttle_co_drain_end, .bdrv_co_drain_end = throttle_co_drain_end,

View File

@ -2803,21 +2803,6 @@ static void vmdk_close(BlockDriverState *bs)
error_free(s->migration_blocker); error_free(s->migration_blocker);
} }
/*
 * Flush the file of every extent of the VMDK image.
 *
 * A failing extent does not stop the loop; the remaining extents are
 * still flushed.  Returns 0 if every flush succeeded, otherwise the
 * negative error code of the last extent that failed.
 */
static coroutine_fn int vmdk_co_flush(BlockDriverState *bs)
{
    BDRVVmdkState *s = bs->opaque;
    int result = 0;
    int idx;

    for (idx = 0; idx < s->num_extents; idx++) {
        int rc = bdrv_co_flush(s->extents[idx].file->bs);

        if (rc < 0) {
            /* Remember the failure but keep flushing the other extents. */
            result = rc;
        }
    }

    return result;
}
static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs) static int64_t vmdk_get_allocated_file_size(BlockDriverState *bs)
{ {
int i; int i;
@ -3081,7 +3066,6 @@ static BlockDriver bdrv_vmdk = {
.bdrv_close = vmdk_close, .bdrv_close = vmdk_close,
.bdrv_co_create_opts = vmdk_co_create_opts, .bdrv_co_create_opts = vmdk_co_create_opts,
.bdrv_co_create = vmdk_co_create, .bdrv_co_create = vmdk_co_create,
.bdrv_co_flush_to_disk = vmdk_co_flush,
.bdrv_co_block_status = vmdk_co_block_status, .bdrv_co_block_status = vmdk_co_block_status,
.bdrv_get_allocated_file_size = vmdk_get_allocated_file_size, .bdrv_get_allocated_file_size = vmdk_get_allocated_file_size,
.bdrv_has_zero_init = vmdk_has_zero_init, .bdrv_has_zero_init = vmdk_has_zero_init,

View File

@ -1562,7 +1562,12 @@ static void external_snapshot_prepare(BlkActionState *common,
goto out; goto out;
} }
if (state->new_bs->backing != NULL) { if (state->new_bs->drv->is_filter) {
error_setg(errp, "Filters cannot be used as overlays");
goto out;
}
if (bdrv_cow_child(state->new_bs)) {
error_setg(errp, "The overlay already has a backing image"); error_setg(errp, "The overlay already has a backing image");
goto out; goto out;
} }
@ -1736,7 +1741,13 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
* on top of. * on top of.
*/ */
if (backup->sync == MIRROR_SYNC_MODE_TOP) { if (backup->sync == MIRROR_SYNC_MODE_TOP) {
source = backing_bs(bs); /*
* Backup will not replace the source by the target, so none
* of the filters skipped here will be removed (in contrast to
* mirror). Therefore, we can skip all of them when looking
* for the first COW relationship.
*/
source = bdrv_cow_bs(bdrv_skip_filters(bs));
if (!source) { if (!source) {
backup->sync = MIRROR_SYNC_MODE_FULL; backup->sync = MIRROR_SYNC_MODE_FULL;
} }
@ -1756,9 +1767,14 @@ static void drive_backup_prepare(BlkActionState *common, Error **errp)
if (backup->mode != NEW_IMAGE_MODE_EXISTING) { if (backup->mode != NEW_IMAGE_MODE_EXISTING) {
assert(backup->format); assert(backup->format);
if (source) { if (source) {
bdrv_refresh_filename(source); /* Implicit filters should not appear in the filename */
bdrv_img_create(backup->target, backup->format, source->filename, BlockDriverState *explicit_backing =
source->drv->format_name, NULL, bdrv_skip_implicit_filters(source);
bdrv_refresh_filename(explicit_backing);
bdrv_img_create(backup->target, backup->format,
explicit_backing->filename,
explicit_backing->drv->format_name, NULL,
size, flags, false, &local_err); size, flags, false, &local_err);
} else { } else {
bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL, bdrv_img_create(backup->target, backup->format, NULL, NULL, NULL,
@ -2528,7 +2544,9 @@ void qmp_block_stream(bool has_job_id, const char *job_id, const char *device,
} }
/* Check for op blockers in the whole chain between bs and base */ /* Check for op blockers in the whole chain between bs and base */
for (iter = bs; iter && iter != base_bs; iter = backing_bs(iter)) { for (iter = bs; iter && iter != base_bs;
iter = bdrv_filter_or_cow_bs(iter))
{
if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) { if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_STREAM, errp)) {
goto out; goto out;
} }
@ -2584,6 +2602,7 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
AioContext *aio_context; AioContext *aio_context;
Error *local_err = NULL; Error *local_err = NULL;
int job_flags = JOB_DEFAULT; int job_flags = JOB_DEFAULT;
uint64_t top_perm, top_shared;
if (!has_speed) { if (!has_speed) {
speed = 0; speed = 0;
@ -2685,7 +2704,9 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
assert(bdrv_get_aio_context(base_bs) == aio_context); assert(bdrv_get_aio_context(base_bs) == aio_context);
for (iter = top_bs; iter != backing_bs(base_bs); iter = backing_bs(iter)) { for (iter = top_bs; iter != bdrv_filter_or_cow_bs(base_bs);
iter = bdrv_filter_or_cow_bs(iter))
{
if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) { if (bdrv_op_is_blocked(iter, BLOCK_OP_TYPE_COMMIT_TARGET, errp)) {
goto out; goto out;
} }
@ -2697,14 +2718,38 @@ void qmp_block_commit(bool has_job_id, const char *job_id, const char *device,
goto out; goto out;
} }
if (top_bs == bs) { /*
* Active commit is required if and only if someone has taken a
* WRITE permission on the top node. Historically, we have always
* used active commit for top nodes, so continue that practice
* lest we possibly break clients that rely on this behavior, e.g.
* to later attach this node to a writing parent.
* (Active commit is never really wrong.)
*/
bdrv_get_cumulative_perm(top_bs, &top_perm, &top_shared);
if (top_perm & BLK_PERM_WRITE ||
bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs))
{
if (has_backing_file) { if (has_backing_file) {
if (bdrv_skip_filters(top_bs) == bdrv_skip_filters(bs)) {
error_setg(errp, "'backing-file' specified," error_setg(errp, "'backing-file' specified,"
" but 'top' is the active layer"); " but 'top' is the active layer");
} else {
error_setg(errp, "'backing-file' specified, but 'top' has a "
"writer on it");
}
goto out; goto out;
} }
commit_active_start(has_job_id ? job_id : NULL, bs, base_bs, if (!has_job_id) {
job_flags, speed, on_error, /*
* Emulate here what block_job_create() does, because it
* is possible that @bs != @top_bs (the block job should
* be named after @bs, even if @top_bs is the actual
* source)
*/
job_id = bdrv_get_device_name(bs);
}
commit_active_start(job_id, top_bs, base_bs, job_flags, speed, on_error,
filter_node_name, NULL, NULL, false, &local_err); filter_node_name, NULL, NULL, false, &local_err);
} else { } else {
BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs); BlockDriverState *overlay_bs = bdrv_find_overlay(bs, top_bs);
@ -2892,6 +2937,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
bool has_auto_dismiss, bool auto_dismiss, bool has_auto_dismiss, bool auto_dismiss,
Error **errp) Error **errp)
{ {
BlockDriverState *unfiltered_bs;
int job_flags = JOB_DEFAULT; int job_flags = JOB_DEFAULT;
if (!has_speed) { if (!has_speed) {
@ -2943,10 +2989,19 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
return; return;
} }
if (!bs->backing && sync == MIRROR_SYNC_MODE_TOP) { if (!bdrv_backing_chain_next(bs) && sync == MIRROR_SYNC_MODE_TOP) {
sync = MIRROR_SYNC_MODE_FULL; sync = MIRROR_SYNC_MODE_FULL;
} }
if (!has_replaces) {
/* We want to mirror from @bs, but keep implicit filters on top */
unfiltered_bs = bdrv_skip_implicit_filters(bs);
if (unfiltered_bs != bs) {
replaces = unfiltered_bs->node_name;
has_replaces = true;
}
}
if (has_replaces) { if (has_replaces) {
BlockDriverState *to_replace_bs; BlockDriverState *to_replace_bs;
AioContext *replace_aio_context; AioContext *replace_aio_context;
@ -2993,7 +3048,7 @@ static void blockdev_mirror_common(const char *job_id, BlockDriverState *bs,
void qmp_drive_mirror(DriveMirror *arg, Error **errp) void qmp_drive_mirror(DriveMirror *arg, Error **errp)
{ {
BlockDriverState *bs; BlockDriverState *bs;
BlockDriverState *source, *target_bs; BlockDriverState *target_backing_bs, *target_bs;
AioContext *aio_context; AioContext *aio_context;
AioContext *old_context; AioContext *old_context;
BlockMirrorBackingMode backing_mode; BlockMirrorBackingMode backing_mode;
@ -3028,12 +3083,12 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
} }
flags = bs->open_flags | BDRV_O_RDWR; flags = bs->open_flags | BDRV_O_RDWR;
source = backing_bs(bs); target_backing_bs = bdrv_cow_bs(bdrv_skip_filters(bs));
if (!source && arg->sync == MIRROR_SYNC_MODE_TOP) { if (!target_backing_bs && arg->sync == MIRROR_SYNC_MODE_TOP) {
arg->sync = MIRROR_SYNC_MODE_FULL; arg->sync = MIRROR_SYNC_MODE_FULL;
} }
if (arg->sync == MIRROR_SYNC_MODE_NONE) { if (arg->sync == MIRROR_SYNC_MODE_NONE) {
source = bs; target_backing_bs = bs;
} }
size = bdrv_getlength(bs); size = bdrv_getlength(bs);
@ -3059,7 +3114,7 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
/* Don't open backing image in create() */ /* Don't open backing image in create() */
flags |= BDRV_O_NO_BACKING; flags |= BDRV_O_NO_BACKING;
if ((arg->sync == MIRROR_SYNC_MODE_FULL || !source) if ((arg->sync == MIRROR_SYNC_MODE_FULL || !target_backing_bs)
&& arg->mode != NEW_IMAGE_MODE_EXISTING) && arg->mode != NEW_IMAGE_MODE_EXISTING)
{ {
/* create new image w/o backing file */ /* create new image w/o backing file */
@ -3067,15 +3122,19 @@ void qmp_drive_mirror(DriveMirror *arg, Error **errp)
bdrv_img_create(arg->target, format, bdrv_img_create(arg->target, format,
NULL, NULL, NULL, size, flags, false, &local_err); NULL, NULL, NULL, size, flags, false, &local_err);
} else { } else {
/* Implicit filters should not appear in the filename */
BlockDriverState *explicit_backing =
bdrv_skip_implicit_filters(target_backing_bs);
switch (arg->mode) { switch (arg->mode) {
case NEW_IMAGE_MODE_EXISTING: case NEW_IMAGE_MODE_EXISTING:
break; break;
case NEW_IMAGE_MODE_ABSOLUTE_PATHS: case NEW_IMAGE_MODE_ABSOLUTE_PATHS:
/* create new image with backing file */ /* create new image with backing file */
bdrv_refresh_filename(source); bdrv_refresh_filename(explicit_backing);
bdrv_img_create(arg->target, format, bdrv_img_create(arg->target, format,
source->filename, explicit_backing->filename,
source->drv->format_name, explicit_backing->drv->format_name,
NULL, size, flags, false, &local_err); NULL, size, flags, false, &local_err);
break; break;
default: default:

View File

@ -532,7 +532,7 @@ BlockDriverState *bdrv_next(BdrvNextIterator *it);
void bdrv_next_cleanup(BdrvNextIterator *it); void bdrv_next_cleanup(BdrvNextIterator *it);
BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs); BlockDriverState *bdrv_next_monitor_owned(BlockDriverState *bs);
bool bdrv_is_encrypted(BlockDriverState *bs); bool bdrv_supports_compressed_writes(BlockDriverState *bs);
void bdrv_iterate_format(void (*it)(void *opaque, const char *name), void bdrv_iterate_format(void (*it)(void *opaque, const char *name),
void *opaque, bool read_only); void *opaque, bool read_only);
const char *bdrv_get_node_name(const BlockDriverState *bs); const char *bdrv_get_node_name(const BlockDriverState *bs);

View File

@ -92,9 +92,17 @@ struct BlockDriver {
int instance_size; int instance_size;
/* set to true if the BlockDriver is a block filter. Block filters pass /* set to true if the BlockDriver is a block filter. Block filters pass
* certain callbacks that refer to data (see block.c) to their bs->file if * certain callbacks that refer to data (see block.c) to their bs->file
* the driver doesn't implement them. Drivers that do not wish to forward * or bs->backing (whichever one exists) if the driver doesn't implement
* must implement them and return -ENOTSUP. * them. Drivers that do not wish to forward must implement them and return
* -ENOTSUP.
* Note that filters are not allowed to modify data.
*
* Filters generally cannot have more than a single filtered child,
* because the data they present must at all times be the same as
* that on their filtered child. That would be impossible to
* achieve for multiple filtered children.
* (And this filtered child must then be bs->file or bs->backing.)
*/ */
bool is_filter; bool is_filter;
/* /*
@ -839,11 +847,20 @@ struct BlockDriverState {
bool walking_aio_notifiers; /* to make removal during iteration safe */ bool walking_aio_notifiers; /* to make removal during iteration safe */
char filename[PATH_MAX]; char filename[PATH_MAX];
char backing_file[PATH_MAX]; /* if non zero, the image is a diff of /*
this file image */ * If not empty, this image is a diff in relation to backing_file.
/* The backing filename indicated by the image header; if we ever * Note that this is the name given in the image header and
* open this file, then this is replaced by the resulting BDS's * therefore may or may not be equal to .backing->bs->filename.
* filename (i.e. after a bdrv_refresh_filename() run). */ * If this field contains a relative path, it is to be resolved
* relative to the overlay's location.
*/
char backing_file[PATH_MAX];
/*
* The backing filename indicated by the image header. Contrary
* to backing_file, if we ever open this file, auto_backing_file
* is replaced by the resulting BDS's filename (i.e. after a
* bdrv_refresh_filename() run).
*/
char auto_backing_file[PATH_MAX]; char auto_backing_file[PATH_MAX];
char backing_format[16]; /* if non-zero and backing_file exists */ char backing_format[16]; /* if non-zero and backing_file exists */
@ -995,11 +1012,6 @@ typedef enum BlockMirrorBackingMode {
MIRROR_LEAVE_BACKING_CHAIN, MIRROR_LEAVE_BACKING_CHAIN,
} BlockMirrorBackingMode; } BlockMirrorBackingMode;
static inline BlockDriverState *backing_bs(BlockDriverState *bs)
{
return bs->backing ? bs->backing->bs : NULL;
}
/* Essential block drivers which must always be statically linked into qemu, and /* Essential block drivers which must always be statically linked into qemu, and
* which therefore can be accessed without using bdrv_find_format() */ * which therefore can be accessed without using bdrv_find_format() */
@ -1050,6 +1062,8 @@ BlockDriver *bdrv_probe_all(const uint8_t *buf, int buf_size,
void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix, void bdrv_parse_filename_strip_prefix(const char *filename, const char *prefix,
QDict *options); QDict *options);
bool bdrv_backing_overridden(BlockDriverState *bs);
/** /**
* bdrv_add_before_write_notifier: * bdrv_add_before_write_notifier:
@ -1300,28 +1314,6 @@ void bdrv_default_perms(BlockDriverState *bs, BdrvChild *c,
uint64_t perm, uint64_t shared, uint64_t perm, uint64_t shared,
uint64_t *nperm, uint64_t *nshared); uint64_t *nperm, uint64_t *nshared);
/*
* Default implementation for drivers to pass bdrv_co_block_status() to
* their file.
*/
int coroutine_fn bdrv_co_block_status_from_file(BlockDriverState *bs,
bool want_zero,
int64_t offset,
int64_t bytes,
int64_t *pnum,
int64_t *map,
BlockDriverState **file);
/*
* Default implementation for drivers to pass bdrv_co_block_status() to
* their backing file.
*/
int coroutine_fn bdrv_co_block_status_from_backing(BlockDriverState *bs,
bool want_zero,
int64_t offset,
int64_t bytes,
int64_t *pnum,
int64_t *map,
BlockDriverState **file);
const char *bdrv_get_parent_name(const BlockDriverState *bs); const char *bdrv_get_parent_name(const BlockDriverState *bs);
void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp); void blk_dev_change_media_cb(BlockBackend *blk, bool load, Error **errp);
bool blk_dev_has_removable_media(BlockBackend *blk); bool blk_dev_has_removable_media(BlockBackend *blk);
@ -1382,4 +1374,37 @@ BdrvDirtyBitmap *block_dirty_bitmap_remove(const char *node, const char *name,
BlockDriverState **bitmap_bs, BlockDriverState **bitmap_bs,
Error **errp); Error **errp);
BdrvChild *bdrv_cow_child(BlockDriverState *bs);
BdrvChild *bdrv_filter_child(BlockDriverState *bs);
BdrvChild *bdrv_filter_or_cow_child(BlockDriverState *bs);
BdrvChild *bdrv_primary_child(BlockDriverState *bs);
BlockDriverState *bdrv_skip_implicit_filters(BlockDriverState *bs);
BlockDriverState *bdrv_skip_filters(BlockDriverState *bs);
BlockDriverState *bdrv_backing_chain_next(BlockDriverState *bs);
/*
 * Return the node that @child points to, or NULL when @child itself
 * is NULL.
 */
static inline BlockDriverState *child_bs(BdrvChild *child)
{
    if (!child) {
        return NULL;
    }
    return child->bs;
}
/*
 * Node-returning variant of bdrv_cow_child(): yields the node behind
 * the child that bdrv_cow_child() reports for @bs, or NULL if it
 * reports no child.
 */
static inline BlockDriverState *bdrv_cow_bs(BlockDriverState *bs)
{
    BdrvChild *cow_child = bdrv_cow_child(bs);

    return child_bs(cow_child);
}
/*
 * Node-returning variant of bdrv_filter_child(): yields the node behind
 * the child that bdrv_filter_child() reports for @bs, or NULL if it
 * reports no child.
 */
static inline BlockDriverState *bdrv_filter_bs(BlockDriverState *bs)
{
    BdrvChild *filtered_child = bdrv_filter_child(bs);

    return child_bs(filtered_child);
}
/*
 * Node-returning variant of bdrv_filter_or_cow_child(): yields the node
 * behind the child that bdrv_filter_or_cow_child() reports for @bs, or
 * NULL if it reports no child.
 */
static inline BlockDriverState *bdrv_filter_or_cow_bs(BlockDriverState *bs)
{
    BdrvChild *next_child = bdrv_filter_or_cow_child(bs);

    return child_bs(next_child);
}
/*
 * Node-returning variant of bdrv_primary_child(): yields the node behind
 * the child that bdrv_primary_child() reports for @bs, or NULL if it
 * reports no child.
 */
static inline BlockDriverState *bdrv_primary_bs(BlockDriverState *bs)
{
    BdrvChild *primary_child = bdrv_primary_child(bs);

    return child_bs(primary_child);
}
#endif /* BLOCK_INT_H */ #endif /* BLOCK_INT_H */

View File

@ -615,13 +615,7 @@ static int init_dirty_bitmap_migration(DBMSaveState *s)
while (bs && bs->drv && bs->drv->is_filter && while (bs && bs->drv && bs->drv->is_filter &&
!bdrv_has_named_bitmaps(bs)) !bdrv_has_named_bitmaps(bs))
{ {
if (bs->backing) { bs = bdrv_filter_bs(bs);
bs = bs->backing->bs;
} else if (bs->file) {
bs = bs->file->bs;
} else {
bs = NULL;
}
} }
if (bs && bs->drv && !bs->drv->is_filter) { if (bs && bs->drv && !bs->drv->is_filter) {

View File

@ -1567,13 +1567,13 @@ NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset,
if (bitmap) { if (bitmap) {
BdrvDirtyBitmap *bm = NULL; BdrvDirtyBitmap *bm = NULL;
while (true) { while (bs) {
bm = bdrv_find_dirty_bitmap(bs, bitmap); bm = bdrv_find_dirty_bitmap(bs, bitmap);
if (bm != NULL || bs->backing == NULL) { if (bm != NULL) {
break; break;
} }
bs = bs->backing->bs; bs = bdrv_filter_or_cow_bs(bs);
} }
if (bm == NULL) { if (bm == NULL) {

View File

@ -1569,6 +1569,18 @@
# Live commit of data from overlay image nodes into backing nodes - i.e., # Live commit of data from overlay image nodes into backing nodes - i.e.,
# writes data between 'top' and 'base' into 'base'. # writes data between 'top' and 'base' into 'base'.
# #
# If top == base, that is an error.
# If top has no overlays on top of it, or if it is in use by a writer,
# the job will not be completed by itself. The user needs to complete
# the job with the block-job-complete command after getting the ready
# event. (Since 2.0)
#
# If the base image is smaller than top, then the base image will be
# resized to be the same size as top. If top is smaller than the base
# image, the base will not be truncated. If you want the base image
# size to match the size of the smaller top, you can safely truncate
# it yourself once the commit operation successfully completes.
#
# @job-id: identifier for the newly-created block job. If # @job-id: identifier for the newly-created block job. If
# omitted, the device name will be used. (Since 2.7) # omitted, the device name will be used. (Since 2.7)
# #
@ -1593,14 +1605,15 @@
# accepted # accepted
# #
# @backing-file: The backing file string to write into the overlay # @backing-file: The backing file string to write into the overlay
# image of 'top'. If 'top' is the active layer, # image of 'top'. If 'top' does not have an overlay
# specifying a backing file string is an error. This # image, or if 'top' is in use by a writer, specifying
# filename is not validated. # a backing file string is an error.
# #
# If a pathname string is such that it cannot be # This filename is not validated. If a pathname string
# resolved by QEMU, that means that subsequent QMP or # is such that it cannot be resolved by QEMU, that
# HMP commands must use node-names for the image in # means that subsequent QMP or HMP commands must use
# question, as filename lookup methods will fail. # node-names for the image in question, as filename
# lookup methods will fail.
# #
# If not specified, QEMU will automatically determine # If not specified, QEMU will automatically determine
# the backing file string to use, or error out if # the backing file string to use, or error out if
@ -1609,18 +1622,6 @@
# filename or protocol. # filename or protocol.
# (Since 2.1) # (Since 2.1)
# #
# If top == base, that is an error.
# If top == active, the job will not be completed by itself,
# user needs to complete the job with the block-job-complete
# command after getting the ready event. (Since 2.0)
#
# If the base image is smaller than top, then the base image
# will be resized to be the same size as top. If top is
# smaller than the base image, the base will not be
# truncated. If you want the base image size to match the
# size of the smaller top, you can safely truncate it
# yourself once the commit operation successfully completes.
#
# @speed: the maximum speed, in bytes per second # @speed: the maximum speed, in bytes per second
# #
# @on-error: the action to take on an error. 'ignore' means that the request # @on-error: the action to take on an error. 'ignore' means that the request
@ -1948,7 +1949,8 @@
# #
# @replaces: with sync=full graph node name to be replaced by the new # @replaces: with sync=full graph node name to be replaced by the new
# image when a whole image copy is done. This can be used to repair # image when a whole image copy is done. This can be used to repair
# broken Quorum files. (Since 2.1) # broken Quorum files. By default, @device is replaced, although
# implicitly created filters on it are kept. (Since 2.1)
# #
# @mode: whether and how QEMU should create a new image, default is # @mode: whether and how QEMU should create a new image, default is
# 'absolute-paths'. # 'absolute-paths'.
@ -2259,7 +2261,8 @@
# #
# @replaces: with sync=full graph node name to be replaced by the new # @replaces: with sync=full graph node name to be replaced by the new
# image when a whole image copy is done. This can be used to repair # image when a whole image copy is done. This can be used to repair
# broken Quorum files. # broken Quorum files. By default, @device is replaced, although
# implicitly created filters on it are kept.
# #
# @speed: the maximum speed, in bytes per second # @speed: the maximum speed, in bytes per second
# #
@ -2484,13 +2487,20 @@
# of 'device'. # of 'device'.
# #
# If a base file is specified then sectors are not copied from that base file and # If a base file is specified then sectors are not copied from that base file and
# its backing chain. When streaming completes the image file will have the base # its backing chain. This can be used to stream a subset of the backing file
# file as its backing file. This can be used to stream a subset of the backing # chain instead of flattening the entire image.
# file chain instead of flattening the entire image. # When streaming completes the image file will have the base file as its backing
# file, unless that node was changed while the job was running. In that case,
# base's parent's backing (or filtered, whichever exists) child (i.e., base at
# the beginning of the job) will be the new backing file.
# #
# On successful completion the image file is updated to drop the backing file # On successful completion the image file is updated to drop the backing file
# and the BLOCK_JOB_COMPLETED event is emitted. # and the BLOCK_JOB_COMPLETED event is emitted.
# #
# In case @device is a filter node, block-stream modifies the first non-filter
# overlay node below it to point to the new backing node instead of modifying
# @device itself.
#
# @job-id: identifier for the newly-created block job. If # @job-id: identifier for the newly-created block job. If
# omitted, the device name will be used. (Since 2.7) # omitted, the device name will be used. (Since 2.7)
# #

View File

@ -1085,7 +1085,7 @@ static int img_commit(int argc, char **argv)
/* This is different from QMP, which by default uses the deepest file in /* This is different from QMP, which by default uses the deepest file in
* the backing chain (i.e., the very base); however, the traditional * the backing chain (i.e., the very base); however, the traditional
* behavior of qemu-img commit is using the immediate backing file. */ * behavior of qemu-img commit is using the immediate backing file. */
base_bs = backing_bs(bs); base_bs = bdrv_backing_chain_next(bs);
if (!base_bs) { if (!base_bs) {
error_setg(&local_err, "Image does not have a backing file"); error_setg(&local_err, "Image does not have a backing file");
goto done; goto done;
@ -1732,18 +1732,20 @@ static int convert_iteration_sectors(ImgConvertState *s, int64_t sector_num)
if (s->sector_next_status <= sector_num) { if (s->sector_next_status <= sector_num) {
uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE; uint64_t offset = (sector_num - src_cur_offset) * BDRV_SECTOR_SIZE;
int64_t count; int64_t count;
BlockDriverState *src_bs = blk_bs(s->src[src_cur]);
BlockDriverState *base;
if (s->target_has_backing) {
base = bdrv_cow_bs(bdrv_skip_filters(src_bs));
} else {
base = NULL;
}
do { do {
count = n * BDRV_SECTOR_SIZE; count = n * BDRV_SECTOR_SIZE;
if (s->target_has_backing) { ret = bdrv_block_status_above(src_bs, base, offset, count, &count,
ret = bdrv_block_status(blk_bs(s->src[src_cur]), offset, NULL, NULL);
count, &count, NULL, NULL);
} else {
ret = bdrv_block_status_above(blk_bs(s->src[src_cur]), NULL,
offset, count, &count, NULL,
NULL);
}
if (ret < 0) { if (ret < 0) {
if (s->salvage) { if (s->salvage) {
@ -2665,7 +2667,8 @@ static int img_convert(int argc, char **argv)
* s.target_backing_sectors has to be negative, which it will * s.target_backing_sectors has to be negative, which it will
* be automatically). The backing file length is used only * be automatically). The backing file length is used only
* for optimizations, so such a case is not fatal. */ * for optimizations, so such a case is not fatal. */
s.target_backing_sectors = bdrv_nb_sectors(out_bs->backing->bs); s.target_backing_sectors =
bdrv_nb_sectors(bdrv_backing_chain_next(out_bs));
} else { } else {
s.target_backing_sectors = -1; s.target_backing_sectors = -1;
} }
@ -3035,6 +3038,7 @@ static int get_block_status(BlockDriverState *bs, int64_t offset,
depth = 0; depth = 0;
for (;;) { for (;;) {
bs = bdrv_skip_filters(bs);
ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file); ret = bdrv_block_status(bs, offset, bytes, &bytes, &map, &file);
if (ret < 0) { if (ret < 0) {
return ret; return ret;
@ -3043,7 +3047,7 @@ static int get_block_status(BlockDriverState *bs, int64_t offset,
if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) { if (ret & (BDRV_BLOCK_ZERO|BDRV_BLOCK_DATA)) {
break; break;
} }
bs = backing_bs(bs); bs = bdrv_cow_bs(bs);
if (bs == NULL) { if (bs == NULL) {
ret = 0; ret = 0;
break; break;
@ -3425,6 +3429,7 @@ static int img_rebase(int argc, char **argv)
uint8_t *buf_old = NULL; uint8_t *buf_old = NULL;
uint8_t *buf_new = NULL; uint8_t *buf_new = NULL;
BlockDriverState *bs = NULL, *prefix_chain_bs = NULL; BlockDriverState *bs = NULL, *prefix_chain_bs = NULL;
BlockDriverState *unfiltered_bs;
char *filename; char *filename;
const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg; const char *fmt, *cache, *src_cache, *out_basefmt, *out_baseimg;
int c, flags, src_flags, ret; int c, flags, src_flags, ret;
@ -3559,6 +3564,8 @@ static int img_rebase(int argc, char **argv)
} }
bs = blk_bs(blk); bs = blk_bs(blk);
unfiltered_bs = bdrv_skip_filters(bs);
if (out_basefmt != NULL) { if (out_basefmt != NULL) {
if (bdrv_find_format(out_basefmt) == NULL) { if (bdrv_find_format(out_basefmt) == NULL) {
error_report("Invalid format name: '%s'", out_basefmt); error_report("Invalid format name: '%s'", out_basefmt);
@ -3570,7 +3577,7 @@ static int img_rebase(int argc, char **argv)
/* For safe rebasing we need to compare old and new backing file */ /* For safe rebasing we need to compare old and new backing file */
if (!unsafe) { if (!unsafe) {
QDict *options = NULL; QDict *options = NULL;
BlockDriverState *base_bs = backing_bs(bs); BlockDriverState *base_bs = bdrv_cow_bs(unfiltered_bs);
if (base_bs) { if (base_bs) {
blk_old_backing = blk_new(qemu_get_aio_context(), blk_old_backing = blk_new(qemu_get_aio_context(),
@ -3711,7 +3718,7 @@ static int img_rebase(int argc, char **argv)
n = MIN(IO_BUF_SIZE, size - offset); n = MIN(IO_BUF_SIZE, size - offset);
/* If the cluster is allocated, we don't need to take action */ /* If the cluster is allocated, we don't need to take action */
ret = bdrv_is_allocated(bs, offset, n, &n); ret = bdrv_is_allocated(unfiltered_bs, offset, n, &n);
if (ret < 0) { if (ret < 0) {
error_report("error while reading image metadata: %s", error_report("error while reading image metadata: %s",
strerror(-ret)); strerror(-ret));
@ -3726,8 +3733,9 @@ static int img_rebase(int argc, char **argv)
* If cluster wasn't changed since prefix_chain, we don't need * If cluster wasn't changed since prefix_chain, we don't need
* to take action * to take action
*/ */
ret = bdrv_is_allocated_above(backing_bs(bs), prefix_chain_bs, ret = bdrv_is_allocated_above(bdrv_cow_bs(unfiltered_bs),
false, offset, n, &n); prefix_chain_bs, false,
offset, n, &n);
if (ret < 0) { if (ret < 0) {
error_report("error while reading image metadata: %s", error_report("error while reading image metadata: %s",
strerror(-ret)); strerror(-ret));
@ -3805,9 +3813,10 @@ static int img_rebase(int argc, char **argv)
* doesn't change when we switch the backing file. * doesn't change when we switch the backing file.
*/ */
if (out_baseimg && *out_baseimg) { if (out_baseimg && *out_baseimg) {
ret = bdrv_change_backing_file(bs, out_baseimg, out_basefmt, true); ret = bdrv_change_backing_file(unfiltered_bs, out_baseimg, out_basefmt,
true);
} else { } else {
ret = bdrv_change_backing_file(bs, NULL, NULL, false); ret = bdrv_change_backing_file(unfiltered_bs, NULL, NULL, false);
} }
if (ret == -ENOSPC) { if (ret == -ENOSPC) {

View File

@ -31,6 +31,11 @@ _cleanup()
_cleanup_test_img _cleanup_test_img
_rm_test_img "$TEST_IMG.base" _rm_test_img "$TEST_IMG.base"
_rm_test_img "$TEST_IMG.orig" _rm_test_img "$TEST_IMG.orig"
_rm_test_img "$TEST_DIR/subdir/t.$IMGFMT.base"
_rm_test_img "$TEST_DIR/subdir/t.$IMGFMT.mid"
_rm_test_img "$TEST_DIR/subdir/t.$IMGFMT"
rmdir "$TEST_DIR/subdir" &> /dev/null
} }
trap "_cleanup; exit \$status" 0 1 2 3 15 trap "_cleanup; exit \$status" 0 1 2 3 15
@ -139,6 +144,45 @@ $QEMU_IO -c 'writev 0 64k' "$TEST_IMG" | _filter_qemu_io
$QEMU_IMG commit "$TEST_IMG" $QEMU_IMG commit "$TEST_IMG"
_cleanup _cleanup
echo
echo 'Testing commit in sub-directory with relative filenames'
echo
pushd "$TEST_DIR" > /dev/null
mkdir subdir
TEST_IMG="subdir/t.$IMGFMT.base" _make_test_img 1M
TEST_IMG="subdir/t.$IMGFMT.mid" _make_test_img -b "t.$IMGFMT.base" -F $IMGFMT
TEST_IMG="subdir/t.$IMGFMT" _make_test_img -b "t.$IMGFMT.mid" -F $IMGFMT
# Should work
$QEMU_IMG commit -b "t.$IMGFMT.mid" "subdir/t.$IMGFMT"
# Might theoretically work, but does not in practice (we have to
# decide between this and the above; and since we always represent
# backing file names as relative to the overlay, we go for the above)
$QEMU_IMG commit -b "subdir/t.$IMGFMT.mid" "subdir/t.$IMGFMT" 2>&1 | \
_filter_imgfmt
# This should work as well
$QEMU_IMG commit -b "$TEST_DIR/subdir/t.$IMGFMT.mid" "subdir/t.$IMGFMT"
popd > /dev/null
# Now let's try with just absolute filenames
# (This will not work with external data files, though, because when
# using relative paths for those, qemu will always resolve them
# relative to its CWD. Therefore, it cannot find those data files now
# that we left $TEST_DIR.)
if _get_data_file '' > /dev/null; then
echo 'Image committed.' # Skip test
else
$QEMU_IMG commit -b "$TEST_DIR/subdir/t.$IMGFMT.mid" \
"$TEST_DIR/subdir/t.$IMGFMT"
fi
# success, all done # success, all done
echo "*** done" echo "*** done"
rm -f $seq.full rm -f $seq.full

View File

@ -1083,4 +1083,14 @@ Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=json:{'driv
wrote 65536/65536 bytes at offset 0 wrote 65536/65536 bytes at offset 0
64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec) 64 KiB, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
qemu-img: Block job failed: No space left on device qemu-img: Block job failed: No space left on device
Testing commit in sub-directory with relative filenames
Formatting 'subdir/t.IMGFMT.base', fmt=IMGFMT size=1048576
Formatting 'subdir/t.IMGFMT.mid', fmt=IMGFMT size=1048576 backing_file=t.IMGFMT.base backing_fmt=IMGFMT
Formatting 'subdir/t.IMGFMT', fmt=IMGFMT size=1048576 backing_file=t.IMGFMT.mid backing_fmt=IMGFMT
Image committed.
qemu-img: Did not find 'subdir/t.IMGFMT.mid' in the backing chain of 'subdir/t.IMGFMT'
Image committed.
Image committed.
*** done *** done

View File

@ -734,6 +734,244 @@ class TestErrorHandling(iotests.QMPTestCase):
self.assertTrue(iotests.compare_images(mid_img, backing_img, fmt2='raw'), self.assertTrue(iotests.compare_images(mid_img, backing_img, fmt2='raw'),
'target image does not match source after commit') 'target image does not match source after commit')
class TestCommitWithFilters(iotests.QMPTestCase):
img0 = os.path.join(iotests.test_dir, '0.img')
img1 = os.path.join(iotests.test_dir, '1.img')
img2 = os.path.join(iotests.test_dir, '2.img')
img3 = os.path.join(iotests.test_dir, '3.img')
def do_test_io(self, read_or_write):
for index, pattern_file in enumerate(self.pattern_files):
result = qemu_io('-f', iotests.imgfmt,
'-c',
f'{read_or_write} -P {index + 1} {index}M 1M',
pattern_file)
self.assertFalse('Pattern verification failed' in result)
def setUp(self):
qemu_img('create', '-f', iotests.imgfmt, self.img0, '64M')
qemu_img('create', '-f', iotests.imgfmt, self.img1, '64M')
qemu_img('create', '-f', iotests.imgfmt, self.img2, '64M')
qemu_img('create', '-f', iotests.imgfmt, self.img3, '64M')
# Distributions of the patterns in the files; this is checked
# by tearDown() and should be changed by the test cases as is
# necessary
self.pattern_files = [self.img0, self.img1, self.img2, self.img3]
self.do_test_io('write')
self.vm = iotests.VM().add_device('virtio-scsi,id=vio-scsi')
self.vm.launch()
result = self.vm.qmp('object-add', qom_type='throttle-group', id='tg')
self.assert_qmp(result, 'return', {})
result = self.vm.qmp('blockdev-add', **{
'node-name': 'top-filter',
'driver': 'throttle',
'throttle-group': 'tg',
'file': {
'node-name': 'cow-3',
'driver': iotests.imgfmt,
'file': {
'driver': 'file',
'filename': self.img3
},
'backing': {
'node-name': 'cow-2',
'driver': iotests.imgfmt,
'file': {
'driver': 'file',
'filename': self.img2
},
'backing': {
'node-name': 'cow-1',
'driver': iotests.imgfmt,
'file': {
'driver': 'file',
'filename': self.img1
},
'backing': {
'node-name': 'bottom-filter',
'driver': 'throttle',
'throttle-group': 'tg',
'file': {
'node-name': 'cow-0',
'driver': iotests.imgfmt,
'file': {
'driver': 'file',
'filename': self.img0
}
}
}
}
}
}
})
self.assert_qmp(result, 'return', {})
def tearDown(self):
self.vm.shutdown()
self.do_test_io('read')
os.remove(self.img3)
os.remove(self.img2)
os.remove(self.img1)
os.remove(self.img0)
# Filters make for funny filenames, so we cannot just use
# self.imgX to get them
def get_filename(self, node):
return self.vm.node_info(node)['image']['filename']
def test_filterless_commit(self):
result = self.vm.qmp('block-commit',
job_id='commit',
device='top-filter',
top_node='cow-2',
base_node='cow-1')
self.assert_qmp(result, 'return', {})
self.wait_until_completed(drive='commit')
self.assertIsNotNone(self.vm.node_info('cow-3'))
self.assertIsNone(self.vm.node_info('cow-2'))
self.assertIsNotNone(self.vm.node_info('cow-1'))
# 2 has been committed into 1
self.pattern_files[2] = self.img1
def test_commit_through_filter(self):
result = self.vm.qmp('block-commit',
job_id='commit',
device='top-filter',
top_node='cow-1',
base_node='cow-0')
self.assert_qmp(result, 'return', {})
self.wait_until_completed(drive='commit')
self.assertIsNotNone(self.vm.node_info('cow-2'))
self.assertIsNone(self.vm.node_info('cow-1'))
self.assertIsNone(self.vm.node_info('bottom-filter'))
self.assertIsNotNone(self.vm.node_info('cow-0'))
# 1 has been committed into 0
self.pattern_files[1] = self.img0
def test_filtered_active_commit_with_filter(self):
# Add a device, so the commit job finds a parent it can change
# to point to the base node (so we can test that top-filter is
# dropped from the graph)
result = self.vm.qmp('device_add', id='drv0', driver='scsi-hd',
bus='vio-scsi.0', drive='top-filter')
self.assert_qmp(result, 'return', {})
# Try to release our reference to top-filter; that should not
# work because drv0 uses it
result = self.vm.qmp('blockdev-del', node_name='top-filter')
self.assert_qmp(result, 'error/class', 'GenericError')
self.assert_qmp(result, 'error/desc', 'Node top-filter is in use')
result = self.vm.qmp('block-commit',
job_id='commit',
device='top-filter',
base_node='cow-2')
self.assert_qmp(result, 'return', {})
self.complete_and_wait(drive='commit')
# Try to release our reference to top-filter again
result = self.vm.qmp('blockdev-del', node_name='top-filter')
self.assert_qmp(result, 'return', {})
self.assertIsNone(self.vm.node_info('top-filter'))
self.assertIsNone(self.vm.node_info('cow-3'))
self.assertIsNotNone(self.vm.node_info('cow-2'))
# Check that drv0 is now connected to cow-2
blockdevs = self.vm.qmp('query-block')['return']
drv0 = next(dev for dev in blockdevs if dev['qdev'] == 'drv0')
self.assertEqual(drv0['inserted']['node-name'], 'cow-2')
# 3 has been committed into 2
self.pattern_files[3] = self.img2
def test_filtered_active_commit_without_filter(self):
result = self.vm.qmp('block-commit',
job_id='commit',
device='top-filter',
top_node='cow-3',
base_node='cow-2')
self.assert_qmp(result, 'return', {})
self.complete_and_wait(drive='commit')
self.assertIsNotNone(self.vm.node_info('top-filter'))
self.assertIsNone(self.vm.node_info('cow-3'))
self.assertIsNotNone(self.vm.node_info('cow-2'))
# 3 has been comitted into 2
self.pattern_files[3] = self.img2
class TestCommitWithOverriddenBacking(iotests.QMPTestCase):
    """Commit tests for a top image whose backing node is overridden.

    top.img is created on disk with base_a.img as its backing file, but
    setUp() opens it with base_b.img attached as the backing node.  The
    tests check which of the two file names block-commit accepts as the
    'base' argument.
    """

    img_base_a = os.path.join(iotests.test_dir, 'base_a.img')
    img_base_b = os.path.join(iotests.test_dir, 'base_b.img')
    img_top = os.path.join(iotests.test_dir, 'top.img')

    def setUp(self):
        """Create the three images and open top with base_b as backing."""
        qemu_img('create', '-f', iotests.imgfmt, self.img_base_a, '1M')
        qemu_img('create', '-f', iotests.imgfmt, self.img_base_b, '1M')
        qemu_img('create', '-f', iotests.imgfmt, '-b', self.img_base_a,
                 self.img_top)

        self.vm = iotests.VM()
        self.vm.launch()

        # Use base_b instead of base_a as the backing of top
        result = self.vm.qmp('blockdev-add', **{
                                'node-name': 'top',
                                'driver': iotests.imgfmt,
                                'file': {
                                    'driver': 'file',
                                    'filename': self.img_top
                                },
                                'backing': {
                                    'node-name': 'base',
                                    'driver': iotests.imgfmt,
                                    'file': {
                                        'driver': 'file',
                                        'filename': self.img_base_b
                                    }
                                }
                            })
        self.assert_qmp(result, 'return', {})

    def tearDown(self):
        """Stop the VM and delete the images created by setUp()."""
        self.vm.shutdown()
        os.remove(self.img_top)
        os.remove(self.img_base_a)
        os.remove(self.img_base_b)

    def test_commit_to_a(self):
        """block-commit with base=base_a must be rejected."""
        # Try committing to base_a (which should fail, as top's
        # backing image is base_b instead)
        result = self.vm.qmp('block-commit',
                             job_id='commit',
                             device='top',
                             base=self.img_base_a)
        self.assert_qmp(result, 'error/class', 'GenericError')

    def test_commit_to_b(self):
        """block-commit with base=base_b must start and run to completion."""
        # Try committing to base_b (which should work, since that is
        # actually top's backing image)
        result = self.vm.qmp('block-commit',
                             job_id='commit',
                             device='top',
                             base=self.img_base_b)
        self.assert_qmp(result, 'return', {})

        self.vm.event_wait('BLOCK_JOB_READY')

        # Check the reply so that a rejected completion request fails the
        # test right here instead of stalling in the event wait below
        result = self.vm.qmp('block-job-complete', device='commit')
        self.assert_qmp(result, 'return', {})

        self.vm.event_wait('BLOCK_JOB_COMPLETED')
if __name__ == '__main__': if __name__ == '__main__':
iotests.main(supported_fmts=['qcow2', 'qed'], iotests.main(supported_fmts=['qcow2', 'qed'],
supported_protocols=['file']) supported_protocols=['file'])

View File

@ -1,5 +1,5 @@
........................................................... .................................................................
---------------------------------------------------------------------- ----------------------------------------------------------------------
Ran 59 tests Ran 65 tests
OK OK

View File

@ -21,8 +21,9 @@
import time import time
import os import os
import re import re
import json
import iotests import iotests
from iotests import qemu_img, qemu_io from iotests import qemu_img, qemu_img_pipe, qemu_io
backing_img = os.path.join(iotests.test_dir, 'backing.img') backing_img = os.path.join(iotests.test_dir, 'backing.img')
target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img') target_backing_img = os.path.join(iotests.test_dir, 'target-backing.img')
@ -1288,6 +1289,149 @@ class TestReplaces(iotests.QMPTestCase):
self.vm.assert_block_path('filter0', '/file', 'target') self.vm.assert_block_path('filter0', '/file', 'target')
# Tests for mirror with filters (and how the mirror filter behaves, as
# an example for an implicit filter)
class TestFilters(iotests.QMPTestCase):
    """Mirror tests involving filter nodes.

    setUp() creates a backing image plus a source and a target overlay on
    top of it, launches a VM with a virtio-scsi controller, and adds the
    target node (without its backing node).  The source chain is only
    described in self.filterless_chain; each test adds it as needed.
    """

    def setUp(self):
        """Create the images, start the VM and add the 'target' node."""
        qemu_img('create', '-f', iotests.imgfmt, backing_img, '1M')
        qemu_img('create', '-f', iotests.imgfmt, '-b', backing_img, test_img)
        qemu_img('create', '-f', iotests.imgfmt, '-b', backing_img, target_img)

        # First half lives in the backing image, second half in the overlay
        qemu_io('-c', 'write -P 1 0 512k', backing_img)
        qemu_io('-c', 'write -P 2 512k 512k', test_img)

        self.vm = iotests.VM().add_device('virtio-scsi,id=vio-scsi')
        self.vm.launch()

        result = self.vm.qmp('blockdev-add', **{
                                'node-name': 'target',
                                'driver': iotests.imgfmt,
                                'file': {
                                    'driver': 'file',
                                    'filename': target_img
                                },
                                'backing': None
                            })
        self.assert_qmp(result, 'return', {})

        self.filterless_chain = {
            'node-name': 'source',
            'driver': iotests.imgfmt,
            'file': {
                'driver': 'file',
                'filename': test_img
            },
            'backing': {
                'node-name': 'backing',
                'driver': iotests.imgfmt,
                'file': {
                    'driver': 'file',
                    'filename': backing_img
                }
            }
        }

    def tearDown(self):
        """Stop the VM and delete the images created by setUp()."""
        self.vm.shutdown()

        os.remove(test_img)
        os.remove(target_img)
        os.remove(backing_img)

    def test_cor(self):
        """Mirror with sync=top through a copy-on-read filter node."""
        result = self.vm.qmp('blockdev-add', **{
                                'node-name': 'filter',
                                'driver': 'copy-on-read',
                                'file': self.filterless_chain
                            })
        self.assert_qmp(result, 'return', {})

        result = self.vm.qmp('blockdev-mirror',
                             job_id='mirror',
                             device='filter',
                             target='target',
                             sync='top')
        self.assert_qmp(result, 'return', {})

        self.complete_and_wait('mirror')

        # Release the target node before inspecting the image with
        # qemu-img; check the reply so a failed deletion is reported here
        result = self.vm.qmp('blockdev-del', node_name='target')
        self.assert_qmp(result, 'return', {})

        target_map = qemu_img_pipe('map', '--output=json', target_img)
        target_map = json.loads(target_map)

        # Use unittest assertions rather than bare assert statements: they
        # are not removed under "python -O" and report both values on failure
        self.assertEqual(target_map[0]['start'], 0)
        self.assertEqual(target_map[0]['length'], 512 * 1024)
        self.assertEqual(target_map[0]['depth'], 1)

        self.assertEqual(target_map[1]['start'], 512 * 1024)
        self.assertEqual(target_map[1]['length'], 512 * 1024)
        self.assertEqual(target_map[1]['depth'], 0)

    def test_implicit_mirror_filter(self):
        """A mirror filter without a node-name is hidden from query-block."""
        result = self.vm.qmp('blockdev-add', **self.filterless_chain)
        self.assert_qmp(result, 'return', {})

        # We need this so we can query from above the mirror node
        result = self.vm.qmp('device_add',
                             driver='scsi-hd',
                             id='virtio',
                             bus='vio-scsi.0',
                             drive='source')
        self.assert_qmp(result, 'return', {})

        result = self.vm.qmp('blockdev-mirror',
                             job_id='mirror',
                             device='source',
                             target='target',
                             sync='top')
        self.assert_qmp(result, 'return', {})

        # The mirror filter is now an implicit node, so it should be
        # invisible when querying the backing chain
        blockdevs = self.vm.qmp('query-block')['return']
        device_info = next(dev for dev in blockdevs if dev['qdev'] == 'virtio')

        self.assertEqual(device_info['inserted']['node-name'], 'source')

        image_info = device_info['inserted']['image']
        self.assertEqual(image_info['filename'], test_img)
        self.assertEqual(image_info['backing-image']['filename'], backing_img)

        self.complete_and_wait('mirror')

    def test_explicit_mirror_filter(self):
        """A mirror filter with a node-name is visible in query-block."""
        # Same test as above, but this time we give the mirror filter
        # a node-name so it will not be invisible
        result = self.vm.qmp('blockdev-add', **self.filterless_chain)
        self.assert_qmp(result, 'return', {})

        # We need this so we can query from above the mirror node
        result = self.vm.qmp('device_add',
                             driver='scsi-hd',
                             id='virtio',
                             bus='vio-scsi.0',
                             drive='source')
        self.assert_qmp(result, 'return', {})

        result = self.vm.qmp('blockdev-mirror',
                             job_id='mirror',
                             device='source',
                             target='target',
                             sync='top',
                             filter_node_name='mirror-filter')
        self.assert_qmp(result, 'return', {})

        # With a node-name given to it, the mirror filter should now
        # be visible
        blockdevs = self.vm.qmp('query-block')['return']
        device_info = next(dev for dev in blockdevs if dev['qdev'] == 'virtio')

        self.assertEqual(device_info['inserted']['node-name'], 'mirror-filter')

        self.complete_and_wait('mirror')
if __name__ == '__main__': if __name__ == '__main__':
iotests.main(supported_fmts=['qcow2', 'qed'], iotests.main(supported_fmts=['qcow2', 'qed'],
supported_protocols=['file'], supported_protocols=['file'],

View File

@ -1,5 +1,5 @@
........................................................................................................ ...........................................................................................................
---------------------------------------------------------------------- ----------------------------------------------------------------------
Ran 104 tests Ran 107 tests
OK OK

View File

@ -119,6 +119,10 @@ test_qemu_img create -f $IMGFMT -o compat=1.1,lazy_refcounts=on "$TEST_IMG" 64M
test_qemu_img create -f $IMGFMT -o compat=0.10,lazy_refcounts=off "$TEST_IMG" 64M test_qemu_img create -f $IMGFMT -o compat=0.10,lazy_refcounts=off "$TEST_IMG" 64M
test_qemu_img create -f $IMGFMT -o compat=0.10,lazy_refcounts=on "$TEST_IMG" 64M test_qemu_img create -f $IMGFMT -o compat=0.10,lazy_refcounts=on "$TEST_IMG" 64M
echo "== Expect error when backing file name is empty string =="
echo
test_qemu_img create -f $IMGFMT -b '' $TEST_IMG 1M
# success, all done # success, all done
echo "*** done" echo "*** done"
rm -f $seq.full rm -f $seq.full

View File

@ -209,4 +209,9 @@ qemu-img create -f qcow2 -o compat=0.10,lazy_refcounts=on TEST_DIR/t.qcow2 64M
Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=on refcount_bits=16 Formatting 'TEST_DIR/t.qcow2', fmt=qcow2 cluster_size=65536 extended_l2=off compression_type=zlib size=67108864 compat=0.10 lazy_refcounts=on refcount_bits=16
qemu-img: TEST_DIR/t.qcow2: Lazy refcounts only supported with compatibility level 1.1 and above (use version=v3 or greater) qemu-img: TEST_DIR/t.qcow2: Lazy refcounts only supported with compatibility level 1.1 and above (use version=v3 or greater)
== Expect error when backing file name is empty string ==
qemu-img create -f qcow2 -b TEST_DIR/t.qcow2 1M
qemu-img: TEST_DIR/t.qcow2: Expected backing file name, got empty string
*** done *** done

View File

@ -464,7 +464,7 @@ No conflict:
image: null-co:// image: null-co://
file format: null-co file format: null-co
virtual size: 1 GiB (1073741824 bytes) virtual size: 1 GiB (1073741824 bytes)
disk size: unavailable disk size: 0 B
Conflict: Conflict:
qemu-img: --force-share/-U conflicts with image options qemu-img: --force-share/-U conflicts with image options

View File

@ -45,8 +45,7 @@ do_run_qemu()
run_qemu() run_qemu()
{ {
do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp\ do_run_qemu "$@" 2>&1 | _filter_testdir | _filter_qemu | _filter_qmp\
| _filter_qemu_io | _filter_generated_node_ids \ | _filter_qemu_io | _filter_generated_node_ids
| _filter_actual_image_size
} }
test_throttle=$($QEMU_IMG --help|grep throttle) test_throttle=$($QEMU_IMG --help|grep throttle)

View File

@ -27,14 +27,21 @@ Testing:
"iops_rd": 0, "iops_rd": 0,
"detect_zeroes": "off", "detect_zeroes": "off",
"image": { "image": {
"backing-image": {
"virtual-size": 1073741824,
"filename": "null-co://",
"format": "null-co",
"actual-size": 0
},
"virtual-size": 1073741824, "virtual-size": 1073741824,
"filename": "json:{\"throttle-group\": \"group0\", \"driver\": \"throttle\", \"file\": {\"driver\": \"null-co\"}}", "filename": "json:{\"throttle-group\": \"group0\", \"driver\": \"throttle\", \"file\": {\"driver\": \"null-co\"}}",
"format": "throttle" "format": "throttle",
"actual-size": 0
}, },
"iops_wr": 0, "iops_wr": 0,
"ro": false, "ro": false,
"node-name": "throttle0", "node-name": "throttle0",
"backing_file_depth": 0, "backing_file_depth": 1,
"drv": "throttle", "drv": "throttle",
"iops": 0, "iops": 0,
"bps_wr": 0, "bps_wr": 0,
@ -56,7 +63,8 @@ Testing:
"image": { "image": {
"virtual-size": 1073741824, "virtual-size": 1073741824,
"filename": "null-co://", "filename": "null-co://",
"format": "null-co" "format": "null-co",
"actual-size": 0
}, },
"iops_wr": 0, "iops_wr": 0,
"ro": false, "ro": false,

View File

@ -59,5 +59,6 @@ Offset Length File
0x900000 0x2400000 TEST_DIR/t.IMGFMT 0x900000 0x2400000 TEST_DIR/t.IMGFMT
0x3c00000 0x1100000 TEST_DIR/t.IMGFMT 0x3c00000 0x1100000 TEST_DIR/t.IMGFMT
0x6a00000 0x400000 TEST_DIR/t.IMGFMT 0x6a00000 0x400000 TEST_DIR/t.IMGFMT
0x6e00000 0x1200000 TEST_DIR/t.IMGFMT.base
No errors were found on the image. No errors were found on the image.
*** done *** done

View File

@ -36,7 +36,7 @@ def log_node_info(node):
log('bs->filename: ' + node['image']['filename'], log('bs->filename: ' + node['image']['filename'],
filters=[filter_testfiles, filter_imgfmt]) filters=[filter_testfiles, filter_imgfmt])
log('bs->backing_file: ' + node['backing_file'], log('bs->backing_file: ' + node['image']['full-backing-filename'],
filters=[filter_testfiles, filter_imgfmt]) filters=[filter_testfiles, filter_imgfmt])
if 'backing-image' in node['image']: if 'backing-image' in node['image']:
@ -73,8 +73,8 @@ with iotests.FilePath('base.img') as base_img_path, \
}, },
filters=[filter_qmp_testfiles, filter_qmp_imgfmt]) filters=[filter_qmp_testfiles, filter_qmp_imgfmt])
# Filename should be plain, and the backing filename should not # Filename should be plain, and the backing node filename should
# contain the "file:" prefix # not contain the "file:" prefix
log_node_info(vm.node_info('node0')) log_node_info(vm.node_info('node0'))
vm.qmp_log('blockdev-del', node_name='node0') vm.qmp_log('blockdev-del', node_name='node0')

View File

@ -4,7 +4,7 @@
{"return": {}} {"return": {}}
bs->filename: TEST_DIR/PID-top.img bs->filename: TEST_DIR/PID-top.img
bs->backing_file: TEST_DIR/PID-base.img bs->backing_file: file:TEST_DIR/PID-base.img
bs->backing->bs->filename: TEST_DIR/PID-base.img bs->backing->bs->filename: TEST_DIR/PID-base.img
{"execute": "blockdev-del", "arguments": {"node-name": "node0"}} {"execute": "blockdev-del", "arguments": {"node-name": "node0"}}
@ -41,7 +41,7 @@ bs->backing->bs->filename: TEST_DIR/PID-base.img
{"return": {}} {"return": {}}
bs->filename: TEST_DIR/PID-top.img bs->filename: TEST_DIR/PID-top.img
bs->backing_file: TEST_DIR/PID-base.img bs->backing_file: file:TEST_DIR/PID-base.img
bs->backing->bs->filename: TEST_DIR/PID-base.img bs->backing->bs->filename: TEST_DIR/PID-base.img
{"execute": "blockdev-del", "arguments": {"node-name": "node0"}} {"execute": "blockdev-del", "arguments": {"node-name": "node0"}}
@ -55,7 +55,7 @@ bs->backing->bs->filename: TEST_DIR/PID-base.img
{"return": {}} {"return": {}}
bs->filename: json:{"backing": {"driver": "null-co"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top.img"}} bs->filename: json:{"backing": {"driver": "null-co"}, "driver": "IMGFMT", "file": {"driver": "file", "filename": "TEST_DIR/PID-top.img"}}
bs->backing_file: null-co:// bs->backing_file: TEST_DIR/PID-base.img
bs->backing->bs->filename: null-co:// bs->backing->bs->filename: null-co://
{"execute": "blockdev-del", "arguments": {"node-name": "node0"}} {"execute": "blockdev-del", "arguments": {"node-name": "node0"}}

View File

@ -217,6 +217,55 @@ $QEMU_IMG amend -f $IMGFMT -o "data_file=blkdebug::$TEST_IMG.data" "$TEST_IMG"
$QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG" $QEMU_IMG convert -f $IMGFMT -O $IMGFMT -n -C "$TEST_IMG.src" "$TEST_IMG"
$QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG" $QEMU_IMG compare -f $IMGFMT -F $IMGFMT "$TEST_IMG.src" "$TEST_IMG"
echo
echo "=== Flushing should flush the data file ==="
echo
# We are going to flush a qcow2 file with a blkdebug node inserted
# between the qcow2 node and its data file node. The blkdebug node
# will return an error for all flushes, and so if the data file is
# flushed, we will see qemu-io return an error.
# We need to write something or the flush will not do anything; we
# also need -t writeback so the write is not done as a FUA write
# (which would then fail thanks to the implicit flush)
$QEMU_IO -c 'write 0 512' -c flush \
-t writeback \
"json:{
'driver': 'qcow2',
'file': {
'driver': 'file',
'filename': '$TEST_IMG'
},
'data-file': {
'driver': 'blkdebug',
'inject-error': [{
'event': 'none',
'iotype': 'flush'
}],
'image': {
'driver': 'file',
'filename': '$TEST_IMG.data'
}
}
}" \
| _filter_qemu_io
result=${PIPESTATUS[0]}
echo
case $result in
0)
echo "ERROR: qemu-io succeeded, so the data file was not flushed"
;;
1)
echo "Success: qemu-io failed, so the data file was flushed"
;;
*)
echo "ERROR: qemu-io returned unknown exit code $result"
;;
esac
# success, all done # success, all done
echo "*** done" echo "*** done"
rm -f $seq.full rm -f $seq.full

View File

@ -131,4 +131,11 @@ Offset Length Mapped to File
Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 data_file=TEST_DIR/t.IMGFMT.data Formatting 'TEST_DIR/t.IMGFMT', fmt=IMGFMT size=67108864 data_file=TEST_DIR/t.IMGFMT.data
Images are identical. Images are identical.
Images are identical. Images are identical.
=== Flushing should flush the data file ===
wrote 512/512 bytes at offset 0
512 bytes, X ops; XX:XX:XX.X (XXX YYY/sec and XXX ops/sec)
Success: qemu-io failed, so the data file was flushed
*** done *** done

View File

@ -725,7 +725,9 @@ class TestBlockdevReopen(iotests.QMPTestCase):
# Detach hd2 from hd0. # Detach hd2 from hd0.
self.reopen(opts, {'backing': None}) self.reopen(opts, {'backing': None})
self.reopen(opts, {}, "backing is missing for 'hd0'")
# Without a backing file, we can omit 'backing' again
self.reopen(opts)
# Remove both hd0 and hd2 # Remove both hd0 and hd2
result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0') result = self.vm.qmp('blockdev-del', conv_keys = True, node_name = 'hd0')

View File

@ -32,7 +32,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev
"actual-size": SIZE, "actual-size": SIZE,
"dirty-flag": false "dirty-flag": false
}, },
"backing-filename-format": "file", "backing-filename-format": "IMGFMT",
"virtual-size": 67108864, "virtual-size": 67108864,
"filename": "TEST_DIR/t.IMGFMT.mid", "filename": "TEST_DIR/t.IMGFMT.mid",
"cluster-size": 65536, "cluster-size": 65536,
@ -112,7 +112,7 @@ Testing: -blockdev file,node-name=base,filename=TEST_DIR/t.IMGFMT.base -blockdev
"actual-size": SIZE, "actual-size": SIZE,
"dirty-flag": false "dirty-flag": false
}, },
"backing-filename-format": "file", "backing-filename-format": "IMGFMT",
"virtual-size": 67108864, "virtual-size": 67108864,
"filename": "TEST_DIR/t.IMGFMT.mid", "filename": "TEST_DIR/t.IMGFMT.mid",
"cluster-size": 65536, "cluster-size": 65536,

View File

@ -44,7 +44,7 @@ then
_init_error "failed to obtain source tree name from check symlink" _init_error "failed to obtain source tree name from check symlink"
fi fi
source_iotests=$(cd "$source_iotests"; pwd) || _init_error "failed to enter source tree" source_iotests=$(cd "$source_iotests"; pwd) || _init_error "failed to enter source tree"
build_iotests=$PWD build_iotests=$(readlink -f $(dirname "$0"))
else else
# called from the source tree # called from the source tree
source_iotests=$PWD source_iotests=$PWD

View File

@ -972,8 +972,12 @@ class QMPTestCase(unittest.TestCase):
def wait_ready(self, drive='drive0'): def wait_ready(self, drive='drive0'):
"""Wait until a BLOCK_JOB_READY event, and return the event.""" """Wait until a BLOCK_JOB_READY event, and return the event."""
f = {'data': {'type': 'mirror', 'device': drive}} return self.vm.events_wait([
return self.vm.event_wait(name='BLOCK_JOB_READY', match=f) ('BLOCK_JOB_READY',
{'data': {'type': 'mirror', 'device': drive}}),
('BLOCK_JOB_READY',
{'data': {'type': 'commit', 'device': drive}})
])
def wait_ready_and_cancel(self, drive='drive0'): def wait_ready_and_cancel(self, drive='drive0'):
self.wait_ready(drive=drive) self.wait_ready(drive=drive)
@ -992,7 +996,7 @@ class QMPTestCase(unittest.TestCase):
self.assert_qmp(result, 'return', {}) self.assert_qmp(result, 'return', {})
event = self.wait_until_completed(drive=drive, error=completion_error) event = self.wait_until_completed(drive=drive, error=completion_error)
self.assert_qmp(event, 'data/type', 'mirror') self.assertTrue(event['data']['type'] in ['mirror', 'commit'])
def pause_wait(self, job_id='job0'): def pause_wait(self, job_id='job0'):
with Timeout(3, "Timeout waiting for job to pause"): with Timeout(3, "Timeout waiting for job to pause"):