0fe282bb4b
Now that we're checking for duplicates in the reopen queue, there's no
need to force a specific order in which the queue is constructed so we
can revert 3db2bd5508
.
Since both ways of constructing the queue are now valid, this patch
doesn't have any effect on the behavior of QEMU and is not strictly
necessary. However it can help us check that the fix for the reopen
queue is robust: if it stops working properly at some point, iotest
040 will break.
Signed-off-by: Alberto Garcia <berto@igalia.com>
Reviewed-by: Kevin Wolf <kwolf@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
403 lines
11 KiB
C
403 lines
11 KiB
C
/*
|
|
* Live block commit
|
|
*
|
|
* Copyright Red Hat, Inc. 2012
|
|
*
|
|
* Authors:
|
|
* Jeff Cody <jcody@redhat.com>
|
|
* Based on stream.c by Stefan Hajnoczi
|
|
*
|
|
* This work is licensed under the terms of the GNU LGPL, version 2 or later.
|
|
* See the COPYING.LIB file in the top-level directory.
|
|
*
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "trace.h"
|
|
#include "block/block_int.h"
|
|
#include "block/blockjob.h"
|
|
#include "qapi/error.h"
|
|
#include "qapi/qmp/qerror.h"
|
|
#include "qemu/ratelimit.h"
|
|
#include "sysemu/block-backend.h"
|
|
|
|
enum {
|
|
/*
|
|
* Size of data buffer for populating the image file. This should be large
|
|
* enough to process multiple clusters in a single call, so that populating
|
|
* contiguous regions of the image is efficient.
|
|
*/
|
|
COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */
|
|
};
|
|
|
|
#define SLICE_TIME 100000000ULL /* ns */
|
|
|
|
typedef struct CommitBlockJob {
|
|
BlockJob common;
|
|
RateLimit limit;
|
|
BlockDriverState *active;
|
|
BlockBackend *top;
|
|
BlockBackend *base;
|
|
BlockdevOnError on_error;
|
|
int base_flags;
|
|
int orig_overlay_flags;
|
|
char *backing_file_str;
|
|
} CommitBlockJob;
|
|
|
|
static int coroutine_fn commit_populate(BlockBackend *bs, BlockBackend *base,
|
|
int64_t sector_num, int nb_sectors,
|
|
void *buf)
|
|
{
|
|
int ret = 0;
|
|
QEMUIOVector qiov;
|
|
struct iovec iov = {
|
|
.iov_base = buf,
|
|
.iov_len = nb_sectors * BDRV_SECTOR_SIZE,
|
|
};
|
|
|
|
qemu_iovec_init_external(&qiov, &iov, 1);
|
|
|
|
ret = blk_co_preadv(bs, sector_num * BDRV_SECTOR_SIZE,
|
|
qiov.size, &qiov, 0);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
|
|
ret = blk_co_pwritev(base, sector_num * BDRV_SECTOR_SIZE,
|
|
qiov.size, &qiov, 0);
|
|
if (ret < 0) {
|
|
return ret;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
typedef struct {
|
|
int ret;
|
|
} CommitCompleteData;
|
|
|
|
static void commit_complete(BlockJob *job, void *opaque)
|
|
{
|
|
CommitBlockJob *s = container_of(job, CommitBlockJob, common);
|
|
CommitCompleteData *data = opaque;
|
|
BlockDriverState *active = s->active;
|
|
BlockDriverState *top = blk_bs(s->top);
|
|
BlockDriverState *base = blk_bs(s->base);
|
|
BlockDriverState *overlay_bs = bdrv_find_overlay(active, top);
|
|
int ret = data->ret;
|
|
|
|
if (!block_job_is_cancelled(&s->common) && ret == 0) {
|
|
/* success */
|
|
ret = bdrv_drop_intermediate(active, top, base, s->backing_file_str);
|
|
}
|
|
|
|
/* restore base open flags here if appropriate (e.g., change the base back
|
|
* to r/o). These reopens do not need to be atomic, since we won't abort
|
|
* even on failure here */
|
|
if (s->base_flags != bdrv_get_flags(base)) {
|
|
bdrv_reopen(base, s->base_flags, NULL);
|
|
}
|
|
if (overlay_bs && s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) {
|
|
bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL);
|
|
}
|
|
g_free(s->backing_file_str);
|
|
blk_unref(s->top);
|
|
blk_unref(s->base);
|
|
block_job_completed(&s->common, ret);
|
|
g_free(data);
|
|
}
|
|
|
|
static void coroutine_fn commit_run(void *opaque)
|
|
{
|
|
CommitBlockJob *s = opaque;
|
|
CommitCompleteData *data;
|
|
int64_t sector_num, end;
|
|
uint64_t delay_ns = 0;
|
|
int ret = 0;
|
|
int n = 0;
|
|
void *buf = NULL;
|
|
int bytes_written = 0;
|
|
int64_t base_len;
|
|
|
|
ret = s->common.len = blk_getlength(s->top);
|
|
|
|
|
|
if (s->common.len < 0) {
|
|
goto out;
|
|
}
|
|
|
|
ret = base_len = blk_getlength(s->base);
|
|
if (base_len < 0) {
|
|
goto out;
|
|
}
|
|
|
|
if (base_len < s->common.len) {
|
|
ret = blk_truncate(s->base, s->common.len);
|
|
if (ret) {
|
|
goto out;
|
|
}
|
|
}
|
|
|
|
end = s->common.len >> BDRV_SECTOR_BITS;
|
|
buf = blk_blockalign(s->top, COMMIT_BUFFER_SIZE);
|
|
|
|
for (sector_num = 0; sector_num < end; sector_num += n) {
|
|
bool copy;
|
|
|
|
/* Note that even when no rate limit is applied we need to yield
|
|
* with no pending I/O here so that bdrv_drain_all() returns.
|
|
*/
|
|
block_job_sleep_ns(&s->common, QEMU_CLOCK_REALTIME, delay_ns);
|
|
if (block_job_is_cancelled(&s->common)) {
|
|
break;
|
|
}
|
|
/* Copy if allocated above the base */
|
|
ret = bdrv_is_allocated_above(blk_bs(s->top), blk_bs(s->base),
|
|
sector_num,
|
|
COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE,
|
|
&n);
|
|
copy = (ret == 1);
|
|
trace_commit_one_iteration(s, sector_num, n, ret);
|
|
if (copy) {
|
|
ret = commit_populate(s->top, s->base, sector_num, n, buf);
|
|
bytes_written += n * BDRV_SECTOR_SIZE;
|
|
}
|
|
if (ret < 0) {
|
|
BlockErrorAction action =
|
|
block_job_error_action(&s->common, false, s->on_error, -ret);
|
|
if (action == BLOCK_ERROR_ACTION_REPORT) {
|
|
goto out;
|
|
} else {
|
|
n = 0;
|
|
continue;
|
|
}
|
|
}
|
|
/* Publish progress */
|
|
s->common.offset += n * BDRV_SECTOR_SIZE;
|
|
|
|
if (copy && s->common.speed) {
|
|
delay_ns = ratelimit_calculate_delay(&s->limit, n);
|
|
}
|
|
}
|
|
|
|
ret = 0;
|
|
|
|
out:
|
|
qemu_vfree(buf);
|
|
|
|
data = g_malloc(sizeof(*data));
|
|
data->ret = ret;
|
|
block_job_defer_to_main_loop(&s->common, commit_complete, data);
|
|
}
|
|
|
|
static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp)
|
|
{
|
|
CommitBlockJob *s = container_of(job, CommitBlockJob, common);
|
|
|
|
if (speed < 0) {
|
|
error_setg(errp, QERR_INVALID_PARAMETER, "speed");
|
|
return;
|
|
}
|
|
ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME);
|
|
}
|
|
|
|
static const BlockJobDriver commit_job_driver = {
|
|
.instance_size = sizeof(CommitBlockJob),
|
|
.job_type = BLOCK_JOB_TYPE_COMMIT,
|
|
.set_speed = commit_set_speed,
|
|
};
|
|
|
|
void commit_start(const char *job_id, BlockDriverState *bs,
|
|
BlockDriverState *base, BlockDriverState *top, int64_t speed,
|
|
BlockdevOnError on_error, BlockCompletionFunc *cb,
|
|
void *opaque, const char *backing_file_str, Error **errp)
|
|
{
|
|
CommitBlockJob *s;
|
|
BlockReopenQueue *reopen_queue = NULL;
|
|
int orig_overlay_flags;
|
|
int orig_base_flags;
|
|
BlockDriverState *overlay_bs;
|
|
Error *local_err = NULL;
|
|
|
|
assert(top != bs);
|
|
if (top == base) {
|
|
error_setg(errp, "Invalid files for merge: top and base are the same");
|
|
return;
|
|
}
|
|
|
|
overlay_bs = bdrv_find_overlay(bs, top);
|
|
|
|
if (overlay_bs == NULL) {
|
|
error_setg(errp, "Could not find overlay image for %s:", top->filename);
|
|
return;
|
|
}
|
|
|
|
s = block_job_create(job_id, &commit_job_driver, bs, speed,
|
|
cb, opaque, errp);
|
|
if (!s) {
|
|
return;
|
|
}
|
|
|
|
orig_base_flags = bdrv_get_flags(base);
|
|
orig_overlay_flags = bdrv_get_flags(overlay_bs);
|
|
|
|
/* convert base & overlay_bs to r/w, if necessary */
|
|
if (!(orig_base_flags & BDRV_O_RDWR)) {
|
|
reopen_queue = bdrv_reopen_queue(reopen_queue, base, NULL,
|
|
orig_base_flags | BDRV_O_RDWR);
|
|
}
|
|
if (!(orig_overlay_flags & BDRV_O_RDWR)) {
|
|
reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, NULL,
|
|
orig_overlay_flags | BDRV_O_RDWR);
|
|
}
|
|
if (reopen_queue) {
|
|
bdrv_reopen_multiple(reopen_queue, &local_err);
|
|
if (local_err != NULL) {
|
|
error_propagate(errp, local_err);
|
|
block_job_unref(&s->common);
|
|
return;
|
|
}
|
|
}
|
|
|
|
|
|
s->base = blk_new();
|
|
blk_insert_bs(s->base, base);
|
|
|
|
s->top = blk_new();
|
|
blk_insert_bs(s->top, top);
|
|
|
|
s->active = bs;
|
|
|
|
s->base_flags = orig_base_flags;
|
|
s->orig_overlay_flags = orig_overlay_flags;
|
|
|
|
s->backing_file_str = g_strdup(backing_file_str);
|
|
|
|
s->on_error = on_error;
|
|
s->common.co = qemu_coroutine_create(commit_run, s);
|
|
|
|
trace_commit_start(bs, base, top, s, s->common.co, opaque);
|
|
qemu_coroutine_enter(s->common.co);
|
|
}
|
|
|
|
|
|
#define COMMIT_BUF_SECTORS 2048
|
|
|
|
/* commit COW file into the raw image */
|
|
int bdrv_commit(BlockDriverState *bs)
|
|
{
|
|
BlockBackend *src, *backing;
|
|
BlockDriver *drv = bs->drv;
|
|
int64_t sector, total_sectors, length, backing_length;
|
|
int n, ro, open_flags;
|
|
int ret = 0;
|
|
uint8_t *buf = NULL;
|
|
|
|
if (!drv)
|
|
return -ENOMEDIUM;
|
|
|
|
if (!bs->backing) {
|
|
return -ENOTSUP;
|
|
}
|
|
|
|
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_COMMIT_SOURCE, NULL) ||
|
|
bdrv_op_is_blocked(bs->backing->bs, BLOCK_OP_TYPE_COMMIT_TARGET, NULL)) {
|
|
return -EBUSY;
|
|
}
|
|
|
|
ro = bs->backing->bs->read_only;
|
|
open_flags = bs->backing->bs->open_flags;
|
|
|
|
if (ro) {
|
|
if (bdrv_reopen(bs->backing->bs, open_flags | BDRV_O_RDWR, NULL)) {
|
|
return -EACCES;
|
|
}
|
|
}
|
|
|
|
src = blk_new();
|
|
blk_insert_bs(src, bs);
|
|
|
|
backing = blk_new();
|
|
blk_insert_bs(backing, bs->backing->bs);
|
|
|
|
length = blk_getlength(src);
|
|
if (length < 0) {
|
|
ret = length;
|
|
goto ro_cleanup;
|
|
}
|
|
|
|
backing_length = blk_getlength(backing);
|
|
if (backing_length < 0) {
|
|
ret = backing_length;
|
|
goto ro_cleanup;
|
|
}
|
|
|
|
/* If our top snapshot is larger than the backing file image,
|
|
* grow the backing file image if possible. If not possible,
|
|
* we must return an error */
|
|
if (length > backing_length) {
|
|
ret = blk_truncate(backing, length);
|
|
if (ret < 0) {
|
|
goto ro_cleanup;
|
|
}
|
|
}
|
|
|
|
total_sectors = length >> BDRV_SECTOR_BITS;
|
|
|
|
/* blk_try_blockalign() for src will choose an alignment that works for
|
|
* backing as well, so no need to compare the alignment manually. */
|
|
buf = blk_try_blockalign(src, COMMIT_BUF_SECTORS * BDRV_SECTOR_SIZE);
|
|
if (buf == NULL) {
|
|
ret = -ENOMEM;
|
|
goto ro_cleanup;
|
|
}
|
|
|
|
for (sector = 0; sector < total_sectors; sector += n) {
|
|
ret = bdrv_is_allocated(bs, sector, COMMIT_BUF_SECTORS, &n);
|
|
if (ret < 0) {
|
|
goto ro_cleanup;
|
|
}
|
|
if (ret) {
|
|
ret = blk_pread(src, sector * BDRV_SECTOR_SIZE, buf,
|
|
n * BDRV_SECTOR_SIZE);
|
|
if (ret < 0) {
|
|
goto ro_cleanup;
|
|
}
|
|
|
|
ret = blk_pwrite(backing, sector * BDRV_SECTOR_SIZE, buf,
|
|
n * BDRV_SECTOR_SIZE, 0);
|
|
if (ret < 0) {
|
|
goto ro_cleanup;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (drv->bdrv_make_empty) {
|
|
ret = drv->bdrv_make_empty(bs);
|
|
if (ret < 0) {
|
|
goto ro_cleanup;
|
|
}
|
|
blk_flush(src);
|
|
}
|
|
|
|
/*
|
|
* Make sure all data we wrote to the backing device is actually
|
|
* stable on disk.
|
|
*/
|
|
blk_flush(backing);
|
|
|
|
ret = 0;
|
|
ro_cleanup:
|
|
qemu_vfree(buf);
|
|
|
|
blk_unref(src);
|
|
blk_unref(backing);
|
|
|
|
if (ro) {
|
|
/* ignoring error return here */
|
|
bdrv_reopen(bs->backing->bs, open_flags & ~BDRV_O_RDWR, NULL);
|
|
}
|
|
|
|
return ret;
|
|
}
|