2013-06-24 17:13:11 +02:00
|
|
|
/*
|
|
|
|
* QEMU backup
|
|
|
|
*
|
|
|
|
* Copyright (C) 2013 Proxmox Server Solutions
|
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Dietmar Maurer (dietmar@proxmox.com)
|
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
|
|
* See the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
2016-01-18 19:01:42 +01:00
|
|
|
#include "qemu/osdep.h"
|
2013-06-24 17:13:11 +02:00
|
|
|
|
|
|
|
#include "trace.h"
|
|
|
|
#include "block/block.h"
|
|
|
|
#include "block/block_int.h"
|
|
|
|
#include "block/blockjob.h"
|
include/qemu/osdep.h: Don't include qapi/error.h
Commit 57cb38b included qapi/error.h into qemu/osdep.h to get the
Error typedef. Since then, we've moved to include qemu/osdep.h
everywhere. Its file comment explains: "To avoid getting into
possible circular include dependencies, this file should not include
any other QEMU headers, with the exceptions of config-host.h,
compiler.h, os-posix.h and os-win32.h, all of which are doing a
similar job to this file and are under similar constraints."
qapi/error.h doesn't do a similar job, and it doesn't adhere to
similar constraints: it includes qapi-types.h. That's in excess of
100KiB of crap most .c files don't actually need.
Add the typedef to qemu/typedefs.h, and include that instead of
qapi/error.h. Include qapi/error.h in .c files that need it and don't
get it now. Include qapi-types.h in qom/object.h for uint16List.
Update scripts/clean-includes accordingly. Update it further to match
reality: replace config.h by config-target.h, add sysemu/os-posix.h,
sysemu/os-win32.h. Update the list of includes in the qemu/osdep.h
comment quoted above similarly.
This reduces the number of objects depending on qapi/error.h from "all
of them" to less than a third. Unfortunately, the number depending on
qapi-types.h shrinks only a little. More work is needed for that one.
Signed-off-by: Markus Armbruster <armbru@redhat.com>
[Fix compilation without the spice devel packages. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2016-03-14 09:01:28 +01:00
|
|
|
#include "qapi/error.h"
|
2015-03-17 17:22:46 +01:00
|
|
|
#include "qapi/qmp/qerror.h"
|
2013-06-24 17:13:11 +02:00
|
|
|
#include "qemu/ratelimit.h"
|
2016-03-20 18:16:19 +01:00
|
|
|
#include "qemu/cutils.h"
|
2015-10-19 17:53:22 +02:00
|
|
|
#include "sysemu/block-backend.h"
|
2016-03-08 05:44:52 +01:00
|
|
|
#include "qemu/bitmap.h"
|
2013-06-24 17:13:11 +02:00
|
|
|
|
2016-02-25 21:58:29 +01:00
|
|
|
#define BACKUP_CLUSTER_SIZE_DEFAULT (1 << 16)
|
2013-06-24 17:13:11 +02:00
|
|
|
#define SLICE_TIME 100000000ULL /* ns */
|
|
|
|
|
|
|
|
typedef struct CowRequest {
|
|
|
|
int64_t start;
|
|
|
|
int64_t end;
|
|
|
|
QLIST_ENTRY(CowRequest) list;
|
|
|
|
CoQueue wait_queue; /* coroutines blocked on this request */
|
|
|
|
} CowRequest;
|
|
|
|
|
|
|
|
typedef struct BackupBlockJob {
|
|
|
|
BlockJob common;
|
2016-04-14 13:09:53 +02:00
|
|
|
BlockBackend *target;
|
2015-06-05 02:20:34 +02:00
|
|
|
/* bitmap for sync=incremental */
|
2015-04-18 01:49:58 +02:00
|
|
|
BdrvDirtyBitmap *sync_bitmap;
|
2013-07-26 20:39:04 +02:00
|
|
|
MirrorSyncMode sync_mode;
|
2013-06-24 17:13:11 +02:00
|
|
|
RateLimit limit;
|
|
|
|
BlockdevOnError on_source_error;
|
|
|
|
BlockdevOnError on_target_error;
|
|
|
|
CoRwlock flush_rwlock;
|
|
|
|
uint64_t sectors_read;
|
2016-03-08 05:44:52 +01:00
|
|
|
unsigned long *done_bitmap;
|
2016-02-25 21:58:29 +01:00
|
|
|
int64_t cluster_size;
|
2016-01-27 00:54:58 +01:00
|
|
|
NotifierWithReturn before_write;
|
2013-06-24 17:13:11 +02:00
|
|
|
QLIST_HEAD(, CowRequest) inflight_reqs;
|
|
|
|
} BackupBlockJob;
|
|
|
|
|
2016-02-25 21:58:29 +01:00
|
|
|
/* Size of a cluster in sectors, instead of bytes. */
|
|
|
|
static inline int64_t cluster_size_sectors(BackupBlockJob *job)
|
|
|
|
{
|
|
|
|
return job->cluster_size / BDRV_SECTOR_SIZE;
|
|
|
|
}
|
|
|
|
|
2013-06-24 17:13:11 +02:00
|
|
|
/* See if in-flight requests overlap and wait for them to complete */
|
|
|
|
static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job,
|
|
|
|
int64_t start,
|
|
|
|
int64_t end)
|
|
|
|
{
|
|
|
|
CowRequest *req;
|
|
|
|
bool retry;
|
|
|
|
|
|
|
|
do {
|
|
|
|
retry = false;
|
|
|
|
QLIST_FOREACH(req, &job->inflight_reqs, list) {
|
|
|
|
if (end > req->start && start < req->end) {
|
|
|
|
qemu_co_queue_wait(&req->wait_queue);
|
|
|
|
retry = true;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
} while (retry);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Keep track of an in-flight request */
|
|
|
|
static void cow_request_begin(CowRequest *req, BackupBlockJob *job,
|
|
|
|
int64_t start, int64_t end)
|
|
|
|
{
|
|
|
|
req->start = start;
|
|
|
|
req->end = end;
|
|
|
|
qemu_co_queue_init(&req->wait_queue);
|
|
|
|
QLIST_INSERT_HEAD(&job->inflight_reqs, req, list);
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Forget about a completed request */
|
|
|
|
static void cow_request_end(CowRequest *req)
|
|
|
|
{
|
|
|
|
QLIST_REMOVE(req, list);
|
|
|
|
qemu_co_queue_restart_all(&req->wait_queue);
|
|
|
|
}
|
|
|
|
|
2016-04-14 15:56:02 +02:00
|
|
|
/*
 * Copy to the target every cluster touched by the sector range
 * [sector_num, sector_num + nb_sectors) that has not been copied yet.
 *
 * Clusters already marked in job->done_bitmap are skipped.  The function
 * holds job->flush_rwlock for reading while it runs and registers itself as
 * an in-flight request so that overlapping callers wait for it.
 *
 * @error_is_read: may be NULL; otherwise set to true when a read from the
 *                 source failed and to false when a write to the target
 *                 failed.  Left untouched on success.
 * @is_write_notifier: when true, reads from the source are issued with
 *                     BDRV_REQ_NO_SERIALISING.
 *
 * Returns the (negative) result of the failing I/O call, or the
 * non-negative result of the last write, or 0 if nothing had to be copied.
 */
static int coroutine_fn backup_do_cow(BackupBlockJob *job,
                                      int64_t sector_num, int nb_sectors,
                                      bool *error_is_read,
                                      bool is_write_notifier)
{
    BlockBackend *source = job->common.blk;
    int64_t sectors_per_cluster = cluster_size_sectors(job);
    int64_t first_cluster = sector_num / sectors_per_cluster;
    int64_t end_cluster = DIV_ROUND_UP(sector_num + nb_sectors,
                                       sectors_per_cluster);
    int64_t cluster;
    CowRequest self;
    QEMUIOVector qiov;
    struct iovec vec;
    void *buf = NULL;
    int n_sectors;
    int ret = 0;

    qemu_co_rwlock_rdlock(&job->flush_rwlock);

    trace_backup_do_cow_enter(job, first_cluster, sector_num, nb_sectors);

    wait_for_overlapping_requests(job, first_cluster, end_cluster);
    cow_request_begin(&self, job, first_cluster, end_cluster);

    for (cluster = first_cluster; cluster < end_cluster; cluster++) {
        int64_t cluster_offset;

        if (test_bit(cluster, job->done_bitmap)) {
            /* already copied */
            trace_backup_do_cow_skip(job, cluster);
            continue;
        }

        trace_backup_do_cow_process(job, cluster);

        /* The last cluster of the device may be shorter than a full one. */
        n_sectors = MIN(sectors_per_cluster,
                        job->common.len / BDRV_SECTOR_SIZE -
                        cluster * sectors_per_cluster);

        /* The bounce buffer is allocated lazily and reused per cluster. */
        if (!buf) {
            buf = blk_blockalign(source, job->cluster_size);
        }
        vec.iov_base = buf;
        vec.iov_len = n_sectors * BDRV_SECTOR_SIZE;
        qemu_iovec_init_external(&qiov, &vec, 1);

        cluster_offset = cluster * job->cluster_size;

        ret = blk_co_preadv(source, cluster_offset, qiov.size, &qiov,
                            is_write_notifier ? BDRV_REQ_NO_SERIALISING : 0);
        if (ret < 0) {
            trace_backup_do_cow_read_fail(job, cluster, ret);
            if (error_is_read) {
                *error_is_read = true;
            }
            break;
        }

        /* All-zero data is written as zeroes that the target may unmap. */
        if (buffer_is_zero(vec.iov_base, vec.iov_len)) {
            ret = blk_co_pwrite_zeroes(job->target, cluster_offset,
                                       qiov.size, BDRV_REQ_MAY_UNMAP);
        } else {
            ret = blk_co_pwritev(job->target, cluster_offset,
                                 qiov.size, &qiov, 0);
        }
        if (ret < 0) {
            trace_backup_do_cow_write_fail(job, cluster, ret);
            if (error_is_read) {
                *error_is_read = false;
            }
            break;
        }

        set_bit(cluster, job->done_bitmap);

        /* Publish progress, guest I/O counts as progress too.  Note that the
         * offset field is an opaque progress value, it is not a disk offset.
         */
        job->sectors_read += n_sectors;
        job->common.offset += n_sectors * BDRV_SECTOR_SIZE;
    }

    if (buf) {
        qemu_vfree(buf);
    }

    cow_request_end(&self);

    trace_backup_do_cow_return(job, sector_num, nb_sectors, ret);

    qemu_co_rwlock_unlock(&job->flush_rwlock);

    return ret;
}
|
|
|
|
|
|
|
|
/*
 * Before-write notifier callback installed on the source node.
 *
 * @opaque is the BdrvTrackedRequest describing the write.  The clusters it
 * touches are copied to the target (via backup_do_cow()) before the write
 * is allowed to proceed.  Returns the result of backup_do_cow().
 */
static int coroutine_fn backup_before_write_notify(
        NotifierWithReturn *notifier,
        void *opaque)
{
    BdrvTrackedRequest *req = opaque;
    BackupBlockJob *job = container_of(notifier, BackupBlockJob, before_write);
    int64_t sector_num;
    int nb_sectors;

    /* The request must target our source and be sector aligned. */
    assert(req->bs == blk_bs(job->common.blk));
    assert((req->offset & (BDRV_SECTOR_SIZE - 1)) == 0);
    assert((req->bytes & (BDRV_SECTOR_SIZE - 1)) == 0);

    sector_num = req->offset >> BDRV_SECTOR_BITS;
    nb_sectors = req->bytes >> BDRV_SECTOR_BITS;

    /* No caller wants to know which side failed, hence the NULL. */
    return backup_do_cow(job, sector_num, nb_sectors, NULL, true);
}
|
|
|
|
|
|
|
|
/*
 * BlockJobDriver.set_speed callback.
 *
 * @speed is divided by BDRV_SECTOR_SIZE before it is handed to the rate
 * limiter, which therefore works in sectors per SLICE_TIME slice.  A
 * negative @speed is rejected through @errp and leaves the limiter
 * unchanged.
 */
static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp)
{
    BackupBlockJob *backup_job;

    if (speed < 0) {
        error_setg(errp, QERR_INVALID_PARAMETER, "speed");
        return;
    }

    backup_job = container_of(job, BackupBlockJob, common);
    ratelimit_set_speed(&backup_job->limit, speed / BDRV_SECTOR_SIZE,
                        SLICE_TIME);
}
|
|
|
|
|
2015-11-06 00:13:10 +01:00
|
|
|
/*
 * Resolve the sync bitmap and its successor once the job has finished.
 *
 * @ret: job result; a negative value or a cancelled job counts as failure.
 *
 * On failure the successor is merged back into the parent and nothing is
 * deleted.  On success the bitmap is deleted and its successor is installed
 * in its place.
 */
static void backup_cleanup_sync_bitmap(BackupBlockJob *job, int ret)
{
    BlockDriverState *bs = blk_bs(job->common.blk);
    bool failed = ret < 0 || block_job_is_cancelled(&job->common);
    BdrvDirtyBitmap *bm;

    bm = failed ? bdrv_reclaim_dirty_bitmap(bs, job->sync_bitmap, NULL)
                : bdrv_dirty_bitmap_abdicate(bs, job->sync_bitmap, NULL);

    /* Either operation is expected to hand back a valid bitmap. */
    assert(bm);
}
|
|
|
|
|
2015-11-06 00:13:16 +01:00
|
|
|
/*
 * BlockJobDriver.commit callback: finalise the sync bitmap, if the job has
 * one, as for a successful run.
 */
static void backup_commit(BlockJob *job)
{
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);

    if (!backup_job->sync_bitmap) {
        return;
    }

    backup_cleanup_sync_bitmap(backup_job, 0);
}
|
|
|
|
|
|
|
|
/*
 * BlockJobDriver.abort callback: finalise the sync bitmap, if the job has
 * one, as for a failed run (a negative result is passed on).
 */
static void backup_abort(BlockJob *job)
{
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);

    if (!backup_job->sync_bitmap) {
        return;
    }

    backup_cleanup_sync_bitmap(backup_job, -1);
}
|
|
|
|
|
2016-06-16 18:56:29 +02:00
|
|
|
/*
 * BlockJobDriver.attached_aio_context callback: move the target backend to
 * @aio_context as well.
 */
static void backup_attached_aio_context(BlockJob *job, AioContext *aio_context)
{
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
    BlockBackend *target = backup_job->target;

    blk_set_aio_context(target, aio_context);
}
|
|
|
|
|
2013-10-08 11:29:38 +02:00
|
|
|
static const BlockJobDriver backup_job_driver = {
|
2016-06-16 18:56:29 +02:00
|
|
|
.instance_size = sizeof(BackupBlockJob),
|
|
|
|
.job_type = BLOCK_JOB_TYPE_BACKUP,
|
|
|
|
.set_speed = backup_set_speed,
|
|
|
|
.commit = backup_commit,
|
|
|
|
.abort = backup_abort,
|
|
|
|
.attached_aio_context = backup_attached_aio_context,
|
2013-06-24 17:13:11 +02:00
|
|
|
};
|
|
|
|
|
|
|
|
/*
 * Decide how to react to an I/O error.
 *
 * @read:  true if the failure was a read from the source, false if it was a
 *         write to the target; selects on_source_error or on_target_error.
 * @error: error number handed on to block_job_error_action().
 */
static BlockErrorAction backup_error_action(BackupBlockJob *job,
                                            bool read, int error)
{
    BlockdevOnError policy = read ? job->on_source_error
                                  : job->on_target_error;

    return block_job_error_action(&job->common, policy, read, error);
}
|
|
|
|
|
2014-10-21 13:03:56 +02:00
|
|
|
/* Heap-allocated argument carried from backup_run() to backup_complete(). */
typedef struct {
    int ret;    /* result that the job is completed with */
} BackupCompleteData;
|
|
|
|
|
|
|
|
/*
 * Completion handler scheduled by backup_run() through
 * block_job_defer_to_main_loop().
 *
 * @opaque is a BackupCompleteData allocated by backup_run(); it is freed
 * here.  The job's reference to the target backend is dropped and the job
 * is completed with the recorded result.
 */
static void backup_complete(BlockJob *job, void *opaque)
{
    BackupBlockJob *backup_job = container_of(job, BackupBlockJob, common);
    BackupCompleteData *data = opaque;
    int ret = data->ret;

    g_free(data);

    /* Release the target before the job itself is completed. */
    blk_unref(backup_job->target);

    block_job_completed(job, ret);
}
|
|
|
|
|
2015-04-18 01:49:58 +02:00
|
|
|
/*
 * Give other coroutines a chance to run and report whether the job should
 * stop.
 *
 * With a speed limit set, the sleep lasts as long as the rate limiter asks
 * for, given the sectors copied since the previous call; that counter is
 * then reset.  Without a limit the sleep is zero-length.
 *
 * Returns true if the job has been cancelled (before or during the sleep).
 */
static bool coroutine_fn yield_and_check(BackupBlockJob *job)
{
    uint64_t delay_ns = 0;

    if (block_job_is_cancelled(&job->common)) {
        return true;
    }

    if (job->common.speed) {
        delay_ns = ratelimit_calculate_delay(&job->limit, job->sectors_read);
        job->sectors_read = 0;
    }

    /* we need to yield so that bdrv_drain_all() returns.
     * (without, VM does not reboot)
     */
    block_job_sleep_ns(&job->common, QEMU_CLOCK_REALTIME, delay_ns);

    return block_job_is_cancelled(&job->common);
}
|
|
|
|
|
|
|
|
/*
 * Main loop for sync=incremental: copy every cluster that is covered by a
 * dirty bit in job->sync_bitmap.
 *
 * Clusters that are skipped because they are clean are still added to
 * job->common.offset so that the progress value reaches the full length.
 *
 * Returns 0 on success, or the negative result of backup_do_cow() when the
 * configured error action is to report the error.  If the job is cancelled
 * the loop stops early and the most recent result is returned.
 */
static int coroutine_fn backup_run_incremental(BackupBlockJob *job)
{
    bool error_is_read;
    int ret = 0;
    int clusters_per_iter;
    uint32_t granularity;
    int64_t sector;
    int64_t cluster;
    int64_t end;
    int64_t last_cluster = -1;
    int64_t sectors_per_cluster = cluster_size_sectors(job);
    /* number of clusters needed to cover the whole device */
    int64_t total_clusters = DIV_ROUND_UP(job->common.len, job->cluster_size);
    HBitmapIter hbi;

    granularity = bdrv_dirty_bitmap_granularity(job->sync_bitmap);
    /* One dirty bit may span several backup clusters. */
    clusters_per_iter = MAX((granularity / job->cluster_size), 1);
    bdrv_dirty_iter_init(job->sync_bitmap, &hbi);

    /* Find the next dirty sector(s) */
    while ((sector = hbitmap_iter_next(&hbi)) != -1) {
        cluster = sector / sectors_per_cluster;

        /* Fake progress updates for any clusters we skipped */
        if (cluster != last_cluster + 1) {
            job->common.offset += ((cluster - last_cluster - 1) *
                                   job->cluster_size);
        }

        /*
         * Never walk past the last cluster of the device.  When the bitmap
         * granularity is larger than the cluster size and the device length
         * is not a multiple of the granularity, the final dirty bit covers
         * clusters that do not exist; backup_do_cow() would compute a zero
         * or negative sector count for them.
         */
        end = MIN(cluster + clusters_per_iter, total_clusters);

        for (; cluster < end; cluster++) {
            /* Retry the same cluster until it succeeds or we must report. */
            do {
                if (yield_and_check(job)) {
                    return ret;
                }
                ret = backup_do_cow(job, cluster * sectors_per_cluster,
                                    sectors_per_cluster, &error_is_read,
                                    false);
                if ((ret < 0) &&
                    backup_error_action(job, error_is_read, -ret) ==
                    BLOCK_ERROR_ACTION_REPORT) {
                    return ret;
                }
            } while (ret < 0);
        }

        /* If the bitmap granularity is smaller than the backup granularity,
         * we need to advance the iterator pointer to the next cluster. */
        if (granularity < job->cluster_size) {
            bdrv_set_dirty_iter(&hbi, cluster * sectors_per_cluster);
        }

        last_cluster = cluster - 1;
    }

    /* Play some final catchup with the progress meter */
    if (last_cluster + 1 < total_clusters) {
        job->common.offset += ((total_clusters - last_cluster - 1) *
                               job->cluster_size);
    }

    return ret;
}
|
|
|
|
|
2013-06-24 17:13:11 +02:00
|
|
|
/*
 * Coroutine entry point of the backup job; @opaque is the BackupBlockJob.
 *
 * Sets up the in-flight list, the lock, the done bitmap and the
 * before-write notifier on the source, then runs the loop selected by
 * job->sync_mode:
 *   - none:        only yield until cancelled; all copying is done by the
 *                  before-write notifier.
 *   - incremental: delegate to backup_run_incremental().
 *   - full / top:  walk every cluster; for top, clusters that are not
 *                  allocated in the topmost image are skipped.
 *
 * Afterwards the notifier is removed, pending backup_do_cow() calls are
 * waited for, and completion is deferred to backup_complete().
 */
static void coroutine_fn backup_run(void *opaque)
{
    BackupBlockJob *job = opaque;
    BackupCompleteData *data;
    BlockDriverState *bs = blk_bs(job->common.blk);
    BlockBackend *target = job->target;
    int64_t start, end;
    int64_t sectors_per_cluster = cluster_size_sectors(job);
    int ret = 0;

    QLIST_INIT(&job->inflight_reqs);
    qemu_co_rwlock_init(&job->flush_rwlock);

    start = 0;
    end = DIV_ROUND_UP(job->common.len, job->cluster_size);

    /* one bit per cluster */
    job->done_bitmap = bitmap_new(end);

    job->before_write.notify = backup_before_write_notify;
    bdrv_add_before_write_notifier(bs, &job->before_write);

    if (job->sync_mode == MIRROR_SYNC_MODE_NONE) {
        while (!block_job_is_cancelled(&job->common)) {
            /* Yield until the job is cancelled.  We just let our before_write
             * notify callback service CoW requests. */
            block_job_yield(&job->common);
        }
    } else if (job->sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
        ret = backup_run_incremental(job);
    } else {
        /* Both FULL and TOP SYNC_MODE's require copying.. */
        for (; start < end; start++) {
            bool error_is_read;
            int alloced = 0;

            if (yield_and_check(job)) {
                break;
            }

            if (job->sync_mode == MIRROR_SYNC_MODE_TOP) {
                int i;
                int n = 0;

                /* Check to see if these blocks are already in the
                 * backing file. */

                for (i = 0; i < sectors_per_cluster;) {
                    /* bdrv_is_allocated() only returns true/false based
                     * on the first set of sectors it comes across that
                     * are all in the same state.
                     * For that reason we must verify each sector in the
                     * backup cluster length.  We end up copying more than
                     * needed but at some point that is always the case. */
                    alloced =
                        bdrv_is_allocated(bs,
                                          start * sectors_per_cluster + i,
                                          sectors_per_cluster - i, &n);
                    if (alloced < 0) {
                        /* Query failed; n is not meaningful, stop here. */
                        break;
                    }

                    i += n;

                    if (alloced == 1 || n == 0) {
                        break;
                    }
                }

                /* If the above loop never found any sectors that are in
                 * the topmost image, skip this backup. */
                if (alloced == 0) {
                    continue;
                }
            }

            if (alloced < 0) {
                /* Failing to query the source is handled like a failed
                 * read, so the on_source_error policy applies. */
                ret = alloced;
                error_is_read = true;
            } else {
                /* FULL sync mode we copy the whole drive. */
                ret = backup_do_cow(job, start * sectors_per_cluster,
                                    sectors_per_cluster, &error_is_read,
                                    false);
            }
            if (ret < 0) {
                /* Depending on error action, fail now or retry cluster */
                BlockErrorAction action =
                    backup_error_action(job, error_is_read, -ret);
                if (action == BLOCK_ERROR_ACTION_REPORT) {
                    break;
                } else {
                    /* Undo the loop increment so this cluster is retried. */
                    start--;
                    continue;
                }
            }
        }
    }

    notifier_with_return_remove(&job->before_write);

    /* wait until pending backup_do_cow() calls have completed */
    qemu_co_rwlock_wrlock(&job->flush_rwlock);
    qemu_co_rwlock_unlock(&job->flush_rwlock);
    g_free(job->done_bitmap);

    bdrv_op_unblock_all(blk_bs(target), job->common.blocker);

    /* Ownership of data passes to backup_complete(), which frees it. */
    data = g_malloc(sizeof(*data));
    data->ret = ret;
    block_job_defer_to_main_loop(&job->common, backup_complete, data);
}
|
|
|
|
|
2016-07-05 16:28:58 +02:00
|
|
|
void backup_start(const char *job_id, BlockDriverState *bs,
|
|
|
|
BlockDriverState *target, int64_t speed,
|
|
|
|
MirrorSyncMode sync_mode, BdrvDirtyBitmap *sync_bitmap,
|
2013-06-24 17:13:11 +02:00
|
|
|
BlockdevOnError on_source_error,
|
|
|
|
BlockdevOnError on_target_error,
|
2014-10-07 13:59:15 +02:00
|
|
|
BlockCompletionFunc *cb, void *opaque,
|
2015-11-06 00:13:17 +01:00
|
|
|
BlockJobTxn *txn, Error **errp)
|
2013-06-24 17:13:11 +02:00
|
|
|
{
|
|
|
|
int64_t len;
|
block/backup: avoid copying less than full target clusters
During incremental backups, if the target has a cluster size that is
larger than the backup cluster size and we are backing up to a target
that cannot (for whichever reason) pull clusters up from a backing image,
we may inadvertantly create unusable incremental backup images.
For example:
If the bitmap tracks changes at a 64KB granularity and we transmit 64KB
of data at a time but the target uses a 128KB cluster size, it is
possible that only half of a target cluster will be recognized as dirty
by the backup block job. When the cluster is allocated on the target
image but only half populated with data, we lose the ability to
distinguish between zero padding and uninitialized data.
This does not happen if the target image has a backing file that points
to the last known good backup.
Even if we have a backing file, though, it's likely going to be faster
to just buffer the redundant data ourselves from the live image than
fetching it from the backing file, so let's just always round up to the
target granularity.
The same logic applies to backup modes top, none, and full. Copying
fractional clusters without the guarantee of COW is dangerous, but even
if we can rely on COW, it's likely better to just re-copy the data.
Reported-by: Fam Zheng <famz@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1456433911-24718-3-git-send-email-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-25 21:58:30 +01:00
|
|
|
BlockDriverInfo bdi;
|
2016-04-14 12:59:55 +02:00
|
|
|
BackupBlockJob *job = NULL;
|
block/backup: avoid copying less than full target clusters
During incremental backups, if the target has a cluster size that is
larger than the backup cluster size and we are backing up to a target
that cannot (for whichever reason) pull clusters up from a backing image,
we may inadvertantly create unusable incremental backup images.
For example:
If the bitmap tracks changes at a 64KB granularity and we transmit 64KB
of data at a time but the target uses a 128KB cluster size, it is
possible that only half of a target cluster will be recognized as dirty
by the backup block job. When the cluster is allocated on the target
image but only half populated with data, we lose the ability to
distinguish between zero padding and uninitialized data.
This does not happen if the target image has a backing file that points
to the last known good backup.
Even if we have a backing file, though, it's likely going to be faster
to just buffer the redundant data ourselves from the live image than
fetching it from the backing file, so let's just always round up to the
target granularity.
The same logic applies to backup modes top, none, and full. Copying
fractional clusters without the guarantee of COW is dangerous, but even
if we can rely on COW, it's likely better to just re-copy the data.
Reported-by: Fam Zheng <famz@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1456433911-24718-3-git-send-email-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-25 21:58:30 +01:00
|
|
|
int ret;
|
2013-06-24 17:13:11 +02:00
|
|
|
|
|
|
|
assert(bs);
|
|
|
|
assert(target);
|
|
|
|
|
2014-12-18 11:37:05 +01:00
|
|
|
if (bs == target) {
|
|
|
|
error_setg(errp, "Source and target cannot be the same");
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!bdrv_is_inserted(bs)) {
|
|
|
|
error_setg(errp, "Device is not inserted: %s",
|
|
|
|
bdrv_get_device_name(bs));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (!bdrv_is_inserted(target)) {
|
|
|
|
error_setg(errp, "Device is not inserted: %s",
|
|
|
|
bdrv_get_device_name(target));
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdrv_op_is_blocked(bs, BLOCK_OP_TYPE_BACKUP_SOURCE, errp)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (bdrv_op_is_blocked(target, BLOCK_OP_TYPE_BACKUP_TARGET, errp)) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2015-06-05 02:20:34 +02:00
|
|
|
if (sync_mode == MIRROR_SYNC_MODE_INCREMENTAL) {
|
2015-04-18 01:49:58 +02:00
|
|
|
if (!sync_bitmap) {
|
|
|
|
error_setg(errp, "must provide a valid bitmap name for "
|
2015-06-05 02:20:34 +02:00
|
|
|
"\"incremental\" sync mode");
|
2015-04-18 01:49:58 +02:00
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
/* Create a new bitmap, and freeze/disable this one. */
|
|
|
|
if (bdrv_dirty_bitmap_create_successor(bs, sync_bitmap, errp) < 0) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
} else if (sync_bitmap) {
|
|
|
|
error_setg(errp,
|
|
|
|
"a sync_bitmap was provided to backup_run, "
|
|
|
|
"but received an incompatible sync_mode (%s)",
|
|
|
|
MirrorSyncMode_lookup[sync_mode]);
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
2013-06-24 17:13:11 +02:00
|
|
|
len = bdrv_getlength(bs);
|
|
|
|
if (len < 0) {
|
|
|
|
error_setg_errno(errp, -len, "unable to get length for '%s'",
|
|
|
|
bdrv_get_device_name(bs));
|
2015-04-18 01:49:58 +02:00
|
|
|
goto error;
|
2013-06-24 17:13:11 +02:00
|
|
|
}
|
|
|
|
|
2016-07-05 16:28:58 +02:00
|
|
|
job = block_job_create(job_id, &backup_job_driver, bs, speed,
|
2016-07-05 16:28:56 +02:00
|
|
|
cb, opaque, errp);
|
2013-06-24 17:13:11 +02:00
|
|
|
if (!job) {
|
2015-04-18 01:49:58 +02:00
|
|
|
goto error;
|
2013-06-24 17:13:11 +02:00
|
|
|
}
|
|
|
|
|
2016-04-14 13:09:53 +02:00
|
|
|
job->target = blk_new();
|
|
|
|
blk_insert_bs(job->target, target);
|
|
|
|
|
2013-06-24 17:13:11 +02:00
|
|
|
job->on_source_error = on_source_error;
|
|
|
|
job->on_target_error = on_target_error;
|
2013-07-26 20:39:04 +02:00
|
|
|
job->sync_mode = sync_mode;
|
2015-06-05 02:20:34 +02:00
|
|
|
job->sync_bitmap = sync_mode == MIRROR_SYNC_MODE_INCREMENTAL ?
|
2015-04-18 01:49:58 +02:00
|
|
|
sync_bitmap : NULL;
|
block/backup: avoid copying less than full target clusters
During incremental backups, if the target has a cluster size that is
larger than the backup cluster size and we are backing up to a target
that cannot (for whichever reason) pull clusters up from a backing image,
we may inadvertantly create unusable incremental backup images.
For example:
If the bitmap tracks changes at a 64KB granularity and we transmit 64KB
of data at a time but the target uses a 128KB cluster size, it is
possible that only half of a target cluster will be recognized as dirty
by the backup block job. When the cluster is allocated on the target
image but only half populated with data, we lose the ability to
distinguish between zero padding and uninitialized data.
This does not happen if the target image has a backing file that points
to the last known good backup.
Even if we have a backing file, though, it's likely going to be faster
to just buffer the redundant data ourselves from the live image than
fetching it from the backing file, so let's just always round up to the
target granularity.
The same logic applies to backup modes top, none, and full. Copying
fractional clusters without the guarantee of COW is dangerous, but even
if we can rely on COW, it's likely better to just re-copy the data.
Reported-by: Fam Zheng <famz@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1456433911-24718-3-git-send-email-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-25 21:58:30 +01:00
|
|
|
|
|
|
|
/* If there is no backing file on the target, we cannot rely on COW if our
|
|
|
|
* backup cluster size is smaller than the target cluster size. Even for
|
|
|
|
* targets with a backing file, try to avoid COW if possible. */
|
2016-04-14 13:09:53 +02:00
|
|
|
ret = bdrv_get_info(target, &bdi);
|
block/backup: avoid copying less than full target clusters
During incremental backups, if the target has a cluster size that is
larger than the backup cluster size and we are backing up to a target
that cannot (for whichever reason) pull clusters up from a backing image,
we may inadvertantly create unusable incremental backup images.
For example:
If the bitmap tracks changes at a 64KB granularity and we transmit 64KB
of data at a time but the target uses a 128KB cluster size, it is
possible that only half of a target cluster will be recognized as dirty
by the backup block job. When the cluster is allocated on the target
image but only half populated with data, we lose the ability to
distinguish between zero padding and uninitialized data.
This does not happen if the target image has a backing file that points
to the last known good backup.
Even if we have a backing file, though, it's likely going to be faster
to just buffer the redundant data ourselves from the live image than
fetching it from the backing file, so let's just always round up to the
target granularity.
The same logic applies to backup modes top, none, and full. Copying
fractional clusters without the guarantee of COW is dangerous, but even
if we can rely on COW, it's likely better to just re-copy the data.
Reported-by: Fam Zheng <famz@redhat.com>
Signed-off-by: John Snow <jsnow@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Message-id: 1456433911-24718-3-git-send-email-jsnow@redhat.com
Signed-off-by: Jeff Cody <jcody@redhat.com>
2016-02-25 21:58:30 +01:00
|
|
|
if (ret < 0 && !target->backing) {
|
|
|
|
error_setg_errno(errp, -ret,
|
|
|
|
"Couldn't determine the cluster size of the target image, "
|
|
|
|
"which has no backing file");
|
|
|
|
error_append_hint(errp,
|
|
|
|
"Aborting, since this may create an unusable destination image\n");
|
|
|
|
goto error;
|
|
|
|
} else if (ret < 0 && target->backing) {
|
|
|
|
/* Not fatal; just trudge on ahead. */
|
|
|
|
job->cluster_size = BACKUP_CLUSTER_SIZE_DEFAULT;
|
|
|
|
} else {
|
|
|
|
job->cluster_size = MAX(BACKUP_CLUSTER_SIZE_DEFAULT, bdi.cluster_size);
|
|
|
|
}
|
|
|
|
|
|
|
|
bdrv_op_block_all(target, job->common.blocker);
|
2013-06-24 17:13:11 +02:00
|
|
|
job->common.len = len;
|
coroutine: move entry argument to qemu_coroutine_create
In practice the entry argument is always known at creation time, and
it is confusing that sometimes qemu_coroutine_enter is used with a
non-NULL argument to re-enter a coroutine (this happens in
block/sheepdog.c and tests/test-coroutine.c). So pass the opaque value
at creation time, for consistency with e.g. aio_bh_new.
Mostly done with the following semantic patch:
@ entry1 @
expression entry, arg, co;
@@
- co = qemu_coroutine_create(entry);
+ co = qemu_coroutine_create(entry, arg);
...
- qemu_coroutine_enter(co, arg);
+ qemu_coroutine_enter(co);
@ entry2 @
expression entry, arg;
identifier co;
@@
- Coroutine *co = qemu_coroutine_create(entry);
+ Coroutine *co = qemu_coroutine_create(entry, arg);
...
- qemu_coroutine_enter(co, arg);
+ qemu_coroutine_enter(co);
@ entry3 @
expression entry, arg;
@@
- qemu_coroutine_enter(qemu_coroutine_create(entry), arg);
+ qemu_coroutine_enter(qemu_coroutine_create(entry, arg));
@ reentry @
expression co;
@@
- qemu_coroutine_enter(co, NULL);
+ qemu_coroutine_enter(co);
except for the aforementioned few places where the semantic patch
stumbled (as expected) and for test_co_queue, which would otherwise
produce an uninitialized variable warning.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-04 19:10:01 +02:00
|
|
|
job->common.co = qemu_coroutine_create(backup_run, job);
|
2015-11-06 00:13:17 +01:00
|
|
|
block_job_txn_add_job(txn, &job->common);
|
coroutine: move entry argument to qemu_coroutine_create
In practice the entry argument is always known at creation time, and
it is confusing that sometimes qemu_coroutine_enter is used with a
non-NULL argument to re-enter a coroutine (this happens in
block/sheepdog.c and tests/test-coroutine.c). So pass the opaque value
at creation time, for consistency with e.g. aio_bh_new.
Mostly done with the following semantic patch:
@ entry1 @
expression entry, arg, co;
@@
- co = qemu_coroutine_create(entry);
+ co = qemu_coroutine_create(entry, arg);
...
- qemu_coroutine_enter(co, arg);
+ qemu_coroutine_enter(co);
@ entry2 @
expression entry, arg;
identifier co;
@@
- Coroutine *co = qemu_coroutine_create(entry);
+ Coroutine *co = qemu_coroutine_create(entry, arg);
...
- qemu_coroutine_enter(co, arg);
+ qemu_coroutine_enter(co);
@ entry3 @
expression entry, arg;
@@
- qemu_coroutine_enter(qemu_coroutine_create(entry), arg);
+ qemu_coroutine_enter(qemu_coroutine_create(entry, arg));
@ reentry @
expression co;
@@
- qemu_coroutine_enter(co, NULL);
+ qemu_coroutine_enter(co);
except for the aforementioned few places where the semantic patch
stumbled (as expected) and for test_co_queue, which would otherwise
produce an uninitialized variable warning.
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Reviewed-by: Fam Zheng <famz@redhat.com>
Signed-off-by: Kevin Wolf <kwolf@redhat.com>
2016-07-04 19:10:01 +02:00
|
|
|
qemu_coroutine_enter(job->common.co);
|
2015-04-18 01:49:58 +02:00
|
|
|
return;
|
|
|
|
|
|
|
|
error:
|
|
|
|
if (sync_bitmap) {
|
|
|
|
bdrv_reclaim_dirty_bitmap(bs, sync_bitmap, NULL);
|
|
|
|
}
|
2016-04-14 12:59:55 +02:00
|
|
|
if (job) {
|
2016-04-14 13:09:53 +02:00
|
|
|
blk_unref(job->target);
|
2016-04-14 12:59:55 +02:00
|
|
|
block_job_unref(&job->common);
|
|
|
|
}
|
2013-06-24 17:13:11 +02:00
|
|
|
}
|