From a5c5ea3f60c000bf18c99435439533728a5f34a2 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 11 Jun 2013 10:44:58 +0200 Subject: [PATCH 01/20] raw-posix: Fix /dev/cdrom magic on OS X The raw-posix driver has code to provide a /dev/cdrom on OS X even though it doesn't really exist. However, since commit c66a6157 the real filename is dismissed after finding it, so opening /dev/cdrom fails. Put the filename back into the options QDict to make this work again. Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf Reviewed-by: Stefan Hajnoczi --- block/raw-posix.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/raw-posix.c b/block/raw-posix.c index c0ccf273a3..90ce9f86af 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1350,6 +1350,7 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags) qemu_close(fd); } filename = bsdPath; + qdict_put(options, "filename", qstring_from_str(filename)); } if ( mediaIterator ) From 5dae8e5fb803f53fadc116cefe353953b938cbe1 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:09 +0200 Subject: [PATCH 02/20] notify: add NotiferWithReturn so notifier list can abort notifier_list_notify() has no return value. This is fine when we just want to invoke side-effects. Sometimes it's useful for notifiers to produce a return value. This allows notifiers to "veto" an operation and will be used by the block layer before-write notifier. Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- include/qemu/notify.h | 29 +++++++++++++++++++++++++++++ util/notify.c | 30 ++++++++++++++++++++++++++++++ 2 files changed, 59 insertions(+) diff --git a/include/qemu/notify.h b/include/qemu/notify.h index 4e2e7f0ec4..a3d73e4bc7 100644 --- a/include/qemu/notify.h +++ b/include/qemu/notify.h @@ -40,4 +40,33 @@ void notifier_remove(Notifier *notifier); void notifier_list_notify(NotifierList *list, void *data); +/* Same as Notifier but allows .notify() to return errors */ +typedef struct NotifierWithReturn NotifierWithReturn; + +struct NotifierWithReturn { + /** + * Return 0 on success (next notifier will be invoked), otherwise + * notifier_with_return_list_notify() will stop and return the value. + */ + int (*notify)(NotifierWithReturn *notifier, void *data); + QLIST_ENTRY(NotifierWithReturn) node; +}; + +typedef struct NotifierWithReturnList { + QLIST_HEAD(, NotifierWithReturn) notifiers; +} NotifierWithReturnList; + +#define NOTIFIER_WITH_RETURN_LIST_INITIALIZER(head) \ + { QLIST_HEAD_INITIALIZER((head).notifiers) } + +void notifier_with_return_list_init(NotifierWithReturnList *list); + +void notifier_with_return_list_add(NotifierWithReturnList *list, + NotifierWithReturn *notifier); + +void notifier_with_return_remove(NotifierWithReturn *notifier); + +int notifier_with_return_list_notify(NotifierWithReturnList *list, + void *data); + #endif diff --git a/util/notify.c b/util/notify.c index 7b7692acb2..f215dfc214 100644 --- a/util/notify.c +++ b/util/notify.c @@ -39,3 +39,33 @@ void notifier_list_notify(NotifierList *list, void *data) notifier->notify(notifier, data); } } + +void notifier_with_return_list_init(NotifierWithReturnList *list) +{ + QLIST_INIT(&list->notifiers); +} + +void notifier_with_return_list_add(NotifierWithReturnList *list, + NotifierWithReturn *notifier) +{ + QLIST_INSERT_HEAD(&list->notifiers, notifier, node); +} + +void notifier_with_return_remove(NotifierWithReturn *notifier) +{ + QLIST_REMOVE(notifier, node); +} + +int notifier_with_return_list_notify(NotifierWithReturnList *list, void *data) +{ + NotifierWithReturn *notifier, *next; + int ret = 0; + + QLIST_FOREACH_SAFE(notifier, &list->notifiers, node, next) { + ret = notifier->notify(notifier, data); + if (ret != 0) { + break; + } + } + return ret; +} From d616b224745b2c522f965cf8de7da17b553b959a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:10 +0200 Subject: [PATCH 03/20] block: add bdrv_add_before_write_notifier() The bdrv_add_before_write_notifier() function installs a callback that is invoked before a write request is processed. This will be used to implement copy-on-write point-in-time snapshots where we need to copy out old data before overwriting it. Note that BdrvTrackedRequest is moved to block_int.h since it is passed to .notify() functions. Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block.c | 23 ++++++++++++----------- include/block/block_int.h | 23 ++++++++++++++++++++++- 2 files changed, 34 insertions(+), 12 deletions(-) diff --git a/block.c b/block.c index 8e77d46b02..0898f6b4b6 100644 --- a/block.c +++ b/block.c @@ -305,6 +305,7 @@ BlockDriverState *bdrv_new(const char *device_name) } bdrv_iostatus_disable(bs); notifier_list_init(&bs->close_notifiers); + notifier_with_return_list_init(&bs->before_write_notifiers); return bs; } @@ -1840,16 +1841,6 @@ int bdrv_commit_all(void) return 0; } -struct BdrvTrackedRequest { - BlockDriverState *bs; - int64_t sector_num; - int nb_sectors; - bool is_write; - QLIST_ENTRY(BdrvTrackedRequest) list; - Coroutine *co; /* owner, used for deadlock detection */ - CoQueue wait_queue; /* coroutines blocked on this request */ -}; - /** * Remove an active request from the tracked requests list * @@ -2620,7 +2611,11 @@ static int coroutine_fn bdrv_co_do_writev(BlockDriverState *bs, tracked_request_begin(&req, bs, sector_num, nb_sectors, true); - if (flags & BDRV_REQ_ZERO_WRITE) { + ret = notifier_with_return_list_notify(&bs->before_write_notifiers, &req); + + if (ret < 0) { + /* Do nothing, write notifier decided to fail this request */ + } else if (flags & BDRV_REQ_ZERO_WRITE) { ret = bdrv_co_do_write_zeroes(bs, sector_num, nb_sectors); } else { ret = drv->bdrv_co_writev(bs, sector_num, nb_sectors, qiov); @@ -4581,3 +4576,9 @@ AioContext *bdrv_get_aio_context(BlockDriverState *bs) /* Currently BlockDriverState always uses the main loop AioContext */ return qemu_get_aio_context(); } + +void bdrv_add_before_write_notifier(BlockDriverState *bs, + NotifierWithReturn *notifier) +{ + notifier_with_return_list_add(&bs->before_write_notifiers, notifier); +} diff --git a/include/block/block_int.h b/include/block/block_int.h index ba52247c1a..2d009556b0 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -59,7 +59,16 @@ #define BLOCK_OPT_LAZY_REFCOUNTS "lazy_refcounts" #define BLOCK_OPT_ADAPTER_TYPE "adapter_type" -typedef struct BdrvTrackedRequest BdrvTrackedRequest; +typedef struct BdrvTrackedRequest { + BlockDriverState *bs; + int64_t sector_num; + int nb_sectors; + bool is_write; + QLIST_ENTRY(BdrvTrackedRequest) list; + Coroutine *co; /* owner, used for deadlock detection */ + CoQueue wait_queue; /* coroutines blocked on this request */ +} BdrvTrackedRequest; + typedef struct BlockIOLimit { int64_t bps[3]; @@ -248,6 +257,9 @@ struct BlockDriverState { NotifierList close_notifiers; + /* Callback before write request is processed */ + NotifierWithReturnList before_write_notifiers; + /* number of in-flight copy-on-read requests */ unsigned int copy_on_read_in_flight; @@ -298,6 +310,15 @@ int get_tmp_filename(char *filename, int size); void bdrv_set_io_limits(BlockDriverState *bs, BlockIOLimit *io_limits); +/** + * bdrv_add_before_write_notifier: + * + * Register a callback that is invoked before write requests are processed but + * after any throttling or waiting for overlapping requests. + */ +void bdrv_add_before_write_notifier(BlockDriverState *bs, + NotifierWithReturn *notifier); + /** * bdrv_get_aio_context: * From 98d2c6f2cd80afaa2dc10091f5e35a97c181e4f5 Mon Sep 17 00:00:00 2001 From: Dietmar Maurer Date: Mon, 24 Jun 2013 17:13:11 +0200 Subject: [PATCH 04/20] block: add basic backup support to block driver backup_start() creates a block job that copies a point-in-time snapshot of a block device to a target block device. We call backup_do_cow() for each write during backup. That function reads the original data from the block device before it gets overwritten. The data is then written to the target device. Currently backup cluster size is hardcoded to 65536 bytes. [I made a number of changes to Dietmar's original patch and folded them in to make code review easy. Here is the full list: * Drop BackupDumpFunc interface in favor of a target block device * Detect zero clusters with buffer_is_zero() and use bdrv_co_write_zeroes() * Use 0 delay instead of 1us, like other block jobs * Unify creation/start functions into backup_start() * Simplify cleanup, free bitmap in backup_run() instead of cb * function * Use HBitmap to avoid duplicating bitmap code * Use bdrv_getlength() instead of accessing ->total_sectors * directly * Delete the backup.h header file, it is no longer necessary * Move ./backup.c to block/backup.c * Remove #ifdefed out code * Coding style and whitespace cleanups * Use bdrv_add_before_write_notifier() instead of blockjob-specific hooks * Keep our own in-flight CowRequest list instead of using block.c tracked requests. This means a little code duplication but is much simpler than trying to share the tracked requests list and use the backup block size. * Add on_source_error and on_target_error error handling. * Use trace events instead of DPRINTF() -- stefanha] Signed-off-by: Dietmar Maurer Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- block/Makefile.objs | 1 + block/backup.c | 341 ++++++++++++++++++++++++++++++++++++++ include/block/block_int.h | 19 +++ trace-events | 8 + 4 files changed, 369 insertions(+) create mode 100644 block/backup.c diff --git a/block/Makefile.objs b/block/Makefile.objs index 2981654846..4cf9aa499f 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -21,5 +21,6 @@ endif common-obj-y += stream.o common-obj-y += commit.o common-obj-y += mirror.o +common-obj-y += backup.o $(obj)/curl.o: QEMU_CFLAGS+=$(CURL_CFLAGS) diff --git a/block/backup.c b/block/backup.c new file mode 100644 index 0000000000..16105d40b1 --- /dev/null +++ b/block/backup.c @@ -0,0 +1,341 @@ +/* + * QEMU backup + * + * Copyright (C) 2013 Proxmox Server Solutions + * + * Authors: + * Dietmar Maurer (dietmar@proxmox.com) + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + * + */ + +#include +#include +#include + +#include "trace.h" +#include "block/block.h" +#include "block/block_int.h" +#include "block/blockjob.h" +#include "qemu/ratelimit.h" + +#define BACKUP_CLUSTER_BITS 16 +#define BACKUP_CLUSTER_SIZE (1 << BACKUP_CLUSTER_BITS) +#define BACKUP_SECTORS_PER_CLUSTER (BACKUP_CLUSTER_SIZE / BDRV_SECTOR_SIZE) + +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct CowRequest { + int64_t start; + int64_t end; + QLIST_ENTRY(CowRequest) list; + CoQueue wait_queue; /* coroutines blocked on this request */ +} CowRequest; + +typedef struct BackupBlockJob { + BlockJob common; + BlockDriverState *target; + RateLimit limit; + BlockdevOnError on_source_error; + BlockdevOnError on_target_error; + CoRwlock flush_rwlock; + uint64_t sectors_read; + HBitmap *bitmap; + QLIST_HEAD(, CowRequest) inflight_reqs; +} BackupBlockJob; + +/* See if in-flight requests overlap and wait for them to complete */ +static void coroutine_fn wait_for_overlapping_requests(BackupBlockJob *job, + int64_t start, + int64_t end) +{ + CowRequest *req; + bool retry; + + do { + retry = false; + QLIST_FOREACH(req, &job->inflight_reqs, list) { + if (end > req->start && start < req->end) { + qemu_co_queue_wait(&req->wait_queue); + retry = true; + break; + } + } + } while (retry); +} + +/* Keep track of an in-flight request */ +static void cow_request_begin(CowRequest *req, BackupBlockJob *job, + int64_t start, int64_t end) +{ + req->start = start; + req->end = end; + qemu_co_queue_init(&req->wait_queue); + QLIST_INSERT_HEAD(&job->inflight_reqs, req, list); +} + +/* Forget about a completed request */ +static void cow_request_end(CowRequest *req) +{ + QLIST_REMOVE(req, list); + qemu_co_queue_restart_all(&req->wait_queue); +} + +static int coroutine_fn backup_do_cow(BlockDriverState *bs, + int64_t sector_num, int nb_sectors, + bool *error_is_read) +{ + BackupBlockJob *job = (BackupBlockJob *)bs->job; + CowRequest cow_request; + struct iovec iov; + QEMUIOVector bounce_qiov; + void *bounce_buffer = NULL; + int ret = 0; + int64_t start, end; + int n; + + qemu_co_rwlock_rdlock(&job->flush_rwlock); + + start = sector_num / BACKUP_SECTORS_PER_CLUSTER; + end = DIV_ROUND_UP(sector_num + nb_sectors, BACKUP_SECTORS_PER_CLUSTER); + + trace_backup_do_cow_enter(job, start, sector_num, nb_sectors); + + wait_for_overlapping_requests(job, start, end); + cow_request_begin(&cow_request, job, start, end); + + for (; start < end; start++) { + if (hbitmap_get(job->bitmap, start)) { + trace_backup_do_cow_skip(job, start); + continue; /* already copied */ + } + + trace_backup_do_cow_process(job, start); + + n = MIN(BACKUP_SECTORS_PER_CLUSTER, + job->common.len / BDRV_SECTOR_SIZE - + start * BACKUP_SECTORS_PER_CLUSTER); + + if (!bounce_buffer) { + bounce_buffer = qemu_blockalign(bs, BACKUP_CLUSTER_SIZE); + } + iov.iov_base = bounce_buffer; + iov.iov_len = n * BDRV_SECTOR_SIZE; + qemu_iovec_init_external(&bounce_qiov, &iov, 1); + + ret = bdrv_co_readv(bs, start * BACKUP_SECTORS_PER_CLUSTER, n, + &bounce_qiov); + if (ret < 0) { + trace_backup_do_cow_read_fail(job, start, ret); + if (error_is_read) { + *error_is_read = true; + } + goto out; + } + + if (buffer_is_zero(iov.iov_base, iov.iov_len)) { + ret = bdrv_co_write_zeroes(job->target, + start * BACKUP_SECTORS_PER_CLUSTER, n); + } else { + ret = bdrv_co_writev(job->target, + start * BACKUP_SECTORS_PER_CLUSTER, n, + &bounce_qiov); + } + if (ret < 0) { + trace_backup_do_cow_write_fail(job, start, ret); + if (error_is_read) { + *error_is_read = false; + } + goto out; + } + + hbitmap_set(job->bitmap, start, 1); + + /* Publish progress, guest I/O counts as progress too. Note that the + * offset field is an opaque progress value, it is not a disk offset. + */ + job->sectors_read += n; + job->common.offset += n * BDRV_SECTOR_SIZE; + } + +out: + if (bounce_buffer) { + qemu_vfree(bounce_buffer); + } + + cow_request_end(&cow_request); + + trace_backup_do_cow_return(job, sector_num, nb_sectors, ret); + + qemu_co_rwlock_unlock(&job->flush_rwlock); + + return ret; +} + +static int coroutine_fn backup_before_write_notify( + NotifierWithReturn *notifier, + void *opaque) +{ + BdrvTrackedRequest *req = opaque; + + return backup_do_cow(req->bs, req->sector_num, req->nb_sectors, NULL); +} + +static void backup_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common); + + if (speed < 0) { + error_set(errp, QERR_INVALID_PARAMETER, "speed"); + return; + } + ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); +} + +static void backup_iostatus_reset(BlockJob *job) +{ + BackupBlockJob *s = container_of(job, BackupBlockJob, common); + + bdrv_iostatus_reset(s->target); +} + +static const BlockJobType backup_job_type = { + .instance_size = sizeof(BackupBlockJob), + .job_type = "backup", + .set_speed = backup_set_speed, + .iostatus_reset = backup_iostatus_reset, +}; + +static BlockErrorAction backup_error_action(BackupBlockJob *job, + bool read, int error) +{ + if (read) { + return block_job_error_action(&job->common, job->common.bs, + job->on_source_error, true, error); + } else { + return block_job_error_action(&job->common, job->target, + job->on_target_error, false, error); + } +} + +static void coroutine_fn backup_run(void *opaque) +{ + BackupBlockJob *job = opaque; + BlockDriverState *bs = job->common.bs; + BlockDriverState *target = job->target; + BlockdevOnError on_target_error = job->on_target_error; + NotifierWithReturn before_write = { + .notify = backup_before_write_notify, + }; + int64_t start, end; + int ret = 0; + + QLIST_INIT(&job->inflight_reqs); + qemu_co_rwlock_init(&job->flush_rwlock); + + start = 0; + end = DIV_ROUND_UP(job->common.len / BDRV_SECTOR_SIZE, + BACKUP_SECTORS_PER_CLUSTER); + + job->bitmap = hbitmap_alloc(end, 0); + + bdrv_set_enable_write_cache(target, true); + bdrv_set_on_error(target, on_target_error, on_target_error); + bdrv_iostatus_enable(target); + + bdrv_add_before_write_notifier(bs, &before_write); + + for (; start < end; start++) { + bool error_is_read; + + if (block_job_is_cancelled(&job->common)) { + break; + } + + /* we need to yield so that qemu_aio_flush() returns. + * (without, VM does not reboot) + */ + if (job->common.speed) { + uint64_t delay_ns = ratelimit_calculate_delay( + &job->limit, job->sectors_read); + job->sectors_read = 0; + block_job_sleep_ns(&job->common, rt_clock, delay_ns); + } else { + block_job_sleep_ns(&job->common, rt_clock, 0); + } + + if (block_job_is_cancelled(&job->common)) { + break; + } + + ret = backup_do_cow(bs, start * BACKUP_SECTORS_PER_CLUSTER, + BACKUP_SECTORS_PER_CLUSTER, &error_is_read); + if (ret < 0) { + /* Depending on error action, fail now or retry cluster */ + BlockErrorAction action = + backup_error_action(job, error_is_read, -ret); + if (action == BDRV_ACTION_REPORT) { + break; + } else { + start--; + continue; + } + } + } + + notifier_with_return_remove(&before_write); + + /* wait until pending backup_do_cow() calls have completed */ + qemu_co_rwlock_wrlock(&job->flush_rwlock); + qemu_co_rwlock_unlock(&job->flush_rwlock); + + hbitmap_free(job->bitmap); + + bdrv_iostatus_disable(target); + bdrv_delete(target); + + block_job_completed(&job->common, ret); +} + +void backup_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, + BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + BlockDriverCompletionFunc *cb, void *opaque, + Error **errp) +{ + int64_t len; + + assert(bs); + assert(target); + assert(cb); + + if ((on_source_error == BLOCKDEV_ON_ERROR_STOP || + on_source_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER, "on-source-error"); + return; + } + + len = bdrv_getlength(bs); + if (len < 0) { + error_setg_errno(errp, -len, "unable to get length for '%s'", + bdrv_get_device_name(bs)); + return; + } + + BackupBlockJob *job = block_job_create(&backup_job_type, bs, speed, + cb, opaque, errp); + if (!job) { + return; + } + + job->on_source_error = on_source_error; + job->on_target_error = on_target_error; + job->target = target; + job->common.len = len; + job->common.co = qemu_coroutine_create(backup_run); + qemu_coroutine_enter(job->common.co, job); +} diff --git a/include/block/block_int.h b/include/block/block_int.h index 2d009556b0..c6ac871e21 100644 --- a/include/block/block_int.h +++ b/include/block/block_int.h @@ -399,4 +399,23 @@ void mirror_start(BlockDriverState *bs, BlockDriverState *target, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); +/* + * backup_start: + * @bs: Block device to operate on. + * @target: Block device to write to. + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_source_error: The action to take upon error reading from the source. + * @on_target_error: The action to take upon error writing to the target. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * + * Start a backup operation on @bs. Clusters in @bs are written to @target + * until the job is cancelled or manually completed. + */ +void backup_start(BlockDriverState *bs, BlockDriverState *target, + int64_t speed, BlockdevOnError on_source_error, + BlockdevOnError on_target_error, + BlockDriverCompletionFunc *cb, void *opaque, + Error **errp); + #endif /* BLOCK_INT_H */ diff --git a/trace-events b/trace-events index c5f1ccb96d..0acce7b350 100644 --- a/trace-events +++ b/trace-events @@ -92,6 +92,14 @@ mirror_yield_in_flight(void *s, int64_t sector_num, int in_flight) "s %p sector_ mirror_yield_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d" mirror_break_buf_busy(void *s, int nb_chunks, int in_flight) "s %p requested chunks %d in_flight %d" +# block/backup.c +backup_do_cow_enter(void *job, int64_t start, int64_t sector_num, int nb_sectors) "job %p start %"PRId64" sector_num %"PRId64" nb_sectors %d" +backup_do_cow_return(void *job, int64_t sector_num, int nb_sectors, int ret) "job %p sector_num %"PRId64" nb_sectors %d ret %d" +backup_do_cow_skip(void *job, int64_t start) "job %p start %"PRId64 +backup_do_cow_process(void *job, int64_t start) "job %p start %"PRId64 +backup_do_cow_read_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d" +backup_do_cow_write_fail(void *job, int64_t start, int ret) "job %p start %"PRId64" ret %d" + # blockdev.c qmp_block_job_cancel(void *job) "job %p" qmp_block_job_pause(void *job) "job %p" From cb78466ef60ccf707a6f38a1294c435b65a828e0 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:12 +0200 Subject: [PATCH 05/20] blockdev: drop redundant proto_drv check It is not necessary to check that we can find a protocol block driver since we create or open the image file. This produces the error that we need anyway. Besides, the QERR_INVALID_BLOCK_FORMAT is inappropriate since the protocol is incorrect rather than the format. Also drop an empty line between bdrv_open() and checking its return value. This may be due to copy-pasting from earlier code that performed other operations before handling errors. Reported-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Wenchao Xia Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockdev.c | 15 --------------- 1 file changed, 15 deletions(-) diff --git a/blockdev.c b/blockdev.c index 459a87e8d0..c17b525154 100644 --- a/blockdev.c +++ b/blockdev.c @@ -818,7 +818,6 @@ typedef struct ExternalSnapshotStates { static void external_snapshot_prepare(BlkTransactionStates *common, Error **errp) { - BlockDriver *proto_drv; BlockDriver *drv; int flags, ret; Error *local_err = NULL; @@ -874,12 +873,6 @@ static void external_snapshot_prepare(BlkTransactionStates *common, flags = states->old_bs->open_flags; - proto_drv = bdrv_find_protocol(new_image_file); - if (!proto_drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - return; - } - /* create new image w/backing file */ if (mode != NEW_IMAGE_MODE_EXISTING) { bdrv_img_create(new_image_file, format, @@ -1375,7 +1368,6 @@ void qmp_drive_mirror(const char *device, const char *target, { BlockDriverState *bs; BlockDriverState *source, *target_bs; - BlockDriver *proto_drv; BlockDriver *drv = NULL; Error *local_err = NULL; int flags; @@ -1443,12 +1435,6 @@ void qmp_drive_mirror(const char *device, const char *target, sync = MIRROR_SYNC_MODE_FULL; } - proto_drv = bdrv_find_protocol(target); - if (!proto_drv) { - error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); - return; - } - bdrv_get_geometry(bs, &size); size *= 512; if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) { @@ -1483,7 +1469,6 @@ void qmp_drive_mirror(const char *device, const char *target, */ target_bs = bdrv_new(""); ret = bdrv_open(target_bs, target, NULL, flags | BDRV_O_NO_BACKING, drv); - if (ret < 0) { bdrv_delete(target_bs); error_setg_file_open(errp, -ret, target); From ac3c5d831aa0ff796659300e186be1a35862dbd3 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:13 +0200 Subject: [PATCH 06/20] blockdev: use bdrv_getlength() in qmp_drive_mirror() Use bdrv_getlength() for its byte units and error return instead of bdrv_get_geometry(). Reported-by: Kevin Wolf Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockdev.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/blockdev.c b/blockdev.c index c17b525154..445f6e1d84 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1371,7 +1371,7 @@ void qmp_drive_mirror(const char *device, const char *target, BlockDriver *drv = NULL; Error *local_err = NULL; int flags; - uint64_t size; + int64_t size; int ret; if (!has_speed) { @@ -1435,8 +1435,12 @@ void qmp_drive_mirror(const char *device, const char *target, sync = MIRROR_SYNC_MODE_FULL; } - bdrv_get_geometry(bs, &size); - size *= 512; + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); + return; + } + if (sync == MIRROR_SYNC_MODE_FULL && mode != NEW_IMAGE_MODE_EXISTING) { /* create new image w/o backing file */ assert(format && drv); From 99a9addf567e31244d934376060dd1d34f0f012c Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:14 +0200 Subject: [PATCH 07/20] block: add drive-backup QMP command @drive-backup Start a point-in-time copy of a block device to a new destination. The status of ongoing drive-backup operations can be checked with query-block-jobs where the BlockJobInfo.type field has the value 'backup'. The operation can be stopped before it has completed using the block-job-cancel command. @device: the name of the device which should be copied. @target: the target of the new image. If the file exists, or if it is a device, the existing file/device will be used as the new destination. If it does not exist, a new file will be created. @format: #optional the format of the new destination, default is to probe if @mode is 'existing', else the format of the source @mode: #optional whether and how QEMU should create a new image, default is 'absolute-paths'. @speed: #optional the maximum speed, in bytes per second @on-source-error: #optional the action to take on an error on the source, default 'report'. 'stop' and 'enospc' can only be used if the block device supports io-status (see BlockInfo). @on-target-error: #optional the action to take on an error on the target, default 'report' (no limitations, since this applies to a different block device than @device). Note that @on-source-error and @on-target-error only affect background I/O. If an error occurs during a guest write request, the device's rerror/werror actions will be used. Returns: nothing on success If @device is not a valid block device, DeviceNotFound Since 1.6 Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockdev.c | 97 ++++++++++++++++++++++++++++++++++++++++++++++++ qapi-schema.json | 46 +++++++++++++++++++++++ qmp-commands.hx | 46 +++++++++++++++++++++++ 3 files changed, 189 insertions(+) diff --git a/blockdev.c b/blockdev.c index 445f6e1d84..c77b2a84c6 100644 --- a/blockdev.c +++ b/blockdev.c @@ -1353,6 +1353,103 @@ void qmp_block_commit(const char *device, drive_get_ref(drive_get_by_blockdev(bs)); } +void qmp_drive_backup(const char *device, const char *target, + bool has_format, const char *format, + bool has_mode, enum NewImageMode mode, + bool has_speed, int64_t speed, + bool has_on_source_error, BlockdevOnError on_source_error, + bool has_on_target_error, BlockdevOnError on_target_error, + Error **errp) +{ + BlockDriverState *bs; + BlockDriverState *target_bs; + BlockDriver *drv = NULL; + Error *local_err = NULL; + int flags; + int64_t size; + int ret; + + if (!has_speed) { + speed = 0; + } + if (!has_on_source_error) { + on_source_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_on_target_error) { + on_target_error = BLOCKDEV_ON_ERROR_REPORT; + } + if (!has_mode) { + mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; + } + + bs = bdrv_find(device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + + if (!bdrv_is_inserted(bs)) { + error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); + return; + } + + if (!has_format) { + format = mode == NEW_IMAGE_MODE_EXISTING ? NULL : bs->drv->format_name; + } + if (format) { + drv = bdrv_find_format(format); + if (!drv) { + error_set(errp, QERR_INVALID_BLOCK_FORMAT, format); + return; + } + } + + if (bdrv_in_use(bs)) { + error_set(errp, QERR_DEVICE_IN_USE, device); + return; + } + + flags = bs->open_flags | BDRV_O_RDWR; + + size = bdrv_getlength(bs); + if (size < 0) { + error_setg_errno(errp, -size, "bdrv_getlength failed"); + return; + } + + if (mode != NEW_IMAGE_MODE_EXISTING) { + assert(format && drv); + bdrv_img_create(target, format, + NULL, NULL, NULL, size, flags, &local_err, false); + } + + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + + target_bs = bdrv_new(""); + ret = bdrv_open(target_bs, target, NULL, flags, drv); + if (ret < 0) { + bdrv_delete(target_bs); + error_setg_file_open(errp, -ret, target); + return; + } + + backup_start(bs, target_bs, speed, on_source_error, on_target_error, + block_job_cb, bs, &local_err); + if (local_err != NULL) { + bdrv_delete(target_bs); + error_propagate(errp, local_err); + return; + } + + /* Grab a reference so hotplug does not delete the BlockDriverState from + * underneath us. + */ + drive_get_ref(drive_get_by_blockdev(bs)); +} + #define DEFAULT_MIRROR_BUF_SIZE (10 << 20) void qmp_drive_mirror(const char *device, const char *target, diff --git a/qapi-schema.json b/qapi-schema.json index a30a728dde..6fe7efc65a 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1743,6 +1743,52 @@ 'data': { 'device': 'str', '*base': 'str', 'top': 'str', '*speed': 'int' } } +## +# @drive-backup +# +# Start a point-in-time copy of a block device to a new destination. The +# status of ongoing drive-backup operations can be checked with +# query-block-jobs where the BlockJobInfo.type field has the value 'backup'. +# The operation can be stopped before it has completed using the +# block-job-cancel command. +# +# @device: the name of the device which should be copied. +# +# @target: the target of the new image. If the file exists, or if it +# is a device, the existing file/device will be used as the new +# destination. If it does not exist, a new file will be created. +# +# @format: #optional the format of the new destination, default is to +# probe if @mode is 'existing', else the format of the source +# +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. +# +# @speed: #optional the maximum speed, in bytes per second +# +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block device supports io-status (see BlockInfo). +# +# @on-target-error: #optional the action to take on an error on the target, +# default 'report' (no limitations, since this applies to +# a different block device than @device). +# +# Note that @on-source-error and @on-target-error only affect background I/O. +# If an error occurs during a guest write request, the device's rerror/werror +# actions will be used. +# +# Returns: nothing on success +# If @device is not a valid block device, DeviceNotFound +# +# Since 1.6 +## +{ 'command': 'drive-backup', + 'data': { 'device': 'str', 'target': 'str', '*format': 'str', + '*mode': 'NewImageMode', '*speed': 'int', + '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError' } } + ## # @drive-mirror # diff --git a/qmp-commands.hx b/qmp-commands.hx index 8cea5e554c..362f0e1ef8 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -911,6 +911,52 @@ EQMP .mhandler.cmd_new = qmp_marshal_input_block_commit, }, + { + .name = "drive-backup", + .args_type = "device:B,target:s,speed:i?,mode:s?,format:s?," + "on-source-error:s?,on-target-error:s?", + .mhandler.cmd_new = qmp_marshal_input_drive_backup, + }, + +SQMP +drive-backup +------------ + +Start a point-in-time copy of a block device to a new destination. The +status of ongoing drive-backup operations can be checked with +query-block-jobs where the BlockJobInfo.type field has the value 'backup'. +The operation can be stopped before it has completed using the +block-job-cancel command. + +Arguments: + +- "device": the name of the device which should be copied. + (json-string) +- "target": the target of the new image. If the file exists, or if it is a + device, the existing file/device will be used as the new + destination. If it does not exist, a new file will be created. + (json-string) +- "format": the format of the new destination, default is to probe if 'mode' is + 'existing', else the format of the source + (json-string, optional) +- "mode": whether and how QEMU should create a new image + (NewImageMode, optional, default 'absolute-paths') +- "speed": the maximum speed, in bytes per second (json-int, optional) +- "on-source-error": the action to take on an error on the source, default + 'report'. 'stop' and 'enospc' can only be used + if the block device supports io-status. + (BlockdevOnError, optional) +- "on-target-error": the action to take on an error on the target, default + 'report' (no limitations, since this applies to + a different block device than device). + (BlockdevOnError, optional) + +Example: +-> { "execute": "drive-backup", "arguments": { "device": "drive0", + "target": "backup.img" } } +<- { "return": {} } +EQMP + { .name = "block-job-set-speed", .args_type = "device:B,speed:o", From ba5d6ab68f7bc55520cddd5e00bd48b041c7aecd Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:15 +0200 Subject: [PATCH 08/20] blockdev: rename BlkTransactionStates to singular The QMP 'transaction' command keeps a list of in-flight transactions. The transaction state structure is called BlkTransactionStates even though it only deals with a single transaction. The only plural thing is the linked list of transaction states. I find it confusing to call the single structure "States". This patch renames it to "State", just like BlockDriverState is singular. Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockdev.c | 104 ++++++++++++++++++++++++++--------------------------- 1 file changed, 52 insertions(+), 52 deletions(-) diff --git a/blockdev.c b/blockdev.c index c77b2a84c6..5312e6065f 100644 --- a/blockdev.c +++ b/blockdev.c @@ -780,7 +780,7 @@ void qmp_blockdev_snapshot_sync(const char *device, const char *snapshot_file, /* New and old BlockDriverState structs for group snapshots */ -typedef struct BlkTransactionStates BlkTransactionStates; +typedef struct BlkTransactionState BlkTransactionState; /* Only prepare() may fail. In a single transaction, only one of commit() or abort() will be called, clean() will always be called if it present. */ @@ -788,13 +788,13 @@ typedef struct BdrvActionOps { /* Size of state struct, in bytes. */ size_t instance_size; /* Prepare the work, must NOT be NULL. */ - void (*prepare)(BlkTransactionStates *common, Error **errp); + void (*prepare)(BlkTransactionState *common, Error **errp); /* Commit the changes, must NOT be NULL. */ - void (*commit)(BlkTransactionStates *common); + void (*commit)(BlkTransactionState *common); /* Abort the changes on fail, can be NULL. */ - void (*abort)(BlkTransactionStates *common); + void (*abort)(BlkTransactionState *common); /* Clean up resource in the end, can be NULL. */ - void (*clean)(BlkTransactionStates *common); + void (*clean)(BlkTransactionState *common); } BdrvActionOps; /* @@ -802,20 +802,20 @@ typedef struct BdrvActionOps { * that compiler will also arrange it to the same address with parent instance. * Later it will be used in free(). */ -struct BlkTransactionStates { +struct BlkTransactionState { TransactionAction *action; const BdrvActionOps *ops; - QSIMPLEQ_ENTRY(BlkTransactionStates) entry; + QSIMPLEQ_ENTRY(BlkTransactionState) entry; }; /* external snapshot private data */ -typedef struct ExternalSnapshotStates { - BlkTransactionStates common; +typedef struct ExternalSnapshotState { + BlkTransactionState common; BlockDriverState *old_bs; BlockDriverState *new_bs; -} ExternalSnapshotStates; +} ExternalSnapshotState; -static void external_snapshot_prepare(BlkTransactionStates *common, +static void external_snapshot_prepare(BlkTransactionState *common, Error **errp) { BlockDriver *drv; @@ -825,8 +825,8 @@ static void external_snapshot_prepare(BlkTransactionStates *common, const char *new_image_file; const char *format = "qcow2"; enum NewImageMode mode = NEW_IMAGE_MODE_ABSOLUTE_PATHS; - ExternalSnapshotStates *states = - DO_UPCAST(ExternalSnapshotStates, common, common); + ExternalSnapshotState *state = + DO_UPCAST(ExternalSnapshotState, common, common); TransactionAction *action = common->action; /* get parameters */ @@ -848,36 +848,36 @@ static void external_snapshot_prepare(BlkTransactionStates *common, return; } - states->old_bs = bdrv_find(device); - if (!states->old_bs) { + state->old_bs = bdrv_find(device); + if (!state->old_bs) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); return; } - if (!bdrv_is_inserted(states->old_bs)) { + if (!bdrv_is_inserted(state->old_bs)) { error_set(errp, QERR_DEVICE_HAS_NO_MEDIUM, device); return; } - if (bdrv_in_use(states->old_bs)) { + if (bdrv_in_use(state->old_bs)) { error_set(errp, QERR_DEVICE_IN_USE, device); return; } - if (!bdrv_is_read_only(states->old_bs)) { - if (bdrv_flush(states->old_bs)) { + if (!bdrv_is_read_only(state->old_bs)) { + if (bdrv_flush(state->old_bs)) { error_set(errp, QERR_IO_ERROR); return; } } - flags = states->old_bs->open_flags; + flags = state->old_bs->open_flags; /* create new image w/backing file */ if (mode != NEW_IMAGE_MODE_EXISTING) { bdrv_img_create(new_image_file, format, - states->old_bs->filename, - states->old_bs->drv->format_name, + state->old_bs->filename, + state->old_bs->drv->format_name, NULL, -1, flags, &local_err, false); if (error_is_set(&local_err)) { error_propagate(errp, local_err); @@ -886,42 +886,42 @@ static void external_snapshot_prepare(BlkTransactionStates *common, } /* We will manually add the backing_hd field to the bs later */ - states->new_bs = bdrv_new(""); + state->new_bs = bdrv_new(""); /* TODO Inherit bs->options or only take explicit options with an * extended QMP command? */ - ret = bdrv_open(states->new_bs, new_image_file, NULL, + ret = bdrv_open(state->new_bs, new_image_file, NULL, flags | BDRV_O_NO_BACKING, drv); if (ret != 0) { error_setg_file_open(errp, -ret, new_image_file); } } -static void external_snapshot_commit(BlkTransactionStates *common) +static void external_snapshot_commit(BlkTransactionState *common) { - ExternalSnapshotStates *states = - DO_UPCAST(ExternalSnapshotStates, common, common); + ExternalSnapshotState *state = + DO_UPCAST(ExternalSnapshotState, common, common); - /* This removes our old bs from the bdrv_states, and adds the new bs */ - bdrv_append(states->new_bs, states->old_bs); + /* This removes our old bs and adds the new bs */ + bdrv_append(state->new_bs, state->old_bs); /* We don't need (or want) to use the transactional * bdrv_reopen_multiple() across all the entries at once, because we * don't want to abort all of them if one of them fails the reopen */ - bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR, + bdrv_reopen(state->new_bs, state->new_bs->open_flags & ~BDRV_O_RDWR, NULL); } -static void external_snapshot_abort(BlkTransactionStates *common) +static void external_snapshot_abort(BlkTransactionState *common) { - ExternalSnapshotStates *states = - DO_UPCAST(ExternalSnapshotStates, common, common); - if (states->new_bs) { - bdrv_delete(states->new_bs); + ExternalSnapshotState *state = + DO_UPCAST(ExternalSnapshotState, common, common); + if (state->new_bs) { + bdrv_delete(state->new_bs); } } static const BdrvActionOps actions[] = { [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { - .instance_size = sizeof(ExternalSnapshotStates), + .instance_size = sizeof(ExternalSnapshotState), .prepare = external_snapshot_prepare, .commit = external_snapshot_commit, .abort = external_snapshot_abort, @@ -936,10 +936,10 @@ static const BdrvActionOps actions[] = { void qmp_transaction(TransactionActionList *dev_list, Error **errp) { TransactionActionList *dev_entry = dev_list; - BlkTransactionStates *states, *next; + BlkTransactionState *state, *next; Error *local_err = NULL; - QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionStates) snap_bdrv_states; + QSIMPLEQ_HEAD(snap_bdrv_states, BlkTransactionState) snap_bdrv_states; QSIMPLEQ_INIT(&snap_bdrv_states); /* drain all i/o before any snapshots */ @@ -956,20 +956,20 @@ void qmp_transaction(TransactionActionList *dev_list, Error **errp) assert(dev_info->kind < ARRAY_SIZE(actions)); ops = &actions[dev_info->kind]; - states = g_malloc0(ops->instance_size); - states->ops = ops; - states->action = dev_info; - QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, states, entry); + state = g_malloc0(ops->instance_size); + state->ops = ops; + state->action = dev_info; + QSIMPLEQ_INSERT_TAIL(&snap_bdrv_states, state, entry); - states->ops->prepare(states, &local_err); + state->ops->prepare(state, &local_err); if (error_is_set(&local_err)) { error_propagate(errp, local_err); goto delete_and_fail; } } - QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { - states->ops->commit(states); + QSIMPLEQ_FOREACH(state, &snap_bdrv_states, entry) { + state->ops->commit(state); } /* success */ @@ -980,17 +980,17 @@ delete_and_fail: * failure, and it is all-or-none; abandon each new bs, and keep using * the original bs for all images */ - QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { - if (states->ops->abort) { - states->ops->abort(states); + QSIMPLEQ_FOREACH(state, &snap_bdrv_states, entry) { + if (state->ops->abort) { + state->ops->abort(state); } } exit: - QSIMPLEQ_FOREACH_SAFE(states, &snap_bdrv_states, entry, next) { - if (states->ops->clean) { - states->ops->clean(states); + QSIMPLEQ_FOREACH_SAFE(state, &snap_bdrv_states, entry, next) { + if (state->ops->clean) { + state->ops->clean(state); } - g_free(states); + g_free(state); } } From f9ea81e82519f44071b3dd617de98f0d6d6cca0a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:16 +0200 Subject: [PATCH 09/20] blockdev: allow BdrvActionOps->commit() to be NULL Some QMP 'transaction' types don't need to do anything on .commit(). Make .commit() optional just like .abort(). The "drive-backup" action will take advantage of this, it only needs to cancel the block job on .abort(). Other block job actions will probably follow the same pattern, so allow .commit() to be NULL. Suggested-by: Eric Blake Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockdev.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/blockdev.c b/blockdev.c index 5312e6065f..0cf80810f5 100644 --- a/blockdev.c +++ b/blockdev.c @@ -789,7 +789,7 @@ typedef struct BdrvActionOps { size_t instance_size; /* Prepare the work, must NOT be NULL. */ void (*prepare)(BlkTransactionState *common, Error **errp); - /* Commit the changes, must NOT be NULL. */ + /* Commit the changes, can be NULL. */ void (*commit)(BlkTransactionState *common); /* Abort the changes on fail, can be NULL. */ void (*abort)(BlkTransactionState *common); @@ -969,7 +969,9 @@ void qmp_transaction(TransactionActionList *dev_list, Error **errp) } QSIMPLEQ_FOREACH(state, &snap_bdrv_states, entry) { - state->ops->commit(state); + if (state->ops->commit) { + state->ops->commit(state); + } } /* success */ From 3037f36446eb3556c14757ac468463c3902f331b Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:17 +0200 Subject: [PATCH 10/20] blockdev: add DriveBackup transaction This patch adds a transactional version of the drive-backup QMP command. It allows atomic snapshots of multiple drives along with automatic cleanup if there is a failure to start one of the backup jobs. Note that QMP events are emitted for block job completion/cancellation and the block job will be listed by query-block-jobs. @device: the name of the device whose writes should be mirrored. @target: the target of the new image. If the file exists, or if it is a device, the existing file/device will be used as the new destination. If it does not exist, a new file will be created. @format: #optional the format of the new destination, default is to probe if @mode is 'existing', else the format of the source @mode: #optional whether and how QEMU should create a new image, default is 'absolute-paths'. @speed: #optional the maximum speed, in bytes per second @on-source-error: #optional the action to take on an error on the source, default 'report'. 'stop' and 'enospc' can only be used if the block device supports io-status (see BlockInfo). @on-target-error: #optional the action to take on an error on the target, default 'report' (no limitations, since this applies to a different block device than @device). Reviewed-by: Eric Blake Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockdev.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ qapi-schema.json | 40 ++++++++++++++++++++++++++++++++++++++- 2 files changed, 88 insertions(+), 1 deletion(-) diff --git a/blockdev.c b/blockdev.c index 0cf80810f5..6200c0dc0a 100644 --- a/blockdev.c +++ b/blockdev.c @@ -919,6 +919,50 @@ static void external_snapshot_abort(BlkTransactionState *common) } } +typedef struct DriveBackupState { + BlkTransactionState common; + BlockDriverState *bs; + BlockJob *job; +} DriveBackupState; + +static void drive_backup_prepare(BlkTransactionState *common, Error **errp) +{ + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + DriveBackup *backup; + Error *local_err = NULL; + + assert(common->action->kind == TRANSACTION_ACTION_KIND_DRIVE_BACKUP); + backup = common->action->drive_backup; + + qmp_drive_backup(backup->device, backup->target, + backup->has_format, backup->format, + backup->has_mode, backup->mode, + backup->has_speed, backup->speed, + backup->has_on_source_error, backup->on_source_error, + backup->has_on_target_error, backup->on_target_error, + &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + state->bs = NULL; + state->job = NULL; + return; + } + + state->bs = bdrv_find(backup->device); + state->job = state->bs->job; +} + +static void drive_backup_abort(BlkTransactionState *common) +{ + DriveBackupState *state = DO_UPCAST(DriveBackupState, common, common); + BlockDriverState *bs = state->bs; + + /* Only cancel if it's the job we started */ + if (bs && bs->job && bs->job == state->job) { + block_job_cancel_sync(bs->job); + } +} + static const BdrvActionOps actions[] = { [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { .instance_size = sizeof(ExternalSnapshotState), @@ -926,6 +970,11 @@ static const BdrvActionOps actions[] = { .commit = external_snapshot_commit, .abort = external_snapshot_abort, }, + [TRANSACTION_ACTION_KIND_DRIVE_BACKUP] = { + .instance_size = sizeof(DriveBackupState), + .prepare = drive_backup_prepare, + .abort = drive_backup_abort, + }, }; /* diff --git a/qapi-schema.json b/qapi-schema.json index 6fe7efc65a..714108dc12 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1622,6 +1622,43 @@ 'data': { 'device': 'str', 'snapshot-file': 'str', '*format': 'str', '*mode': 'NewImageMode' } } +## +# @DriveBackup +# +# @device: the name of the device which should be copied. +# +# @target: the target of the new image. If the file exists, or if it +# is a device, the existing file/device will be used as the new +# destination. If it does not exist, a new file will be created. +# +# @format: #optional the format of the new destination, default is to +# probe if @mode is 'existing', else the format of the source +# +# @mode: #optional whether and how QEMU should create a new image, default is +# 'absolute-paths'. +# +# @speed: #optional the maximum speed, in bytes per second +# +# @on-source-error: #optional the action to take on an error on the source, +# default 'report'. 'stop' and 'enospc' can only be used +# if the block device supports io-status (see BlockInfo). +# +# @on-target-error: #optional the action to take on an error on the target, +# default 'report' (no limitations, since this applies to +# a different block device than @device). +# +# Note that @on-source-error and @on-target-error only affect background I/O. +# If an error occurs during a guest write request, the device's rerror/werror +# actions will be used. +# +# Since: 1.6 +## +{ 'type': 'DriveBackup', + 'data': { 'device': 'str', 'target': 'str', '*format': 'str', + '*mode': 'NewImageMode', '*speed': 'int', + '*on-source-error': 'BlockdevOnError', + '*on-target-error': 'BlockdevOnError' } } + ## # @TransactionAction # @@ -1630,7 +1667,8 @@ ## { 'union': 'TransactionAction', 'data': { - 'blockdev-snapshot-sync': 'BlockdevSnapshot' + 'blockdev-snapshot-sync': 'BlockdevSnapshot', + 'drive-backup': 'DriveBackup' } } ## From 78b18b78aa89c28aecbd007ae1967c978d39bfd6 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:18 +0200 Subject: [PATCH 11/20] blockdev: add Abort transaction The Abort action can be used to test QMP 'transaction' failure. Add it as the last action to exercise the .abort() and .cleanup() code paths for all previous actions. Reviewed-by: Kevin Wolf Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- blockdev.c | 15 +++++++++++++++ qapi-schema.json | 13 ++++++++++++- 2 files changed, 27 insertions(+), 1 deletion(-) diff --git a/blockdev.c b/blockdev.c index 6200c0dc0a..b3a57e0c1d 100644 --- a/blockdev.c +++ b/blockdev.c @@ -963,6 +963,16 @@ static void drive_backup_abort(BlkTransactionState *common) } } +static void abort_prepare(BlkTransactionState *common, Error **errp) +{ + error_setg(errp, "Transaction aborted using Abort action"); +} + +static void abort_commit(BlkTransactionState *common) +{ + assert(false); /* this action never succeeds */ +} + static const BdrvActionOps actions[] = { [TRANSACTION_ACTION_KIND_BLOCKDEV_SNAPSHOT_SYNC] = { .instance_size = sizeof(ExternalSnapshotState), @@ -975,6 +985,11 @@ static const BdrvActionOps actions[] = { .prepare = drive_backup_prepare, .abort = drive_backup_abort, }, + [TRANSACTION_ACTION_KIND_ABORT] = { + .instance_size = sizeof(BlkTransactionState), + .prepare = abort_prepare, + .commit = abort_commit, + }, }; /* diff --git a/qapi-schema.json b/qapi-schema.json index 714108dc12..6590307812 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1659,6 +1659,16 @@ '*on-source-error': 'BlockdevOnError', '*on-target-error': 'BlockdevOnError' } } +## +# @Abort +# +# This action can be used to test transaction failure. +# +# Since: 1.6 +### +{ 'type': 'Abort', + 'data': { } } + ## # @TransactionAction # @@ -1668,7 +1678,8 @@ { 'union': 'TransactionAction', 'data': { 'blockdev-snapshot-sync': 'BlockdevSnapshot', - 'drive-backup': 'DriveBackup' + 'drive-backup': 'DriveBackup', + 'abort': 'Abort' } } ## From 0dbe8a1b042b5eb22c6587dcc9884ebe8cedcbb6 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:19 +0200 Subject: [PATCH 12/20] qemu-iotests: extract wait_until_completed() into iotests.py The 'drive-mirror' tests often issue 'block-job-complete' and wait for the QMP completion event. Other types of block jobs also want to wait for completion but they may not need to issue 'block-job-complete'. Extract wait_until_completed() from 041 and put it into iotests.py. Return the QMP event object so the caller can make additional assertions, if necessary. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/qemu-iotests/041 | 14 ++------------ tests/qemu-iotests/iotests.py | 15 +++++++++++++++ 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/tests/qemu-iotests/041 b/tests/qemu-iotests/041 index 1e923e70ca..6661c0395d 100755 --- a/tests/qemu-iotests/041 +++ b/tests/qemu-iotests/041 @@ -57,18 +57,8 @@ class ImageMirroringTestCase(iotests.QMPTestCase): result = self.vm.qmp('block-job-complete', device=drive) self.assert_qmp(result, 'return', {}) - completed = False - while not completed: - for event in self.vm.get_qmp_events(wait=True): - if event['event'] == 'BLOCK_JOB_COMPLETED': - self.assert_qmp(event, 'data/type', 'mirror') - self.assert_qmp(event, 'data/device', drive) - self.assert_qmp_absent(event, 'data/error') - self.assert_qmp(event, 'data/offset', self.image_len) - self.assert_qmp(event, 'data/len', self.image_len) - completed = True - - self.assert_no_active_block_jobs() + event = self.wait_until_completed() + self.assert_qmp(event, 'data/type', 'mirror') class TestSingleDrive(ImageMirroringTestCase): image_len = 1 * 1024 * 1024 # MB diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index 8a8f1814bd..b028a890e6 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -208,6 +208,21 @@ class QMPTestCase(unittest.TestCase): self.assert_no_active_block_jobs() return result + def wait_until_completed(self, drive='drive0'): + '''Wait for a block job to finish, returning the event''' + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/device', drive) + self.assert_qmp_absent(event, 'data/error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_block_jobs() + return event + def notrun(reason): '''Skip this test suite''' # Each test in qemu-iotests has a number ("seq") From e5ca8fdd407431e281c6f303dc9f45f63a28048f Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Mon, 24 Jun 2013 17:13:20 +0200 Subject: [PATCH 13/20] qemu-iotests: add 055 drive-backup test case Testing drive-backup is similar to image streaming and drive mirroring. This test case is based on 041. Signed-off-by: Stefan Hajnoczi Signed-off-by: Kevin Wolf --- tests/qemu-iotests/055 | 282 +++++++++++++++++++++++++++++++++++++ tests/qemu-iotests/055.out | 5 + tests/qemu-iotests/group | 1 + 3 files changed, 288 insertions(+) create mode 100755 tests/qemu-iotests/055 create mode 100644 tests/qemu-iotests/055.out diff --git a/tests/qemu-iotests/055 b/tests/qemu-iotests/055 new file mode 100755 index 0000000000..887c959a3d --- /dev/null +++ b/tests/qemu-iotests/055 @@ -0,0 +1,282 @@ +#!/usr/bin/env python +# +# Tests for drive-backup +# +# Copyright (C) 2013 Red Hat, Inc. +# +# Based on 041. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# + +import time +import os +import iotests +from iotests import qemu_img, qemu_io + +test_img = os.path.join(iotests.test_dir, 'test.img') +target_img = os.path.join(iotests.test_dir, 'target.img') + +class TestSingleDrive(iotests.QMPTestCase): + image_len = 64 * 1024 * 1024 # MB + + def setUp(self): + # Write data to the image so we can compare later + qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSingleDrive.image_len)) + qemu_io('-c', 'write -P0x5d 0 64k', test_img) + qemu_io('-c', 'write -P0xd5 1M 32k', test_img) + qemu_io('-c', 'write -P0xdc 32M 124k', test_img) + qemu_io('-c', 'write -P0xdc 67043328 64k', test_img) + + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + try: + os.remove(target_img) + except OSError: + pass + + def test_cancel(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-backup', device='drive0', + target=target_img) + self.assert_qmp(result, 'return', {}) + + event = self.cancel_and_wait() + self.assert_qmp(event, 'data/type', 'backup') + + def test_pause(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-backup', device='drive0', + target=target_img) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-pause', device='drive0') + self.assert_qmp(result, 'return', {}) + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + offset = self.dictpath(result, 'return[0]/offset') + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/offset', offset) + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed() + + self.vm.shutdown() + self.assertTrue(iotests.compare_images(test_img, target_img), + 'target image does not match source after backup') + + def test_medium_not_found(self): + result = self.vm.qmp('drive-backup', device='ide1-cd0', + target=target_img) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_image_not_found(self): + result = self.vm.qmp('drive-backup', device='drive0', + mode='existing', target=target_img) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_device_not_found(self): + result = self.vm.qmp('drive-backup', device='nonexistent', + target=target_img) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + +class TestSetSpeed(iotests.QMPTestCase): + image_len = 80 * 1024 * 1024 # MB + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSetSpeed.image_len)) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(target_img) + + def test_set_speed(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-backup', device='drive0', + target=target_img) + self.assert_qmp(result, 'return', {}) + + # Default speed is 0 + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 0) + + result = self.vm.qmp('block-job-set-speed', device='drive0', speed=8 * 1024 * 1024) + self.assert_qmp(result, 'return', {}) + + # Ensure the speed we set was accepted + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 8 * 1024 * 1024) + + event = self.cancel_and_wait() + self.assert_qmp(event, 'data/type', 'backup') + + # Check setting speed in drive-backup works + result = self.vm.qmp('drive-backup', device='drive0', + target=target_img, speed=4*1024*1024) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 4 * 1024 * 1024) + + event = self.cancel_and_wait() + self.assert_qmp(event, 'data/type', 'backup') + + def test_set_speed_invalid(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-backup', device='drive0', + target=target_img, speed=-1) + self.assert_qmp(result, 'error/class', 'GenericError') + + self.assert_no_active_block_jobs() + + result = self.vm.qmp('drive-backup', device='drive0', + target=target_img) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-set-speed', device='drive0', speed=-1) + self.assert_qmp(result, 'error/class', 'GenericError') + + event = self.cancel_and_wait() + self.assert_qmp(event, 'data/type', 'backup') + +class TestSingleTransaction(iotests.QMPTestCase): + image_len = 64 * 1024 * 1024 # MB + + def setUp(self): + qemu_img('create', '-f', iotests.imgfmt, test_img, str(TestSingleTransaction.image_len)) + qemu_io('-c', 'write -P0x5d 0 64k', test_img) + qemu_io('-c', 'write -P0xd5 1M 32k', test_img) + qemu_io('-c', 'write -P0xdc 32M 124k', test_img) + qemu_io('-c', 'write -P0xdc 67043328 64k', test_img) + + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + try: + os.remove(target_img) + except OSError: + pass + + def test_cancel(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('transaction', actions=[{ + 'type': 'drive-backup', + 'data': { 'device': 'drive0', + 'target': target_img }, + } + ]) + self.assert_qmp(result, 'return', {}) + + event = self.cancel_and_wait() + self.assert_qmp(event, 'data/type', 'backup') + + def test_pause(self): + self.assert_no_active_block_jobs() + + result = self.vm.qmp('transaction', actions=[{ + 'type': 'drive-backup', + 'data': { 'device': 'drive0', + 'target': target_img }, + } + ]) + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-pause', device='drive0') + self.assert_qmp(result, 'return', {}) + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + offset = self.dictpath(result, 'return[0]/offset') + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/offset', offset) + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + self.wait_until_completed() + + self.vm.shutdown() + self.assertTrue(iotests.compare_images(test_img, target_img), + 'target image does not match source after backup') + + def test_medium_not_found(self): + result = self.vm.qmp('transaction', actions=[{ + 'type': 'drive-backup', + 'data': { 'device': 'ide1-cd0', + 'target': target_img }, + } + ]) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_image_not_found(self): + result = self.vm.qmp('transaction', actions=[{ + 'type': 'drive-backup', + 'data': { 'device': 'drive0', + 'mode': 'existing', + 'target': target_img }, + } + ]) + self.assert_qmp(result, 'error/class', 'GenericError') + + def test_device_not_found(self): + result = self.vm.qmp('transaction', actions=[{ + 'type': 'drive-backup', + 'data': { 'device': 'nonexistent', + 'mode': 'existing', + 'target': target_img }, + } + ]) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + def test_abort(self): + result = self.vm.qmp('transaction', actions=[{ + 'type': 'drive-backup', + 'data': { 'device': 'nonexistent', + 'mode': 'existing', + 'target': target_img }, + }, { + 'type': 'Abort', + 'data': {}, + } + ]) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_no_active_block_jobs() + +if __name__ == '__main__': + iotests.main(supported_fmts=['raw', 'qcow2']) diff --git a/tests/qemu-iotests/055.out b/tests/qemu-iotests/055.out new file mode 100644 index 0000000000..fa16b5ccef --- /dev/null +++ b/tests/qemu-iotests/055.out @@ -0,0 +1,5 @@ +............. +---------------------------------------------------------------------- +Ran 13 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index f487a8fd93..fdc6ed14ca 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -61,3 +61,4 @@ 052 rw auto backing 053 rw auto 054 rw auto +055 rw auto From f59fee8d509b446df24843c1145a99b740492725 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Tue, 25 Jun 2013 15:13:43 +0200 Subject: [PATCH 14/20] block: Make BlockJobTypes const Signed-off-by: Kevin Wolf --- block/commit.c | 2 +- block/mirror.c | 2 +- block/stream.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/block/commit.c b/block/commit.c index 553447efe7..2227fc2e6c 100644 --- a/block/commit.c +++ b/block/commit.c @@ -173,7 +173,7 @@ static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); } -static BlockJobType commit_job_type = { +static const BlockJobType commit_job_type = { .instance_size = sizeof(CommitBlockJob), .job_type = "commit", .set_speed = commit_set_speed, diff --git a/block/mirror.c b/block/mirror.c index 1ae724f705..bed4a7eadd 100644 --- a/block/mirror.c +++ b/block/mirror.c @@ -524,7 +524,7 @@ static void mirror_complete(BlockJob *job, Error **errp) block_job_resume(job); } -static BlockJobType mirror_job_type = { +static const BlockJobType mirror_job_type = { .instance_size = sizeof(MirrorBlockJob), .job_type = "mirror", .set_speed = mirror_set_speed, diff --git a/block/stream.c b/block/stream.c index d6df06f35a..7fe9e486bf 100644 --- a/block/stream.c +++ b/block/stream.c @@ -198,7 +198,7 @@ static void stream_set_speed(BlockJob *job, int64_t speed, Error **errp) ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); } -static BlockJobType stream_job_type = { +static const BlockJobType stream_job_type = { .instance_size = sizeof(StreamBlockJob), .job_type = "stream", .set_speed = stream_set_speed, From 0b3f21e6a99c025c829d342ee417f317fe2e03b2 Mon Sep 17 00:00:00 2001 From: "Richard W.M. Jones" Date: Tue, 25 Jun 2013 18:15:18 +0100 Subject: [PATCH 15/20] block/ssh: Set bdrv_has_zero_init according to the file type. If the remote is a regular file, set it to true (ie. reads of uninitialized areas in a newly created file will return zeroes). If we can't prove that, return false (a safe default). Tested by adding a debugging print statement [not part of this commit] and creating a remote file and a remote block device: $ ./qemu-img create ssh://localhost/tmp/new 100M Formatting 'ssh://localhost/tmp/new', fmt=raw size=104857600 filename ssh://localhost/tmp/new: has_zero_init = 1 $ sudo lvcreate -L 1G -n tmp /dev/fedora Logical volume "tmp" created $ ./qemu-img create ssh://localhost/dev/fedora/tmp 1G Formatting 'ssh://localhost/dev/fedora/tmp', fmt=raw size=1073741824 filename ssh://localhost/dev/fedora/tmp: has_zero_init = 0 Cc: Kevin Wolf Cc: qemu-stable@nongnu.org Signed-off-by: Richard W.M. Jones Signed-off-by: Kevin Wolf --- block/ssh.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/block/ssh.c b/block/ssh.c index 246a70d274..d7e7bf8dd2 100644 --- a/block/ssh.c +++ b/block/ssh.c @@ -716,6 +716,21 @@ static void ssh_close(BlockDriverState *bs) ssh_state_free(s); } +static int ssh_has_zero_init(BlockDriverState *bs) +{ + BDRVSSHState *s = bs->opaque; + /* Assume false, unless we can positively prove it's true. */ + int has_zero_init = 0; + + if (s->attrs.flags & LIBSSH2_SFTP_ATTR_PERMISSIONS) { + if (s->attrs.permissions & LIBSSH2_SFTP_S_IFREG) { + has_zero_init = 1; + } + } + + return has_zero_init; +} + static void restart_coroutine(void *opaque) { Coroutine *co = opaque; @@ -1037,6 +1052,7 @@ static BlockDriver bdrv_ssh = { .bdrv_file_open = ssh_file_open, .bdrv_create = ssh_create, .bdrv_close = ssh_close, + .bdrv_has_zero_init = ssh_has_zero_init, .bdrv_co_readv = ssh_co_readv, .bdrv_co_writev = ssh_co_writev, .bdrv_getlength = ssh_getlength, From 8ab6feec2c7500faafd9a4571fb40d03dd360a64 Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Wed, 26 Jun 2013 09:41:57 +0200 Subject: [PATCH 16/20] gluster: Return bdrv_has_zero_init = 0 GlusterFS volumes can be backed by block devices, in which case bdrv_create() doesn't make sure that the image is zeroed out. It is currently not possibly to detect whether a given image is backed by a file or a block device, and incorrectly assuming that it is zeroed corrupts images during qemu-img convert, so let's err on the side of caution and always return 0. Cc: qemu-stable@nongnu.org Signed-off-by: Kevin Wolf --- block/gluster.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/block/gluster.c b/block/gluster.c index 91acde248c..61424bcb01 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -574,6 +574,12 @@ static void qemu_gluster_close(BlockDriverState *bs) glfs_fini(s->glfs); } +static int qemu_gluster_has_zero_init(BlockDriverState *bs) +{ + /* GlusterFS volume could be backed by a block device */ + return 0; +} + static QEMUOptionParameter qemu_gluster_create_options[] = { { .name = BLOCK_OPT_SIZE, @@ -595,6 +601,7 @@ static BlockDriver bdrv_gluster = { .bdrv_aio_readv = qemu_gluster_aio_readv, .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_has_zero_init = qemu_gluster_has_zero_init, .create_options = qemu_gluster_create_options, }; @@ -610,6 +617,7 @@ static BlockDriver bdrv_gluster_tcp = { .bdrv_aio_readv = qemu_gluster_aio_readv, .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_has_zero_init = qemu_gluster_has_zero_init, .create_options = qemu_gluster_create_options, }; @@ -625,6 +633,7 @@ static BlockDriver bdrv_gluster_unix = { .bdrv_aio_readv = qemu_gluster_aio_readv, .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_has_zero_init = qemu_gluster_has_zero_init, .create_options = qemu_gluster_create_options, }; @@ -640,6 +649,7 @@ static BlockDriver bdrv_gluster_rdma = { .bdrv_aio_readv = qemu_gluster_aio_readv, .bdrv_aio_writev = qemu_gluster_aio_writev, .bdrv_aio_flush = qemu_gluster_aio_flush, + .bdrv_has_zero_init = qemu_gluster_has_zero_init, .create_options = qemu_gluster_create_options, }; From 8ed610a1c983dd2ed1eed8841036af55751d115f Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Wed, 26 Jun 2013 17:24:32 +0800 Subject: [PATCH 17/20] vmdk: remove wrong calculation of relative path When creating image with backing file, the driver tries to calculate the relative path from created image file to backing file, but the path computation is incorrect. e.g.: $ qemu-img create -f vmdk -b vmdk-data-disk.vmdk vmdk-data-snapshot1 Formatting 'vmdk-data-snapshot1', fmt=vmdk size=10737418240 backing_file='vmdk-data-disk.vmdk' compat6=off zeroed_grain=off $ qemu-img info vmdk-data-snapshot1 image: vmdk-data-snapshot1 file format: vmdk virtual size: 10G (10737418240 bytes) disk size: 12K -> backing file: disk.vmdk The common part in file names, "vmdk-data-", is incorrectly forgotten by relative_path(). As the VMDK specification has no restriction on parentNameHint to be relative path, we simply remove this by using the backing_file option. Cc: qemu-stable@nongnu.org Signed-off-by: Fam Zheng Signed-off-by: Kevin Wolf --- block/vmdk.c | 44 +------------------------------------------- 1 file changed, 1 insertion(+), 43 deletions(-) diff --git a/block/vmdk.c b/block/vmdk.c index 975e1d41ff..a28fb5e8e9 100644 --- a/block/vmdk.c +++ b/block/vmdk.c @@ -1487,45 +1487,6 @@ static int filename_decompose(const char *filename, char *path, char *prefix, return VMDK_OK; } -static int relative_path(char *dest, int dest_size, - const char *base, const char *target) -{ - int i = 0; - int n = 0; - const char *p, *q; -#ifdef _WIN32 - const char *sep = "\\"; -#else - const char *sep = "/"; -#endif - - if (!(dest && base && target)) { - return VMDK_ERROR; - } - if (path_is_absolute(target)) { - pstrcpy(dest, dest_size, target); - return VMDK_OK; - } - while (base[i] == target[i]) { - i++; - } - p = &base[i]; - q = &target[i]; - while (*p) { - if (*p == *sep) { - n++; - } - p++; - } - dest[0] = '\0'; - for (; n; n--) { - pstrcat(dest, dest_size, ".."); - pstrcat(dest, dest_size, sep); - } - pstrcat(dest, dest_size, q); - return VMDK_OK; -} - static int vmdk_create(const char *filename, QEMUOptionParameter *options) { int fd, idx = 0; @@ -1625,7 +1586,6 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) return -ENOTSUP; } if (backing_file) { - char parent_filename[PATH_MAX]; BlockDriverState *bs = bdrv_new(""); ret = bdrv_open(bs, backing_file, NULL, 0, NULL); if (ret != 0) { @@ -1638,10 +1598,8 @@ static int vmdk_create(const char *filename, QEMUOptionParameter *options) } parent_cid = vmdk_read_cid(bs, 0); bdrv_delete(bs); - relative_path(parent_filename, sizeof(parent_filename), - filename, backing_file); snprintf(parent_desc_line, sizeof(parent_desc_line), - "parentFileNameHint=\"%s\"", parent_filename); + "parentFileNameHint=\"%s\"", backing_file); } /* Create extents */ From 72c6cc94daa727f41ecfc2b2ff94aa6f0e459b7f Mon Sep 17 00:00:00 2001 From: Kevin Wolf Date: Fri, 28 Jun 2013 10:21:00 +0200 Subject: [PATCH 18/20] vpc: Implement .bdrv_has_zero_init Depending on the subformat, has_zero_init on VHD must behave like raw and query the underlying storage (fixed) or like other sparse formats that can always return 1 (dynamic, differencing). Signed-off-by: Kevin Wolf --- block/vpc.c | 25 +++++++++++++++++++------ 1 file changed, 19 insertions(+), 6 deletions(-) diff --git a/block/vpc.c b/block/vpc.c index 3cad52e54c..fe4f311d50 100644 --- a/block/vpc.c +++ b/block/vpc.c @@ -786,6 +786,18 @@ static int vpc_create(const char *filename, QEMUOptionParameter *options) return ret; } +static int vpc_has_zero_init(BlockDriverState *bs) +{ + BDRVVPCState *s = bs->opaque; + struct vhd_footer *footer = (struct vhd_footer *) s->footer_buf; + + if (cpu_to_be32(footer->type) == VHD_FIXED) { + return bdrv_has_zero_init(bs->file); + } else { + return 1; + } +} + static void vpc_close(BlockDriverState *bs) { BDRVVPCState *s = bs->opaque; @@ -818,16 +830,17 @@ static BlockDriver bdrv_vpc = { .format_name = "vpc", .instance_size = sizeof(BDRVVPCState), - .bdrv_probe = vpc_probe, - .bdrv_open = vpc_open, - .bdrv_close = vpc_close, - .bdrv_reopen_prepare = vpc_reopen_prepare, - .bdrv_create = vpc_create, + .bdrv_probe = vpc_probe, + .bdrv_open = vpc_open, + .bdrv_close = vpc_close, + .bdrv_reopen_prepare = vpc_reopen_prepare, + .bdrv_create = vpc_create, .bdrv_read = vpc_co_read, .bdrv_write = vpc_co_write, - .create_options = vpc_create_options, + .create_options = vpc_create_options, + .bdrv_has_zero_init = vpc_has_zero_init, }; static void bdrv_vpc_init(void) From 3ac216270a62418519c08e88c17005a8f1539cf2 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Fri, 28 Jun 2013 12:47:42 +0200 Subject: [PATCH 19/20] block: change default of .has_zero_init to 0 .has_zero_init defaults to 1 for all formats and protocols. this is a dangerous default since this means that all new added drivers need to manually overwrite it to 0 if they do not ensure that a device is zero initialized after bdrv_create(). if a driver needs to explicitly set this value to 1 its easier to verify the correctness in the review process. during review of the existing drivers it turned out that ssh and gluster had a wrong default of 1. both protocols support host_devices as backend which are not by default zero initialized. this wrong assumption will lead to possible corruption if qemu-img convert is used to write to such a backend. vpc and vmdk also defaulted to 1 altough they support fixed respectively flat extends. this has to be addresses in separate patches. both formats as well as the mentioned ssh and gluster are turned to the default of 0 with this patch for safety. a similar problem with the wrong default existed for iscsi most likely because the driver developer did oversee the default value of 1. Signed-off-by: Peter Lieven Signed-off-by: Kevin Wolf --- block.c | 8 +++++++- block/cow.c | 1 + block/qcow.c | 1 + block/qcow2.c | 1 + block/qed.c | 1 + block/raw-posix.c | 10 +--------- block/raw-win32.c | 7 +------ block/rbd.c | 1 + block/sheepdog.c | 1 + block/vdi.c | 1 + include/block/block.h | 1 + 11 files changed, 17 insertions(+), 16 deletions(-) diff --git a/block.c b/block.c index 0898f6b4b6..6c493ad457 100644 --- a/block.c +++ b/block.c @@ -2911,6 +2911,11 @@ void bdrv_flush_all(void) } } +int bdrv_has_zero_init_1(BlockDriverState *bs) +{ + return 1; +} + int bdrv_has_zero_init(BlockDriverState *bs) { assert(bs->drv); @@ -2919,7 +2924,8 @@ int bdrv_has_zero_init(BlockDriverState *bs) return bs->drv->bdrv_has_zero_init(bs); } - return 1; + /* safe default */ + return 0; } typedef struct BdrvCoIsAllocatedData { diff --git a/block/cow.c b/block/cow.c index 9f94599661..1cc2e89c7c 100644 --- a/block/cow.c +++ b/block/cow.c @@ -340,6 +340,7 @@ static BlockDriver bdrv_cow = { .bdrv_open = cow_open, .bdrv_close = cow_close, .bdrv_create = cow_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_read = cow_co_read, .bdrv_write = cow_co_write, diff --git a/block/qcow.c b/block/qcow.c index e2a64c79b1..5239bd68f1 100644 --- a/block/qcow.c +++ b/block/qcow.c @@ -892,6 +892,7 @@ static BlockDriver bdrv_qcow = { .bdrv_close = qcow_close, .bdrv_reopen_prepare = qcow_reopen_prepare, .bdrv_create = qcow_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_readv = qcow_co_readv, .bdrv_co_writev = qcow_co_writev, diff --git a/block/qcow2.c b/block/qcow2.c index 9383990193..0eceefe2cd 100644 --- a/block/qcow2.c +++ b/block/qcow2.c @@ -1785,6 +1785,7 @@ static BlockDriver bdrv_qcow2 = { .bdrv_close = qcow2_close, .bdrv_reopen_prepare = qcow2_reopen_prepare, .bdrv_create = qcow2_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_is_allocated = qcow2_co_is_allocated, .bdrv_set_key = qcow2_set_key, .bdrv_make_empty = qcow2_make_empty, diff --git a/block/qed.c b/block/qed.c index 4651403fef..f767b0528c 100644 --- a/block/qed.c +++ b/block/qed.c @@ -1574,6 +1574,7 @@ static BlockDriver bdrv_qed = { .bdrv_close = bdrv_qed_close, .bdrv_reopen_prepare = bdrv_qed_reopen_prepare, .bdrv_create = bdrv_qed_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_is_allocated = bdrv_qed_co_is_allocated, .bdrv_make_empty = bdrv_qed_make_empty, .bdrv_aio_readv = bdrv_qed_aio_readv, diff --git a/block/raw-posix.c b/block/raw-posix.c index 90ce9f86af..ba721d3f5b 100644 --- a/block/raw-posix.c +++ b/block/raw-posix.c @@ -1199,6 +1199,7 @@ static BlockDriver bdrv_file = { .bdrv_reopen_abort = raw_reopen_abort, .bdrv_close = raw_close, .bdrv_create = raw_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_is_allocated = raw_co_is_allocated, .bdrv_aio_readv = raw_aio_readv, @@ -1527,11 +1528,6 @@ static int hdev_create(const char *filename, QEMUOptionParameter *options) return ret; } -static int hdev_has_zero_init(BlockDriverState *bs) -{ - return 0; -} - static BlockDriver bdrv_host_device = { .format_name = "host_device", .protocol_name = "host_device", @@ -1544,7 +1540,6 @@ static BlockDriver bdrv_host_device = { .bdrv_reopen_abort = raw_reopen_abort, .bdrv_create = hdev_create, .create_options = raw_create_options, - .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, @@ -1669,7 +1664,6 @@ static BlockDriver bdrv_host_floppy = { .bdrv_reopen_abort = raw_reopen_abort, .bdrv_create = hdev_create, .create_options = raw_create_options, - .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, @@ -1771,7 +1765,6 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_reopen_abort = raw_reopen_abort, .bdrv_create = hdev_create, .create_options = raw_create_options, - .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, @@ -1893,7 +1886,6 @@ static BlockDriver bdrv_host_cdrom = { .bdrv_reopen_abort = raw_reopen_abort, .bdrv_create = hdev_create, .create_options = raw_create_options, - .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, diff --git a/block/raw-win32.c b/block/raw-win32.c index 7c03b6df52..9b5b2af4e8 100644 --- a/block/raw-win32.c +++ b/block/raw-win32.c @@ -459,6 +459,7 @@ static BlockDriver bdrv_file = { .bdrv_file_open = raw_open, .bdrv_close = raw_close, .bdrv_create = raw_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, @@ -570,11 +571,6 @@ static int hdev_open(BlockDriverState *bs, QDict *options, int flags) return 0; } -static int hdev_has_zero_init(BlockDriverState *bs) -{ - return 0; -} - static BlockDriver bdrv_host_device = { .format_name = "host_device", .protocol_name = "host_device", @@ -582,7 +578,6 @@ static BlockDriver bdrv_host_device = { .bdrv_probe_device = hdev_probe_device, .bdrv_file_open = hdev_open, .bdrv_close = raw_close, - .bdrv_has_zero_init = hdev_has_zero_init, .bdrv_aio_readv = raw_aio_readv, .bdrv_aio_writev = raw_aio_writev, diff --git a/block/rbd.c b/block/rbd.c index 0f2608b287..cb71751218 100644 --- a/block/rbd.c +++ b/block/rbd.c @@ -996,6 +996,7 @@ static BlockDriver bdrv_rbd = { .bdrv_file_open = qemu_rbd_open, .bdrv_close = qemu_rbd_close, .bdrv_create = qemu_rbd_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_get_info = qemu_rbd_getinfo, .create_options = qemu_rbd_create_options, .bdrv_getlength = qemu_rbd_getlength, diff --git a/block/sheepdog.c b/block/sheepdog.c index 1b7c3f19b0..b397b5b4d3 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -2401,6 +2401,7 @@ static BlockDriver bdrv_sheepdog_unix = { .bdrv_file_open = sd_open, .bdrv_close = sd_close, .bdrv_create = sd_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_getlength = sd_getlength, .bdrv_truncate = sd_truncate, diff --git a/block/vdi.c b/block/vdi.c index 2662d89af6..8a915257e8 100644 --- a/block/vdi.c +++ b/block/vdi.c @@ -779,6 +779,7 @@ static BlockDriver bdrv_vdi = { .bdrv_close = vdi_close, .bdrv_reopen_prepare = vdi_reopen_prepare, .bdrv_create = vdi_create, + .bdrv_has_zero_init = bdrv_has_zero_init_1, .bdrv_co_is_allocated = vdi_co_is_allocated, .bdrv_make_empty = vdi_make_empty, diff --git a/include/block/block.h b/include/block/block.h index 2307f67b0e..dd8eca1be1 100644 --- a/include/block/block.h +++ b/include/block/block.h @@ -272,6 +272,7 @@ void bdrv_drain_all(void); int bdrv_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); int bdrv_co_discard(BlockDriverState *bs, int64_t sector_num, int nb_sectors); +int bdrv_has_zero_init_1(BlockDriverState *bs); int bdrv_has_zero_init(BlockDriverState *bs); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); From 721da65c6eba9c053d73744ecaa882b0f7cd634a Mon Sep 17 00:00:00 2001 From: Mark Cave-Ayland Date: Fri, 28 Jun 2013 14:43:16 +0100 Subject: [PATCH 20/20] cmd646: fix build when DEBUG_IDE is enabled. Make sure we use the correct TARGET/PRI macros in the debug statements. Signed-off-by: Mark Cave-Ayland CC: Kevin Wolf Signed-off-by: Kevin Wolf --- hw/ide/cmd646.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/ide/cmd646.c b/hw/ide/cmd646.c index a73eb9a5f7..99167011dd 100644 --- a/hw/ide/cmd646.c +++ b/hw/ide/cmd646.c @@ -154,7 +154,7 @@ static uint64_t bmdma_read(void *opaque, hwaddr addr, break; } #ifdef DEBUG_IDE - printf("bmdma: readb 0x%02x : 0x%02x\n", addr, val); + printf("bmdma: readb " TARGET_FMT_plx " : 0x%02x\n", addr, val); #endif return val; } @@ -170,7 +170,7 @@ static void bmdma_write(void *opaque, hwaddr addr, } #ifdef DEBUG_IDE - printf("bmdma: writeb 0x%02x : 0x%02x\n", addr, val); + printf("bmdma: writeb " TARGET_FMT_plx " : 0x%" PRIx64 "\n", addr, val); #endif switch(addr & 3) { case 0: