diff --git a/Makefile.objs b/Makefile.objs index 4412757309..b1f3e22547 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -42,7 +42,8 @@ coroutine-obj-$(CONFIG_WIN32) += coroutine-win32.o # block-obj-y is code used by both qemu system emulation and qemu-img block-obj-y = cutils.o iov.o cache-utils.o qemu-option.o module.o async.o -block-obj-y += nbd.o block.o aio.o aes.o qemu-config.o qemu-progress.o qemu-sockets.o +block-obj-y += nbd.o block.o blockjob.o aio.o aes.o qemu-config.o +block-obj-y += qemu-progress.o qemu-sockets.o uri.o block-obj-y += $(coroutine-obj-y) $(qobject-obj-y) $(version-obj-y) block-obj-$(CONFIG_POSIX) += posix-aio-compat.o block-obj-$(CONFIG_LINUX_AIO) += linux-aio.o @@ -59,7 +60,7 @@ endif # suppress *all* target specific code in case of system emulation, i.e. a # single QEMU executable should support all CPUs and machines. -common-obj-y = $(block-obj-y) blockdev.o +common-obj-y = $(block-obj-y) blockdev.o block/ common-obj-y += net.o net/ common-obj-y += qom/ common-obj-y += readline.o console.o cursor.o diff --git a/QMP/qmp-events.txt b/QMP/qmp-events.txt index 287805825f..987c5756b3 100644 --- a/QMP/qmp-events.txt +++ b/QMP/qmp-events.txt @@ -50,7 +50,8 @@ Emitted when a block job has been cancelled. Data: -- "type": Job type ("stream" for image streaming, json-string) +- "type": Job type (json-string; "stream" for image streaming + "commit" for block commit) - "device": Device name (json-string) - "len": Maximum progress value (json-int) - "offset": Current progress value (json-int) @@ -73,7 +74,8 @@ Emitted when a block job has completed. Data: -- "type": Job type ("stream" for image streaming, json-string) +- "type": Job type (json-string; "stream" for image streaming + "commit" for block commit) - "device": Device name (json-string) - "len": Maximum progress value (json-int) - "offset": Current progress value (json-int) @@ -94,6 +96,28 @@ Example: "speed": 0 }, "timestamp": { "seconds": 1267061043, "microseconds": 959568 } } +BLOCK_JOB_ERROR +--------------- + +Emitted when a block job encounters an error. + +Data: + +- "device": device name (json-string) +- "operation": I/O operation (json-string, "read" or "write") +- "action": action that has been taken, it's one of the following (json-string): + "ignore": error has been ignored, the job may fail later + "report": error will be reported and the job canceled + "stop": error caused job to be paused + +Example: + +{ "event": "BLOCK_JOB_ERROR", + "data": { "device": "ide0-hd1", + "operation": "write", + "action": "stop" }, + "timestamp": { "seconds": 1265044230, "microseconds": 450486 } } + DEVICE_TRAY_MOVED ----------------- diff --git a/aio.c b/aio.c index 0a9eb10c76..c738a4e15d 100644 --- a/aio.c +++ b/aio.c @@ -119,7 +119,7 @@ bool qemu_aio_wait(void) return true; } - walking_handlers = 1; + walking_handlers++; FD_ZERO(&rdfds); FD_ZERO(&wrfds); @@ -147,7 +147,7 @@ bool qemu_aio_wait(void) } } - walking_handlers = 0; + walking_handlers--; /* No AIO operations? Get us out of here */ if (!busy) { @@ -159,14 +159,14 @@ bool qemu_aio_wait(void) /* if we have any readable fds, dispatch event */ if (ret > 0) { - walking_handlers = 1; - /* we have to walk very carefully in case * qemu_aio_set_fd_handler is called while we're walking */ node = QLIST_FIRST(&aio_handlers); while (node) { AioHandler *tmp; + walking_handlers++; + if (!node->deleted && FD_ISSET(node->fd, &rdfds) && node->io_read) { @@ -181,13 +181,13 @@ bool qemu_aio_wait(void) tmp = node; node = QLIST_NEXT(node, node); - if (tmp->deleted) { + walking_handlers--; + + if (!walking_handlers && tmp->deleted) { QLIST_REMOVE(tmp, node); g_free(tmp); } } - - walking_handlers = 0; } return true; diff --git a/block-migration.c b/block-migration.c index 7def8ab197..ed933017f9 100644 --- a/block-migration.c +++ b/block-migration.c @@ -519,6 +519,8 @@ static void blk_mig_cleanup(void) BlkMigDevState *bmds; BlkMigBlock *blk; + bdrv_drain_all(); + set_dirty_tracking(0); while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { diff --git a/block.c b/block.c index 751ebdc06b..c108a76952 100644 --- a/block.c +++ b/block.c @@ -26,8 +26,10 @@ #include "trace.h" #include "monitor.h" #include "block_int.h" +#include "blockjob.h" #include "module.h" #include "qjson.h" +#include "sysemu.h" #include "qemu-coroutine.h" #include "qmp-commands.h" #include "qemu-timer.h" @@ -1386,7 +1388,8 @@ void bdrv_set_dev_ops(BlockDriverState *bs, const BlockDevOps *ops, } void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockQMPEventAction action, int is_read) + enum MonitorEvent ev, + BlockErrorAction action, bool is_read) { QObject *data; const char *action_str; @@ -1409,7 +1412,7 @@ void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, bdrv->device_name, action_str, is_read ? "read" : "write"); - monitor_protocol_event(QEVENT_BLOCK_IO_ERROR, data); + monitor_protocol_event(ev, data); qobject_decref(data); } @@ -1724,6 +1727,149 @@ int bdrv_change_backing_file(BlockDriverState *bs, return ret; } +/* + * Finds the image layer in the chain that has 'bs' as its backing file. + * + * active is the current topmost image. + * + * Returns NULL if bs is not found in active's image chain, + * or if active == bs. + */ +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs) +{ + BlockDriverState *overlay = NULL; + BlockDriverState *intermediate; + + assert(active != NULL); + assert(bs != NULL); + + /* if bs is the same as active, then by definition it has no overlay + */ + if (active == bs) { + return NULL; + } + + intermediate = active; + while (intermediate->backing_hd) { + if (intermediate->backing_hd == bs) { + overlay = intermediate; + break; + } + intermediate = intermediate->backing_hd; + } + + return overlay; +} + +typedef struct BlkIntermediateStates { + BlockDriverState *bs; + QSIMPLEQ_ENTRY(BlkIntermediateStates) entry; +} BlkIntermediateStates; + + +/* + * Drops images above 'base' up to and including 'top', and sets the image + * above 'top' to have base as its backing file. + * + * Requires that the overlay to 'top' is opened r/w, so that the backing file + * information in 'bs' can be properly updated. + * + * E.g., this will convert the following chain: + * bottom <- base <- intermediate <- top <- active + * + * to + * + * bottom <- base <- active + * + * It is allowed for bottom==base, in which case it converts: + * + * base <- intermediate <- top <- active + * + * to + * + * base <- active + * + * Error conditions: + * if active == top, that is considered an error + * + */ +int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, + BlockDriverState *base) +{ + BlockDriverState *intermediate; + BlockDriverState *base_bs = NULL; + BlockDriverState *new_top_bs = NULL; + BlkIntermediateStates *intermediate_state, *next; + int ret = -EIO; + + QSIMPLEQ_HEAD(states_to_delete, BlkIntermediateStates) states_to_delete; + QSIMPLEQ_INIT(&states_to_delete); + + if (!top->drv || !base->drv) { + goto exit; + } + + new_top_bs = bdrv_find_overlay(active, top); + + if (new_top_bs == NULL) { + /* we could not find the image above 'top', this is an error */ + goto exit; + } + + /* special case of new_top_bs->backing_hd already pointing to base - nothing + * to do, no intermediate images */ + if (new_top_bs->backing_hd == base) { + ret = 0; + goto exit; + } + + intermediate = top; + + /* now we will go down through the list, and add each BDS we find + * into our deletion queue, until we hit the 'base' + */ + while (intermediate) { + intermediate_state = g_malloc0(sizeof(BlkIntermediateStates)); + intermediate_state->bs = intermediate; + QSIMPLEQ_INSERT_TAIL(&states_to_delete, intermediate_state, entry); + + if (intermediate->backing_hd == base) { + base_bs = intermediate->backing_hd; + break; + } + intermediate = intermediate->backing_hd; + } + if (base_bs == NULL) { + /* something went wrong, we did not end at the base. safely + * unravel everything, and exit with error */ + goto exit; + } + + /* success - we can delete the intermediate states, and link top->base */ + ret = bdrv_change_backing_file(new_top_bs, base_bs->filename, + base_bs->drv ? base_bs->drv->format_name : ""); + if (ret) { + goto exit; + } + new_top_bs->backing_hd = base_bs; + + + QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { + /* so that bdrv_close() does not recursively close the chain */ + intermediate_state->bs->backing_hd = NULL; + bdrv_delete(intermediate_state->bs); + } + ret = 0; + +exit: + QSIMPLEQ_FOREACH_SAFE(intermediate_state, &states_to_delete, entry, next) { + g_free(intermediate_state); + } + return ret; +} + + static int bdrv_check_byte_request(BlockDriverState *bs, int64_t offset, size_t size) { @@ -2330,18 +2476,51 @@ void bdrv_set_io_limits(BlockDriverState *bs, bs->io_limits_enabled = bdrv_io_limits_enabled(bs); } -void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error, - BlockErrorAction on_write_error) +void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, + BlockdevOnError on_write_error) { bs->on_read_error = on_read_error; bs->on_write_error = on_write_error; } -BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read) +BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read) { return is_read ? bs->on_read_error : bs->on_write_error; } +BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error) +{ + BlockdevOnError on_err = is_read ? bs->on_read_error : bs->on_write_error; + + switch (on_err) { + case BLOCKDEV_ON_ERROR_ENOSPC: + return (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_STOP: + return BDRV_ACTION_STOP; + case BLOCKDEV_ON_ERROR_REPORT: + return BDRV_ACTION_REPORT; + case BLOCKDEV_ON_ERROR_IGNORE: + return BDRV_ACTION_IGNORE; + default: + abort(); + } +} + +/* This is done by device models because, while the block layer knows + * about the error, it does not know whether an operation comes from + * the device or the block layer (from a job, for example). + */ +void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, + bool is_read, int error) +{ + assert(error >= 0); + bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read); + if (action == BDRV_ACTION_STOP) { + vm_stop(RUN_STATE_IO_ERROR); + bdrv_iostatus_set_err(bs, error); + } +} + int bdrv_is_read_only(BlockDriverState *bs) { return bs->read_only; @@ -2974,6 +3153,22 @@ int bdrv_get_backing_file_depth(BlockDriverState *bs) return 1 + bdrv_get_backing_file_depth(bs->backing_hd); } +BlockDriverState *bdrv_find_base(BlockDriverState *bs) +{ + BlockDriverState *curr_bs = NULL; + + if (!bs) { + return NULL; + } + + curr_bs = bs; + + while (curr_bs->backing_hd) { + curr_bs = curr_bs->backing_hd; + } + return curr_bs; +} + #define NB_SUFFIXES 4 char *get_human_readable_size(char *buf, int buf_size, int64_t size) @@ -4049,9 +4244,9 @@ void bdrv_iostatus_enable(BlockDriverState *bs) bool bdrv_iostatus_is_enabled(const BlockDriverState *bs) { return (bs->iostatus_enabled && - (bs->on_write_error == BLOCK_ERR_STOP_ENOSPC || - bs->on_write_error == BLOCK_ERR_STOP_ANY || - bs->on_read_error == BLOCK_ERR_STOP_ANY)); + (bs->on_write_error == BLOCKDEV_ON_ERROR_ENOSPC || + bs->on_write_error == BLOCKDEV_ON_ERROR_STOP || + bs->on_read_error == BLOCKDEV_ON_ERROR_STOP)); } void bdrv_iostatus_disable(BlockDriverState *bs) @@ -4066,14 +4261,10 @@ void bdrv_iostatus_reset(BlockDriverState *bs) } } -/* XXX: Today this is set by device models because it makes the implementation - quite simple. However, the block layer knows about the error, so it's - possible to implement this without device models being involved */ void bdrv_iostatus_set_err(BlockDriverState *bs, int error) { - if (bdrv_iostatus_is_enabled(bs) && - bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { - assert(error >= 0); + assert(bdrv_iostatus_is_enabled(bs)); + if (bs->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { bs->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : BLOCK_DEVICE_IO_STATUS_FAILED; } @@ -4247,130 +4438,3 @@ out: return ret; } - -void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, - int64_t speed, BlockDriverCompletionFunc *cb, - void *opaque, Error **errp) -{ - BlockJob *job; - - if (bs->job || bdrv_in_use(bs)) { - error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); - return NULL; - } - bdrv_set_in_use(bs, 1); - - job = g_malloc0(job_type->instance_size); - job->job_type = job_type; - job->bs = bs; - job->cb = cb; - job->opaque = opaque; - job->busy = true; - bs->job = job; - - /* Only set speed when necessary to avoid NotSupported error */ - if (speed != 0) { - Error *local_err = NULL; - - block_job_set_speed(job, speed, &local_err); - if (error_is_set(&local_err)) { - bs->job = NULL; - g_free(job); - bdrv_set_in_use(bs, 0); - error_propagate(errp, local_err); - return NULL; - } - } - return job; -} - -void block_job_complete(BlockJob *job, int ret) -{ - BlockDriverState *bs = job->bs; - - assert(bs->job == job); - job->cb(job->opaque, ret); - bs->job = NULL; - g_free(job); - bdrv_set_in_use(bs, 0); -} - -void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) -{ - Error *local_err = NULL; - - if (!job->job_type->set_speed) { - error_set(errp, QERR_NOT_SUPPORTED); - return; - } - job->job_type->set_speed(job, speed, &local_err); - if (error_is_set(&local_err)) { - error_propagate(errp, local_err); - return; - } - - job->speed = speed; -} - -void block_job_cancel(BlockJob *job) -{ - job->cancelled = true; - if (job->co && !job->busy) { - qemu_coroutine_enter(job->co, NULL); - } -} - -bool block_job_is_cancelled(BlockJob *job) -{ - return job->cancelled; -} - -struct BlockCancelData { - BlockJob *job; - BlockDriverCompletionFunc *cb; - void *opaque; - bool cancelled; - int ret; -}; - -static void block_job_cancel_cb(void *opaque, int ret) -{ - struct BlockCancelData *data = opaque; - - data->cancelled = block_job_is_cancelled(data->job); - data->ret = ret; - data->cb(data->opaque, ret); -} - -int block_job_cancel_sync(BlockJob *job) -{ - struct BlockCancelData data; - BlockDriverState *bs = job->bs; - - assert(bs->job == job); - - /* Set up our own callback to store the result and chain to - * the original callback. - */ - data.job = job; - data.cb = job->cb; - data.opaque = job->opaque; - data.ret = -EINPROGRESS; - job->cb = block_job_cancel_cb; - job->opaque = &data; - block_job_cancel(job); - while (data.ret == -EINPROGRESS) { - qemu_aio_wait(); - } - return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; -} - -void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns) -{ - /* Check cancellation *before* setting busy = false, too! */ - if (!block_job_is_cancelled(job)) { - job->busy = false; - co_sleep_ns(clock, ns); - job->busy = true; - } -} diff --git a/block.h b/block.h index b1095d8599..e2d89d7bc1 100644 --- a/block.h +++ b/block.h @@ -6,9 +6,11 @@ #include "qemu-option.h" #include "qemu-coroutine.h" #include "qobject.h" +#include "qapi-types.h" /* block.c */ typedef struct BlockDriver BlockDriver; +typedef struct BlockJob BlockJob; typedef struct BlockDriverInfo { /* in bytes, 0 if irrelevant */ @@ -88,14 +90,9 @@ typedef struct BlockDevOps { #define BDRV_SECTOR_SIZE (1ULL << BDRV_SECTOR_BITS) #define BDRV_SECTOR_MASK ~(BDRV_SECTOR_SIZE - 1) -typedef enum { - BLOCK_ERR_REPORT, BLOCK_ERR_IGNORE, BLOCK_ERR_STOP_ENOSPC, - BLOCK_ERR_STOP_ANY -} BlockErrorAction; - typedef enum { BDRV_ACTION_REPORT, BDRV_ACTION_IGNORE, BDRV_ACTION_STOP -} BlockQMPEventAction; +} BlockErrorAction; typedef QSIMPLEQ_HEAD(BlockReopenQueue, BlockReopenQueueEntry) BlockReopenQueue; @@ -111,8 +108,6 @@ void bdrv_iostatus_reset(BlockDriverState *bs); void bdrv_iostatus_disable(BlockDriverState *bs); bool bdrv_iostatus_is_enabled(const BlockDriverState *bs); void bdrv_iostatus_set_err(BlockDriverState *bs, int error); -void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, - BlockQMPEventAction action, int is_read); void bdrv_info_print(Monitor *mon, const QObject *data); void bdrv_info(Monitor *mon, QObject **ret_data); void bdrv_stats_print(Monitor *mon, const QObject *data); @@ -203,6 +198,11 @@ int bdrv_commit_all(void); int bdrv_change_backing_file(BlockDriverState *bs, const char *backing_file, const char *backing_fmt); void bdrv_register(BlockDriver *bdrv); +int bdrv_drop_intermediate(BlockDriverState *active, BlockDriverState *top, + BlockDriverState *base); +BlockDriverState *bdrv_find_overlay(BlockDriverState *active, + BlockDriverState *bs); +BlockDriverState *bdrv_find_base(BlockDriverState *bs); typedef struct BdrvCheckResult { @@ -277,9 +277,12 @@ int bdrv_has_zero_init(BlockDriverState *bs); int bdrv_is_allocated(BlockDriverState *bs, int64_t sector_num, int nb_sectors, int *pnum); -void bdrv_set_on_error(BlockDriverState *bs, BlockErrorAction on_read_error, - BlockErrorAction on_write_error); -BlockErrorAction bdrv_get_on_error(BlockDriverState *bs, int is_read); +void bdrv_set_on_error(BlockDriverState *bs, BlockdevOnError on_read_error, + BlockdevOnError on_write_error); +BlockdevOnError bdrv_get_on_error(BlockDriverState *bs, bool is_read); +BlockErrorAction bdrv_get_error_action(BlockDriverState *bs, bool is_read, int error); +void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, + bool is_read, int error); int bdrv_is_read_only(BlockDriverState *bs); int bdrv_is_sg(BlockDriverState *bs); int bdrv_enable_write_cache(BlockDriverState *bs); diff --git a/block/Makefile.objs b/block/Makefile.objs index b5754d39bf..554f429d05 100644 --- a/block/Makefile.objs +++ b/block/Makefile.objs @@ -3,9 +3,12 @@ block-obj-y += qcow2.o qcow2-refcount.o qcow2-cluster.o qcow2-snapshot.o qcow2-c block-obj-y += qed.o qed-gencb.o qed-l2-cache.o qed-table.o qed-cluster.o block-obj-y += qed-check.o block-obj-y += parallels.o nbd.o blkdebug.o sheepdog.o blkverify.o -block-obj-y += stream.o block-obj-$(CONFIG_WIN32) += raw-win32.o block-obj-$(CONFIG_POSIX) += raw-posix.o block-obj-$(CONFIG_LIBISCSI) += iscsi.o block-obj-$(CONFIG_CURL) += curl.o block-obj-$(CONFIG_RBD) += rbd.o +block-obj-$(CONFIG_GLUSTERFS) += gluster.o + +common-obj-y += stream.o +common-obj-y += commit.o diff --git a/block/blkdebug.c b/block/blkdebug.c index 59dcea0650..1206d5256b 100644 --- a/block/blkdebug.c +++ b/block/blkdebug.c @@ -28,6 +28,7 @@ typedef struct BDRVBlkdebugState { int state; + int new_state; QLIST_HEAD(, BlkdebugRule) rules[BLKDBG_EVENT_MAX]; QSIMPLEQ_HEAD(, BlkdebugRule) active_rules; } BDRVBlkdebugState; @@ -403,12 +404,12 @@ static void blkdebug_close(BlockDriverState *bs) } static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, - int old_state, bool injected) + bool injected) { BDRVBlkdebugState *s = bs->opaque; /* Only process rules for the current state */ - if (rule->state && rule->state != old_state) { + if (rule->state && rule->state != s->state) { return injected; } @@ -423,7 +424,7 @@ static bool process_rule(BlockDriverState *bs, struct BlkdebugRule *rule, break; case ACTION_SET_STATE: - s->state = rule->options.set_state.new_state; + s->new_state = rule->options.set_state.new_state; break; } return injected; @@ -433,15 +434,16 @@ static void blkdebug_debug_event(BlockDriverState *bs, BlkDebugEvent event) { BDRVBlkdebugState *s = bs->opaque; struct BlkdebugRule *rule; - int old_state = s->state; bool injected; assert((int)event >= 0 && event < BLKDBG_EVENT_MAX); injected = false; + s->new_state = s->state; QLIST_FOREACH(rule, &s->rules[event], next) { - injected = process_rule(bs, rule, old_state, injected); + injected = process_rule(bs, rule, injected); } + s->state = s->new_state; } static int64_t blkdebug_getlength(BlockDriverState *bs) diff --git a/block/commit.c b/block/commit.c new file mode 100644 index 0000000000..733c91403c --- /dev/null +++ b/block/commit.c @@ -0,0 +1,268 @@ +/* + * Live block commit + * + * Copyright Red Hat, Inc. 2012 + * + * Authors: + * Jeff Cody + * Based on stream.c by Stefan Hajnoczi + * + * This work is licensed under the terms of the GNU LGPL, version 2 or later. + * See the COPYING.LIB file in the top-level directory. + * + */ + +#include "trace.h" +#include "block_int.h" +#include "blockjob.h" +#include "qemu/ratelimit.h" + +enum { + /* + * Size of data buffer for populating the image file. This should be large + * enough to process multiple clusters in a single call, so that populating + * contiguous regions of the image is efficient. + */ + COMMIT_BUFFER_SIZE = 512 * 1024, /* in bytes */ +}; + +#define SLICE_TIME 100000000ULL /* ns */ + +typedef struct CommitBlockJob { + BlockJob common; + RateLimit limit; + BlockDriverState *active; + BlockDriverState *top; + BlockDriverState *base; + BlockdevOnError on_error; + int base_flags; + int orig_overlay_flags; +} CommitBlockJob; + +static int coroutine_fn commit_populate(BlockDriverState *bs, + BlockDriverState *base, + int64_t sector_num, int nb_sectors, + void *buf) +{ + int ret = 0; + + ret = bdrv_read(bs, sector_num, buf, nb_sectors); + if (ret) { + return ret; + } + + ret = bdrv_write(base, sector_num, buf, nb_sectors); + if (ret) { + return ret; + } + + return 0; +} + +static void coroutine_fn commit_run(void *opaque) +{ + CommitBlockJob *s = opaque; + BlockDriverState *active = s->active; + BlockDriverState *top = s->top; + BlockDriverState *base = s->base; + BlockDriverState *overlay_bs = NULL; + int64_t sector_num, end; + int ret = 0; + int n = 0; + void *buf; + int bytes_written = 0; + int64_t base_len; + + ret = s->common.len = bdrv_getlength(top); + + + if (s->common.len < 0) { + goto exit_restore_reopen; + } + + ret = base_len = bdrv_getlength(base); + if (base_len < 0) { + goto exit_restore_reopen; + } + + if (base_len < s->common.len) { + ret = bdrv_truncate(base, s->common.len); + if (ret) { + goto exit_restore_reopen; + } + } + + overlay_bs = bdrv_find_overlay(active, top); + + end = s->common.len >> BDRV_SECTOR_BITS; + buf = qemu_blockalign(top, COMMIT_BUFFER_SIZE); + + for (sector_num = 0; sector_num < end; sector_num += n) { + uint64_t delay_ns = 0; + bool copy; + +wait: + /* Note that even when no rate limit is applied we need to yield + * with no pending I/O here so that qemu_aio_flush() returns. + */ + block_job_sleep_ns(&s->common, rt_clock, delay_ns); + if (block_job_is_cancelled(&s->common)) { + break; + } + /* Copy if allocated above the base */ + ret = bdrv_co_is_allocated_above(top, base, sector_num, + COMMIT_BUFFER_SIZE / BDRV_SECTOR_SIZE, + &n); + copy = (ret == 1); + trace_commit_one_iteration(s, sector_num, n, ret); + if (copy) { + if (s->common.speed) { + delay_ns = ratelimit_calculate_delay(&s->limit, n); + if (delay_ns > 0) { + goto wait; + } + } + ret = commit_populate(top, base, sector_num, n, buf); + bytes_written += n * BDRV_SECTOR_SIZE; + } + if (ret < 0) { + if (s->on_error == BLOCKDEV_ON_ERROR_STOP || + s->on_error == BLOCKDEV_ON_ERROR_REPORT|| + (s->on_error == BLOCKDEV_ON_ERROR_ENOSPC && ret == -ENOSPC)) { + goto exit_free_buf; + } else { + n = 0; + continue; + } + } + /* Publish progress */ + s->common.offset += n * BDRV_SECTOR_SIZE; + } + + ret = 0; + + if (!block_job_is_cancelled(&s->common) && sector_num == end) { + /* success */ + ret = bdrv_drop_intermediate(active, top, base); + } + +exit_free_buf: + qemu_vfree(buf); + +exit_restore_reopen: + /* restore base open flags here if appropriate (e.g., change the base back + * to r/o). These reopens do not need to be atomic, since we won't abort + * even on failure here */ + if (s->base_flags != bdrv_get_flags(base)) { + bdrv_reopen(base, s->base_flags, NULL); + } + if (s->orig_overlay_flags != bdrv_get_flags(overlay_bs)) { + bdrv_reopen(overlay_bs, s->orig_overlay_flags, NULL); + } + + block_job_complete(&s->common, ret); +} + +static void commit_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + CommitBlockJob *s = container_of(job, CommitBlockJob, common); + + if (speed < 0) { + error_set(errp, QERR_INVALID_PARAMETER, "speed"); + return; + } + ratelimit_set_speed(&s->limit, speed / BDRV_SECTOR_SIZE, SLICE_TIME); +} + +static BlockJobType commit_job_type = { + .instance_size = sizeof(CommitBlockJob), + .job_type = "commit", + .set_speed = commit_set_speed, +}; + +void commit_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverState *top, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + CommitBlockJob *s; + BlockReopenQueue *reopen_queue = NULL; + int orig_overlay_flags; + int orig_base_flags; + BlockDriverState *overlay_bs; + Error *local_err = NULL; + + if ((on_error == BLOCKDEV_ON_ERROR_STOP || + on_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER_COMBINATION); + return; + } + + /* Once we support top == active layer, remove this check */ + if (top == bs) { + error_setg(errp, + "Top image as the active layer is currently unsupported"); + return; + } + + if (top == base) { + error_setg(errp, "Invalid files for merge: top and base are the same"); + return; + } + + /* top and base may be valid, but let's make sure that base is reachable + * from top */ + if (bdrv_find_backing_image(top, base->filename) != base) { + error_setg(errp, + "Base (%s) is not reachable from top (%s)", + base->filename, top->filename); + return; + } + + overlay_bs = bdrv_find_overlay(bs, top); + + if (overlay_bs == NULL) { + error_setg(errp, "Could not find overlay image for %s:", top->filename); + return; + } + + orig_base_flags = bdrv_get_flags(base); + orig_overlay_flags = bdrv_get_flags(overlay_bs); + + /* convert base & overlay_bs to r/w, if necessary */ + if (!(orig_base_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, base, + orig_base_flags | BDRV_O_RDWR); + } + if (!(orig_overlay_flags & BDRV_O_RDWR)) { + reopen_queue = bdrv_reopen_queue(reopen_queue, overlay_bs, + orig_overlay_flags | BDRV_O_RDWR); + } + if (reopen_queue) { + bdrv_reopen_multiple(reopen_queue, &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + } + + + s = block_job_create(&commit_job_type, bs, speed, cb, opaque, errp); + if (!s) { + return; + } + + s->base = base; + s->top = top; + s->active = bs; + + s->base_flags = orig_base_flags; + s->orig_overlay_flags = orig_overlay_flags; + + s->on_error = on_error; + s->common.co = qemu_coroutine_create(commit_run); + + trace_commit_start(bs, base, top, s, s->common.co, opaque); + qemu_coroutine_enter(s->common.co, s); +} diff --git a/block/gluster.c b/block/gluster.c new file mode 100644 index 0000000000..3588d7377f --- /dev/null +++ b/block/gluster.c @@ -0,0 +1,624 @@ +/* + * GlusterFS backend for QEMU + * + * Copyright (C) 2012 Bharata B Rao + * + * Pipe handling mechanism in AIO implementation is derived from + * block/rbd.c. Hence, + * + * Copyright (C) 2010-2011 Christian Brunner , + * Josh Durgin + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + * Contributions after 2012-01-13 are licensed under the terms of the + * GNU GPL, version 2 or (at your option) any later version. + */ +#include +#include "block_int.h" +#include "qemu_socket.h" +#include "uri.h" + +typedef struct GlusterAIOCB { + BlockDriverAIOCB common; + int64_t size; + int ret; + bool *finished; + QEMUBH *bh; +} GlusterAIOCB; + +typedef struct BDRVGlusterState { + struct glfs *glfs; + int fds[2]; + struct glfs_fd *fd; + int qemu_aio_count; + int event_reader_pos; + GlusterAIOCB *event_acb; +} BDRVGlusterState; + +#define GLUSTER_FD_READ 0 +#define GLUSTER_FD_WRITE 1 + +typedef struct GlusterConf { + char *server; + int port; + char *volname; + char *image; + char *transport; +} GlusterConf; + +static void qemu_gluster_gconf_free(GlusterConf *gconf) +{ + g_free(gconf->server); + g_free(gconf->volname); + g_free(gconf->image); + g_free(gconf->transport); + g_free(gconf); +} + +static int parse_volume_options(GlusterConf *gconf, char *path) +{ + char *p, *q; + + if (!path) { + return -EINVAL; + } + + /* volume */ + p = q = path + strspn(path, "/"); + p += strcspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->volname = g_strndup(q, p - q); + + /* image */ + p += strspn(p, "/"); + if (*p == '\0') { + return -EINVAL; + } + gconf->image = g_strdup(p); + return 0; +} + +/* + * file=gluster[+transport]://[server[:port]]/volname/image[?socket=...] + * + * 'gluster' is the protocol. + * + * 'transport' specifies the transport type used to connect to gluster + * management daemon (glusterd). Valid transport types are + * tcp, unix and rdma. If a transport type isn't specified, then tcp + * type is assumed. + * + * 'server' specifies the server where the volume file specification for + * the given volume resides. This can be either hostname, ipv4 address + * or ipv6 address. ipv6 address needs to be within square brackets [ ]. + * If transport type is 'unix', then 'server' field should not be specifed. + * The 'socket' field needs to be populated with the path to unix domain + * socket. + * + * 'port' is the port number on which glusterd is listening. This is optional + * and if not specified, QEMU will send 0 which will make gluster to use the + * default port. If the transport type is unix, then 'port' should not be + * specified. + * + * 'volname' is the name of the gluster volume which contains the VM image. + * + * 'image' is the path to the actual VM image that resides on gluster volume. + * + * Examples: + * + * file=gluster://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4/testvol/a.img + * file=gluster+tcp://1.2.3.4:24007/testvol/dir/a.img + * file=gluster+tcp://[1:2:3:4:5:6:7:8]/testvol/dir/a.img + * file=gluster+tcp://[1:2:3:4:5:6:7:8]:24007/testvol/dir/a.img + * file=gluster+tcp://server.domain.com:24007/testvol/dir/a.img + * file=gluster+unix:///testvol/dir/a.img?socket=/tmp/glusterd.socket + * file=gluster+rdma://1.2.3.4:24007/testvol/a.img + */ +static int qemu_gluster_parseuri(GlusterConf *gconf, const char *filename) +{ + URI *uri; + QueryParams *qp = NULL; + bool is_unix = false; + int ret = 0; + + uri = uri_parse(filename); + if (!uri) { + return -EINVAL; + } + + /* transport */ + if (!strcmp(uri->scheme, "gluster")) { + gconf->transport = g_strdup("tcp"); + } else if (!strcmp(uri->scheme, "gluster+tcp")) { + gconf->transport = g_strdup("tcp"); + } else if (!strcmp(uri->scheme, "gluster+unix")) { + gconf->transport = g_strdup("unix"); + is_unix = true; + } else if (!strcmp(uri->scheme, "gluster+rdma")) { + gconf->transport = g_strdup("rdma"); + } else { + ret = -EINVAL; + goto out; + } + + ret = parse_volume_options(gconf, uri->path); + if (ret < 0) { + goto out; + } + + qp = query_params_parse(uri->query); + if (qp->n > 1 || (is_unix && !qp->n) || (!is_unix && qp->n)) { + ret = -EINVAL; + goto out; + } + + if (is_unix) { + if (uri->server || uri->port) { + ret = -EINVAL; + goto out; + } + if (strcmp(qp->p[0].name, "socket")) { + ret = -EINVAL; + goto out; + } + gconf->server = g_strdup(qp->p[0].value); + } else { + gconf->server = g_strdup(uri->server); + gconf->port = uri->port; + } + +out: + if (qp) { + query_params_free(qp); + } + uri_free(uri); + return ret; +} + +static struct glfs *qemu_gluster_init(GlusterConf *gconf, const char *filename) +{ + struct glfs *glfs = NULL; + int ret; + int old_errno; + + ret = qemu_gluster_parseuri(gconf, filename); + if (ret < 0) { + error_report("Usage: file=gluster[+transport]://[server[:port]]/" + "volname/image[?socket=...]"); + errno = -ret; + goto out; + } + + glfs = glfs_new(gconf->volname); + if (!glfs) { + goto out; + } + + ret = glfs_set_volfile_server(glfs, gconf->transport, gconf->server, + gconf->port); + if (ret < 0) { + goto out; + } + + /* + * TODO: Use GF_LOG_ERROR instead of hard code value of 4 here when + * GlusterFS makes GF_LOG_* macros available to libgfapi users. + */ + ret = glfs_set_logging(glfs, "-", 4); + if (ret < 0) { + goto out; + } + + ret = glfs_init(glfs); + if (ret) { + error_report("Gluster connection failed for server=%s port=%d " + "volume=%s image=%s transport=%s\n", gconf->server, gconf->port, + gconf->volname, gconf->image, gconf->transport); + goto out; + } + return glfs; + +out: + if (glfs) { + old_errno = errno; + glfs_fini(glfs); + errno = old_errno; + } + return NULL; +} + +static void qemu_gluster_complete_aio(GlusterAIOCB *acb, BDRVGlusterState *s) +{ + int ret; + bool *finished = acb->finished; + BlockDriverCompletionFunc *cb = acb->common.cb; + void *opaque = acb->common.opaque; + + if (!acb->ret || acb->ret == acb->size) { + ret = 0; /* Success */ + } else if (acb->ret < 0) { + ret = acb->ret; /* Read/Write failed */ + } else { + ret = -EIO; /* Partial read/write - fail it */ + } + + s->qemu_aio_count--; + qemu_aio_release(acb); + cb(opaque, ret); + if (finished) { + *finished = true; + } +} + +static void qemu_gluster_aio_event_reader(void *opaque) +{ + BDRVGlusterState *s = opaque; + ssize_t ret; + + do { + char *p = (char *)&s->event_acb; + + ret = read(s->fds[GLUSTER_FD_READ], p + s->event_reader_pos, + sizeof(s->event_acb) - s->event_reader_pos); + if (ret > 0) { + s->event_reader_pos += ret; + if (s->event_reader_pos == sizeof(s->event_acb)) { + s->event_reader_pos = 0; + qemu_gluster_complete_aio(s->event_acb, s); + } + } + } while (ret < 0 && errno == EINTR); +} + +static int qemu_gluster_aio_flush_cb(void *opaque) +{ + BDRVGlusterState *s = opaque; + + return (s->qemu_aio_count > 0); +} + +static int qemu_gluster_open(BlockDriverState *bs, const char *filename, + int bdrv_flags) +{ + BDRVGlusterState *s = bs->opaque; + int open_flags = O_BINARY; + int ret = 0; + GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); + + s->glfs = qemu_gluster_init(gconf, filename); + if (!s->glfs) { + ret = -errno; + goto out; + } + + if (bdrv_flags & BDRV_O_RDWR) { + open_flags |= O_RDWR; + } else { + open_flags |= O_RDONLY; + } + + if ((bdrv_flags & BDRV_O_NOCACHE)) { + open_flags |= O_DIRECT; + } + + s->fd = glfs_open(s->glfs, gconf->image, open_flags); + if (!s->fd) { + ret = -errno; + goto out; + } + + ret = qemu_pipe(s->fds); + if (ret < 0) { + ret = -errno; + goto out; + } + fcntl(s->fds[GLUSTER_FD_READ], F_SETFL, O_NONBLOCK); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], + qemu_gluster_aio_event_reader, NULL, qemu_gluster_aio_flush_cb, s); + +out: + qemu_gluster_gconf_free(gconf); + if (!ret) { + return ret; + } + if (s->fd) { + glfs_close(s->fd); + } + if (s->glfs) { + glfs_fini(s->glfs); + } + return ret; +} + +static int qemu_gluster_create(const char *filename, + QEMUOptionParameter *options) +{ + struct glfs *glfs; + struct glfs_fd *fd; + int ret = 0; + int64_t total_size = 0; + GlusterConf *gconf = g_malloc0(sizeof(GlusterConf)); + + glfs = qemu_gluster_init(gconf, filename); + if (!glfs) { + ret = -errno; + goto out; + } + + while (options && options->name) { + if (!strcmp(options->name, BLOCK_OPT_SIZE)) { + total_size = options->value.n / BDRV_SECTOR_SIZE; + } + options++; + } + + fd = glfs_creat(glfs, gconf->image, + O_WRONLY | O_CREAT | O_TRUNC | O_BINARY, S_IRUSR | S_IWUSR); + if (!fd) { + ret = -errno; + } else { + if (glfs_ftruncate(fd, total_size * BDRV_SECTOR_SIZE) != 0) { + ret = -errno; + } + if (glfs_close(fd) != 0) { + ret = -errno; + } + } +out: + qemu_gluster_gconf_free(gconf); + if (glfs) { + glfs_fini(glfs); + } + return ret; +} + +static void qemu_gluster_aio_cancel(BlockDriverAIOCB *blockacb) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)blockacb; + bool finished = false; + + acb->finished = &finished; + while (!finished) { + qemu_aio_wait(); + } +} + +static AIOPool gluster_aio_pool = { + .aiocb_size = sizeof(GlusterAIOCB), + .cancel = qemu_gluster_aio_cancel, +}; + +static void gluster_finish_aiocb(struct glfs_fd *fd, ssize_t ret, void *arg) +{ + GlusterAIOCB *acb = (GlusterAIOCB *)arg; + BlockDriverState *bs = acb->common.bs; + BDRVGlusterState *s = bs->opaque; + int retval; + + acb->ret = ret; + retval = qemu_write_full(s->fds[GLUSTER_FD_WRITE], &acb, sizeof(acb)); + if (retval != sizeof(acb)) { + /* + * Gluster AIO callback thread failed to notify the waiting + * QEMU thread about IO completion. + * + * Complete this IO request and make the disk inaccessible for + * subsequent reads and writes. + */ + error_report("Gluster failed to notify QEMU about IO completion"); + + qemu_mutex_lock_iothread(); /* We are in gluster thread context */ + acb->common.cb(acb->common.opaque, -EIO); + qemu_aio_release(acb); + s->qemu_aio_count--; + close(s->fds[GLUSTER_FD_READ]); + close(s->fds[GLUSTER_FD_WRITE]); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, + NULL); + bs->drv = NULL; /* Make the disk inaccessible */ + qemu_mutex_unlock_iothread(); + } +} + +static BlockDriverAIOCB *qemu_gluster_aio_rw(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque, int write) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + size_t size; + off_t offset; + + offset = sector_num * BDRV_SECTOR_SIZE; + size = nb_sectors * BDRV_SECTOR_SIZE; + s->qemu_aio_count++; + + acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque); + acb->size = size; + acb->ret = 0; + acb->finished = NULL; + + if (write) { + ret = glfs_pwritev_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, acb); + } else { + ret = glfs_preadv_async(s->fd, qiov->iov, qiov->niov, offset, 0, + &gluster_finish_aiocb, acb); + } + + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static BlockDriverAIOCB *qemu_gluster_aio_readv(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 0); +} + +static BlockDriverAIOCB *qemu_gluster_aio_writev(BlockDriverState *bs, + int64_t sector_num, QEMUIOVector *qiov, int nb_sectors, + BlockDriverCompletionFunc *cb, void *opaque) +{ + return qemu_gluster_aio_rw(bs, sector_num, qiov, nb_sectors, cb, opaque, 1); +} + +static BlockDriverAIOCB *qemu_gluster_aio_flush(BlockDriverState *bs, + BlockDriverCompletionFunc *cb, void *opaque) +{ + int ret; + GlusterAIOCB *acb; + BDRVGlusterState *s = bs->opaque; + + acb = qemu_aio_get(&gluster_aio_pool, bs, cb, opaque); + acb->size = 0; + acb->ret = 0; + acb->finished = NULL; + s->qemu_aio_count++; + + ret = glfs_fsync_async(s->fd, &gluster_finish_aiocb, acb); + if (ret < 0) { + goto out; + } + return &acb->common; + +out: + s->qemu_aio_count--; + qemu_aio_release(acb); + return NULL; +} + +static int64_t qemu_gluster_getlength(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + int64_t ret; + + ret = glfs_lseek(s->fd, 0, SEEK_END); + if (ret < 0) { + return -errno; + } else { + return ret; + } +} + +static int64_t qemu_gluster_allocated_file_size(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + struct stat st; + int ret; + + ret = glfs_fstat(s->fd, &st); + if (ret < 0) { + return -errno; + } else { + return st.st_blocks * 512; + } +} + +static void qemu_gluster_close(BlockDriverState *bs) +{ + BDRVGlusterState *s = bs->opaque; + + close(s->fds[GLUSTER_FD_READ]); + close(s->fds[GLUSTER_FD_WRITE]); + qemu_aio_set_fd_handler(s->fds[GLUSTER_FD_READ], NULL, NULL, NULL, NULL); + + if (s->fd) { + glfs_close(s->fd); + s->fd = NULL; + } + glfs_fini(s->glfs); +} + +static QEMUOptionParameter qemu_gluster_create_options[] = { + { + .name = BLOCK_OPT_SIZE, + .type = OPT_SIZE, + .help = "Virtual disk size" + }, + { NULL } +}; + +static BlockDriver bdrv_gluster = { + .format_name = "gluster", + .protocol_name = "gluster", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_tcp = { + .format_name = "gluster", + .protocol_name = "gluster+tcp", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_unix = { + .format_name = "gluster", + .protocol_name = "gluster+unix", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static BlockDriver bdrv_gluster_rdma = { + .format_name = "gluster", + .protocol_name = "gluster+rdma", + .instance_size = sizeof(BDRVGlusterState), + .bdrv_file_open = qemu_gluster_open, + .bdrv_close = qemu_gluster_close, + .bdrv_create = qemu_gluster_create, + .bdrv_getlength = qemu_gluster_getlength, + .bdrv_get_allocated_file_size = qemu_gluster_allocated_file_size, + .bdrv_aio_readv = qemu_gluster_aio_readv, + .bdrv_aio_writev = qemu_gluster_aio_writev, + .bdrv_aio_flush = qemu_gluster_aio_flush, + .create_options = qemu_gluster_create_options, +}; + +static void bdrv_gluster_init(void) +{ + bdrv_register(&bdrv_gluster_rdma); + bdrv_register(&bdrv_gluster_unix); + bdrv_register(&bdrv_gluster_tcp); + bdrv_register(&bdrv_gluster); +} + +block_init(bdrv_gluster_init); diff --git a/block/stream.c b/block/stream.c index c4f87dd5b6..792665276e 100644 --- a/block/stream.c +++ b/block/stream.c @@ -13,6 +13,7 @@ #include "trace.h" #include "block_int.h" +#include "blockjob.h" #include "qemu/ratelimit.h" enum { @@ -30,6 +31,7 @@ typedef struct StreamBlockJob { BlockJob common; RateLimit limit; BlockDriverState *base; + BlockdevOnError on_error; char backing_file_id[1024]; } StreamBlockJob; @@ -77,6 +79,7 @@ static void coroutine_fn stream_run(void *opaque) BlockDriverState *bs = s->common.bs; BlockDriverState *base = s->base; int64_t sector_num, end; + int error = 0; int ret = 0; int n = 0; void *buf; @@ -141,7 +144,19 @@ wait: ret = stream_populate(bs, sector_num, n, buf); } if (ret < 0) { - break; + BlockErrorAction action = + block_job_error_action(&s->common, s->common.bs, s->on_error, + true, -ret); + if (action == BDRV_ACTION_STOP) { + n = 0; + continue; + } + if (error == 0) { + error = ret; + } + if (action == BDRV_ACTION_REPORT) { + break; + } } ret = 0; @@ -153,6 +168,9 @@ wait: bdrv_disable_copy_on_read(bs); } + /* Do not remove the backing file if an error was there but ignored. */ + ret = error; + if (!block_job_is_cancelled(&s->common) && sector_num == end && ret == 0) { const char *base_id = NULL, *base_fmt = NULL; if (base) { @@ -188,11 +206,19 @@ static BlockJobType stream_job_type = { void stream_start(BlockDriverState *bs, BlockDriverState *base, const char *base_id, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp) { StreamBlockJob *s; + if ((on_error == BLOCKDEV_ON_ERROR_STOP || + on_error == BLOCKDEV_ON_ERROR_ENOSPC) && + !bdrv_iostatus_is_enabled(bs)) { + error_set(errp, QERR_INVALID_PARAMETER, "on-error"); + return; + } + s = block_job_create(&stream_job_type, bs, speed, cb, opaque, errp); if (!s) { return; @@ -203,6 +229,7 @@ void stream_start(BlockDriverState *bs, BlockDriverState *base, pstrcpy(s->backing_file_id, sizeof(s->backing_file_id), base_id); } + s->on_error = on_error; s->common.co = qemu_coroutine_create(stream_run); trace_stream_start(bs, base, s, s->common.co, opaque); qemu_coroutine_enter(s->common.co, s); diff --git a/block_int.h b/block_int.h index ac4245cb18..f4bae04401 100644 --- a/block_int.h +++ b/block_int.h @@ -31,6 +31,7 @@ #include "qemu-timer.h" #include "qapi-types.h" #include "qerror.h" +#include "monitor.h" #define BLOCK_FLAG_ENCRYPT 1 #define BLOCK_FLAG_COMPAT6 4 @@ -67,73 +68,6 @@ typedef struct BlockIOBaseValue { uint64_t ios[2]; } BlockIOBaseValue; -typedef struct BlockJob BlockJob; - -/** - * BlockJobType: - * - * A class type for block job objects. - */ -typedef struct BlockJobType { - /** Derived BlockJob struct size */ - size_t instance_size; - - /** String describing the operation, part of query-block-jobs QMP API */ - const char *job_type; - - /** Optional callback for job types that support setting a speed limit */ - void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); -} BlockJobType; - -/** - * BlockJob: - * - * Long-running operation on a BlockDriverState. - */ -struct BlockJob { - /** The job type, including the job vtable. */ - const BlockJobType *job_type; - - /** The block device on which the job is operating. */ - BlockDriverState *bs; - - /** - * The coroutine that executes the job. If not NULL, it is - * reentered when busy is false and the job is cancelled. - */ - Coroutine *co; - - /** - * Set to true if the job should cancel itself. The flag must - * always be tested just before toggling the busy flag from false - * to true. After a job has been cancelled, it should only yield - * if #qemu_aio_wait will ("sooner or later") reenter the coroutine. - */ - bool cancelled; - - /** - * Set to false by the job while it is in a quiescent state, where - * no I/O is pending and the job has yielded on any condition - * that is not detected by #qemu_aio_wait, such as a timer. - */ - bool busy; - - /** Offset that is published by the query-block-jobs QMP API */ - int64_t offset; - - /** Length that is published by the query-block-jobs QMP API */ - int64_t len; - - /** Speed that was set with @block_job_set_speed. */ - int64_t speed; - - /** The completion function that will be called when the job completes. */ - BlockDriverCompletionFunc *cb; - - /** The opaque value that is passed to the completion function. */ - void *opaque; -}; - struct BlockDriver { const char *format_name; int instance_size; @@ -329,7 +263,7 @@ struct BlockDriverState { /* NOTE: the following infos are only hints for real hardware drivers. They are not used by the block driver */ - BlockErrorAction on_read_error, on_write_error; + BlockdevOnError on_read_error, on_write_error; bool iostatus_enabled; BlockDeviceIoStatus iostatus; char device_name[32]; @@ -353,92 +287,9 @@ void bdrv_set_io_limits(BlockDriverState *bs, #ifdef _WIN32 int is_windows_drive(const char *filename); #endif - -/** - * block_job_create: - * @job_type: The class object for the newly-created job. - * @bs: The block - * @speed: The maximum speed, in bytes per second, or 0 for unlimited. - * @cb: Completion function for the job. - * @opaque: Opaque pointer value passed to @cb. - * @errp: Error object. - * - * Create a new long-running block device job and return it. The job - * will call @cb asynchronously when the job completes. Note that - * @bs may have been closed at the time the @cb it is called. If - * this is the case, the job may be reported as either cancelled or - * completed. - * - * This function is not part of the public job interface; it should be - * called from a wrapper that is specific to the job type. - */ -void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, - int64_t speed, BlockDriverCompletionFunc *cb, - void *opaque, Error **errp); - -/** - * block_job_sleep_ns: - * @job: The job that calls the function. - * @clock: The clock to sleep on. - * @ns: How many nanoseconds to stop for. - * - * Put the job to sleep (assuming that it wasn't canceled) for @ns - * nanoseconds. Canceling the job will interrupt the wait immediately. - */ -void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns); - -/** - * block_job_complete: - * @job: The job being completed. - * @ret: The status code. - * - * Call the completion function that was registered at creation time, and - * free @job. - */ -void block_job_complete(BlockJob *job, int ret); - -/** - * block_job_set_speed: - * @job: The job to set the speed for. - * @speed: The new value - * @errp: Error object. - * - * Set a rate-limiting parameter for the job; the actual meaning may - * vary depending on the job type. - */ -void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); - -/** - * block_job_cancel: - * @job: The job to be canceled. - * - * Asynchronously cancel the specified job. - */ -void block_job_cancel(BlockJob *job); - -/** - * block_job_is_cancelled: - * @job: The job being queried. - * - * Returns whether the job is scheduled for cancellation. - */ -bool block_job_is_cancelled(BlockJob *job); - -/** - * block_job_cancel: - * @job: The job to be canceled. - * - * Asynchronously cancel the job and wait for it to reach a quiescent - * state. Note that the completion callback will still be called - * asynchronously, hence it is *not* valid to call #bdrv_delete - * immediately after #block_job_cancel_sync. Users of block jobs - * will usually protect the BlockDriverState objects with a reference - * count, should this be a concern. - * - * Returns the return value from the job if the job actually completed - * during the call, or -ECANCELED if it was canceled. - */ -int block_job_cancel_sync(BlockJob *job); +void bdrv_emit_qmp_error_event(const BlockDriverState *bdrv, + enum MonitorEvent ev, + BlockErrorAction action, bool is_read); /** * stream_start: @@ -448,6 +299,7 @@ int block_job_cancel_sync(BlockJob *job); * @base_id: The file name that will be written to @bs as the new * backing file if the job completes. Ignored if @base is %NULL. * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. * @cb: Completion function for the job. * @opaque: Opaque pointer value passed to @cb. * @errp: Error object. @@ -459,8 +311,24 @@ int block_job_cancel_sync(BlockJob *job); * @base_id in the written image and to @base in the live BlockDriverState. */ void stream_start(BlockDriverState *bs, BlockDriverState *base, - const char *base_id, int64_t speed, + const char *base_id, int64_t speed, BlockdevOnError on_error, BlockDriverCompletionFunc *cb, void *opaque, Error **errp); +/** + * commit_start: + * @bs: Top Block device + * @base: Block device that will be written into, and become the new top + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @on_error: The action to take upon error. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + */ +void commit_start(BlockDriverState *bs, BlockDriverState *base, + BlockDriverState *top, int64_t speed, + BlockdevOnError on_error, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + #endif /* BLOCK_INT_H */ diff --git a/blockdev.c b/blockdev.c index e5d450f0bb..5f18dfa97b 100644 --- a/blockdev.c +++ b/blockdev.c @@ -9,6 +9,7 @@ #include "blockdev.h" #include "hw/block-common.h" +#include "blockjob.h" #include "monitor.h" #include "qerror.h" #include "qemu-option.h" @@ -237,16 +238,16 @@ static void drive_put_ref_bh_schedule(DriveInfo *dinfo) qemu_bh_schedule(s->bh); } -static int parse_block_error_action(const char *buf, int is_read) +static int parse_block_error_action(const char *buf, bool is_read) { if (!strcmp(buf, "ignore")) { - return BLOCK_ERR_IGNORE; + return BLOCKDEV_ON_ERROR_IGNORE; } else if (!is_read && !strcmp(buf, "enospc")) { - return BLOCK_ERR_STOP_ENOSPC; + return BLOCKDEV_ON_ERROR_ENOSPC; } else if (!strcmp(buf, "stop")) { - return BLOCK_ERR_STOP_ANY; + return BLOCKDEV_ON_ERROR_STOP; } else if (!strcmp(buf, "report")) { - return BLOCK_ERR_REPORT; + return BLOCKDEV_ON_ERROR_REPORT; } else { error_report("'%s' invalid %s error action", buf, is_read ? "read" : "write"); @@ -432,7 +433,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) return NULL; } - on_write_error = BLOCK_ERR_STOP_ENOSPC; + on_write_error = BLOCKDEV_ON_ERROR_ENOSPC; if ((buf = qemu_opt_get(opts, "werror")) != NULL) { if (type != IF_IDE && type != IF_SCSI && type != IF_VIRTIO && type != IF_NONE) { error_report("werror is not supported by this bus type"); @@ -445,7 +446,7 @@ DriveInfo *drive_init(QemuOpts *opts, int default_to_scsi) } } - on_read_error = BLOCK_ERR_REPORT; + on_read_error = BLOCKDEV_ON_ERROR_REPORT; if ((buf = qemu_opt_get(opts, "rerror")) != NULL) { if (type != IF_IDE && type != IF_VIRTIO && type != IF_SCSI && type != IF_NONE) { error_report("rerror is not supported by this bus type"); @@ -805,6 +806,11 @@ void qmp_transaction(BlockdevActionList *dev_list, Error **errp) QSIMPLEQ_FOREACH(states, &snap_bdrv_states, entry) { /* This removes our old bs from the bdrv_states, and adds the new bs */ bdrv_append(states->new_bs, states->old_bs); + /* We don't need (or want) to use the transactional + * bdrv_reopen_multiple() across all the entries at once, because we + * don't want to abort all of them if one of them fails the reopen */ + bdrv_reopen(states->new_bs, states->new_bs->open_flags & ~BDRV_O_RDWR, + NULL); } /* success */ @@ -1065,12 +1071,12 @@ static QObject *qobject_from_block_job(BlockJob *job) job->speed); } -static void block_stream_cb(void *opaque, int ret) +static void block_job_cb(void *opaque, int ret) { BlockDriverState *bs = opaque; QObject *obj; - trace_block_stream_cb(bs, bs->job, ret); + trace_block_job_cb(bs, bs->job, ret); assert(bs->job); obj = qobject_from_block_job(bs->job); @@ -1090,13 +1096,18 @@ static void block_stream_cb(void *opaque, int ret) } void qmp_block_stream(const char *device, bool has_base, - const char *base, bool has_speed, - int64_t speed, Error **errp) + const char *base, bool has_speed, int64_t speed, + bool has_on_error, BlockdevOnError on_error, + Error **errp) { BlockDriverState *bs; BlockDriverState *base_bs = NULL; Error *local_err = NULL; + if (!has_on_error) { + on_error = BLOCKDEV_ON_ERROR_REPORT; + } + bs = bdrv_find(device); if (!bs) { error_set(errp, QERR_DEVICE_NOT_FOUND, device); @@ -1112,7 +1123,7 @@ void qmp_block_stream(const char *device, bool has_base, } stream_start(bs, base_bs, base, has_speed ? speed : 0, - block_stream_cb, bs, &local_err); + on_error, block_job_cb, bs, &local_err); if (error_is_set(&local_err)) { error_propagate(errp, local_err); return; @@ -1126,6 +1137,64 @@ void qmp_block_stream(const char *device, bool has_base, trace_qmp_block_stream(bs, bs->job); } +void qmp_block_commit(const char *device, + bool has_base, const char *base, const char *top, + bool has_speed, int64_t speed, + Error **errp) +{ + BlockDriverState *bs; + BlockDriverState *base_bs, *top_bs; + Error *local_err = NULL; + /* This will be part of the QMP command, if/when the + * BlockdevOnError change for blkmirror makes it in + */ + BlockdevOnError on_error = BLOCKDEV_ON_ERROR_REPORT; + + /* drain all i/o before commits */ + bdrv_drain_all(); + + bs = bdrv_find(device); + if (!bs) { + error_set(errp, QERR_DEVICE_NOT_FOUND, device); + return; + } + if (base && has_base) { + base_bs = bdrv_find_backing_image(bs, base); + } else { + base_bs = bdrv_find_base(bs); + } + + if (base_bs == NULL) { + error_set(errp, QERR_BASE_NOT_FOUND, base ? base : "NULL"); + return; + } + + /* default top_bs is the active layer */ + top_bs = bs; + + if (top) { + if (strcmp(bs->filename, top) != 0) { + top_bs = bdrv_find_backing_image(bs, top); + } + } + + if (top_bs == NULL) { + error_setg(errp, "Top image file %s not found", top ? top : "NULL"); + return; + } + + commit_start(bs, base_bs, top_bs, speed, on_error, block_job_cb, bs, + &local_err); + if (local_err != NULL) { + error_propagate(errp, local_err); + return; + } + /* Grab a reference so hotplug does not delete the BlockDriverState from + * underneath us. + */ + drive_get_ref(drive_get_by_blockdev(bs)); +} + static BlockJob *find_block_job(const char *device) { BlockDriverState *bs; @@ -1142,19 +1211,28 @@ void qmp_block_job_set_speed(const char *device, int64_t speed, Error **errp) BlockJob *job = find_block_job(device); if (!job) { - error_set(errp, QERR_DEVICE_NOT_ACTIVE, device); + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); return; } block_job_set_speed(job, speed, errp); } -void qmp_block_job_cancel(const char *device, Error **errp) +void qmp_block_job_cancel(const char *device, + bool has_force, bool force, Error **errp) { BlockJob *job = find_block_job(device); + if (!has_force) { + force = false; + } + if (!job) { - error_set(errp, QERR_DEVICE_NOT_ACTIVE, device); + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + if (job->paused && !force) { + error_set(errp, QERR_BLOCK_JOB_PAUSED, device); return; } @@ -1162,25 +1240,40 @@ void qmp_block_job_cancel(const char *device, Error **errp) block_job_cancel(job); } +void qmp_block_job_pause(const char *device, Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + + trace_qmp_block_job_pause(job); + block_job_pause(job); +} + +void qmp_block_job_resume(const char *device, Error **errp) +{ + BlockJob *job = find_block_job(device); + + if (!job) { + error_set(errp, QERR_BLOCK_JOB_NOT_ACTIVE, device); + return; + } + + trace_qmp_block_job_resume(job); + block_job_resume(job); +} + static void do_qmp_query_block_jobs_one(void *opaque, BlockDriverState *bs) { BlockJobInfoList **prev = opaque; BlockJob *job = bs->job; if (job) { - BlockJobInfoList *elem; - BlockJobInfo *info = g_new(BlockJobInfo, 1); - *info = (BlockJobInfo){ - .type = g_strdup(job->job_type->job_type), - .device = g_strdup(bdrv_get_device_name(bs)), - .len = job->len, - .offset = job->offset, - .speed = job->speed, - }; - - elem = g_new0(BlockJobInfoList, 1); - elem->value = info; - + BlockJobInfoList *elem = g_new0(BlockJobInfoList, 1); + elem->value = block_job_query(bs->job); (*prev)->next = elem; *prev = elem; } diff --git a/blockjob.c b/blockjob.c new file mode 100644 index 0000000000..f55f55a193 --- /dev/null +++ b/blockjob.c @@ -0,0 +1,249 @@ +/* + * QEMU System Emulator block driver + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ + +#include "config-host.h" +#include "qemu-common.h" +#include "trace.h" +#include "monitor.h" +#include "block.h" +#include "blockjob.h" +#include "block_int.h" +#include "qjson.h" +#include "qemu-coroutine.h" +#include "qmp-commands.h" +#include "qemu-timer.h" + +void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, + int64_t speed, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp) +{ + BlockJob *job; + + if (bs->job || bdrv_in_use(bs)) { + error_set(errp, QERR_DEVICE_IN_USE, bdrv_get_device_name(bs)); + return NULL; + } + bdrv_set_in_use(bs, 1); + + job = g_malloc0(job_type->instance_size); + job->job_type = job_type; + job->bs = bs; + job->cb = cb; + job->opaque = opaque; + job->busy = true; + bs->job = job; + + /* Only set speed when necessary to avoid NotSupported error */ + if (speed != 0) { + Error *local_err = NULL; + + block_job_set_speed(job, speed, &local_err); + if (error_is_set(&local_err)) { + bs->job = NULL; + g_free(job); + bdrv_set_in_use(bs, 0); + error_propagate(errp, local_err); + return NULL; + } + } + return job; +} + +void block_job_complete(BlockJob *job, int ret) +{ + BlockDriverState *bs = job->bs; + + assert(bs->job == job); + job->cb(job->opaque, ret); + bs->job = NULL; + g_free(job); + bdrv_set_in_use(bs, 0); +} + +void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp) +{ + Error *local_err = NULL; + + if (!job->job_type->set_speed) { + error_set(errp, QERR_NOT_SUPPORTED); + return; + } + job->job_type->set_speed(job, speed, &local_err); + if (error_is_set(&local_err)) { + error_propagate(errp, local_err); + return; + } + + job->speed = speed; +} + +void block_job_pause(BlockJob *job) +{ + job->paused = true; +} + +bool block_job_is_paused(BlockJob *job) +{ + return job->paused; +} + +void block_job_resume(BlockJob *job) +{ + job->paused = false; + block_job_iostatus_reset(job); + if (job->co && !job->busy) { + qemu_coroutine_enter(job->co, NULL); + } +} + +void block_job_cancel(BlockJob *job) +{ + job->cancelled = true; + block_job_resume(job); +} + +bool block_job_is_cancelled(BlockJob *job) +{ + return job->cancelled; +} + +void block_job_iostatus_reset(BlockJob *job) +{ + job->iostatus = BLOCK_DEVICE_IO_STATUS_OK; +} + +struct BlockCancelData { + BlockJob *job; + BlockDriverCompletionFunc *cb; + void *opaque; + bool cancelled; + int ret; +}; + +static void block_job_cancel_cb(void *opaque, int ret) +{ + struct BlockCancelData *data = opaque; + + data->cancelled = block_job_is_cancelled(data->job); + data->ret = ret; + data->cb(data->opaque, ret); +} + +int block_job_cancel_sync(BlockJob *job) +{ + struct BlockCancelData data; + BlockDriverState *bs = job->bs; + + assert(bs->job == job); + + /* Set up our own callback to store the result and chain to + * the original callback. + */ + data.job = job; + data.cb = job->cb; + data.opaque = job->opaque; + data.ret = -EINPROGRESS; + job->cb = block_job_cancel_cb; + job->opaque = &data; + block_job_cancel(job); + while (data.ret == -EINPROGRESS) { + qemu_aio_wait(); + } + return (data.cancelled && data.ret == 0) ? -ECANCELED : data.ret; +} + +void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns) +{ + assert(job->busy); + + /* Check cancellation *before* setting busy = false, too! */ + if (block_job_is_cancelled(job)) { + return; + } + + job->busy = false; + if (block_job_is_paused(job)) { + qemu_coroutine_yield(); + } else { + co_sleep_ns(clock, ns); + } + job->busy = true; +} + +BlockJobInfo *block_job_query(BlockJob *job) +{ + BlockJobInfo *info = g_new0(BlockJobInfo, 1); + info->type = g_strdup(job->job_type->job_type); + info->device = g_strdup(bdrv_get_device_name(job->bs)); + info->len = job->len; + info->busy = job->busy; + info->paused = job->paused; + info->offset = job->offset; + info->speed = job->speed; + info->io_status = job->iostatus; + return info; +} + +static void block_job_iostatus_set_err(BlockJob *job, int error) +{ + if (job->iostatus == BLOCK_DEVICE_IO_STATUS_OK) { + job->iostatus = error == ENOSPC ? BLOCK_DEVICE_IO_STATUS_NOSPACE : + BLOCK_DEVICE_IO_STATUS_FAILED; + } +} + + +BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, + BlockdevOnError on_err, + int is_read, int error) +{ + BlockErrorAction action; + + switch (on_err) { + case BLOCKDEV_ON_ERROR_ENOSPC: + action = (error == ENOSPC) ? BDRV_ACTION_STOP : BDRV_ACTION_REPORT; + break; + case BLOCKDEV_ON_ERROR_STOP: + action = BDRV_ACTION_STOP; + break; + case BLOCKDEV_ON_ERROR_REPORT: + action = BDRV_ACTION_REPORT; + break; + case BLOCKDEV_ON_ERROR_IGNORE: + action = BDRV_ACTION_IGNORE; + break; + default: + abort(); + } + bdrv_emit_qmp_error_event(job->bs, QEVENT_BLOCK_JOB_ERROR, action, is_read); + if (action == BDRV_ACTION_STOP) { + block_job_pause(job); + block_job_iostatus_set_err(job, error); + if (bs != job->bs) { + bdrv_iostatus_set_err(bs, error); + } + } + return action; +} diff --git a/blockjob.h b/blockjob.h new file mode 100644 index 0000000000..930cc3c46a --- /dev/null +++ b/blockjob.h @@ -0,0 +1,243 @@ +/* + * Declarations for long-running block device operations + * + * Copyright (c) 2011 IBM Corp. + * Copyright (c) 2012 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN + * THE SOFTWARE. + */ +#ifndef BLOCKJOB_H +#define BLOCKJOB_H 1 + +#include "block.h" + +/** + * BlockJobType: + * + * A class type for block job objects. + */ +typedef struct BlockJobType { + /** Derived BlockJob struct size */ + size_t instance_size; + + /** String describing the operation, part of query-block-jobs QMP API */ + const char *job_type; + + /** Optional callback for job types that support setting a speed limit */ + void (*set_speed)(BlockJob *job, int64_t speed, Error **errp); +} BlockJobType; + +/** + * BlockJob: + * + * Long-running operation on a BlockDriverState. + */ +struct BlockJob { + /** The job type, including the job vtable. */ + const BlockJobType *job_type; + + /** The block device on which the job is operating. */ + BlockDriverState *bs; + + /** + * The coroutine that executes the job. If not NULL, it is + * reentered when busy is false and the job is cancelled. + */ + Coroutine *co; + + /** + * Set to true if the job should cancel itself. The flag must + * always be tested just before toggling the busy flag from false + * to true. After a job has been cancelled, it should only yield + * if #qemu_aio_wait will ("sooner or later") reenter the coroutine. + */ + bool cancelled; + + /** + * Set to true if the job is either paused, or will pause itself + * as soon as possible (if busy == true). + */ + bool paused; + + /** + * Set to false by the job while it is in a quiescent state, where + * no I/O is pending and the job has yielded on any condition + * that is not detected by #qemu_aio_wait, such as a timer. + */ + bool busy; + + /** Status that is published by the query-block-jobs QMP API */ + BlockDeviceIoStatus iostatus; + + /** Offset that is published by the query-block-jobs QMP API */ + int64_t offset; + + /** Length that is published by the query-block-jobs QMP API */ + int64_t len; + + /** Speed that was set with @block_job_set_speed. */ + int64_t speed; + + /** The completion function that will be called when the job completes. */ + BlockDriverCompletionFunc *cb; + + /** The opaque value that is passed to the completion function. */ + void *opaque; +}; + +/** + * block_job_create: + * @job_type: The class object for the newly-created job. + * @bs: The block + * @speed: The maximum speed, in bytes per second, or 0 for unlimited. + * @cb: Completion function for the job. + * @opaque: Opaque pointer value passed to @cb. + * @errp: Error object. + * + * Create a new long-running block device job and return it. The job + * will call @cb asynchronously when the job completes. Note that + * @bs may have been closed at the time the @cb it is called. If + * this is the case, the job may be reported as either cancelled or + * completed. + * + * This function is not part of the public job interface; it should be + * called from a wrapper that is specific to the job type. + */ +void *block_job_create(const BlockJobType *job_type, BlockDriverState *bs, + int64_t speed, BlockDriverCompletionFunc *cb, + void *opaque, Error **errp); + +/** + * block_job_sleep_ns: + * @job: The job that calls the function. + * @clock: The clock to sleep on. + * @ns: How many nanoseconds to stop for. + * + * Put the job to sleep (assuming that it wasn't canceled) for @ns + * nanoseconds. Canceling the job will interrupt the wait immediately. + */ +void block_job_sleep_ns(BlockJob *job, QEMUClock *clock, int64_t ns); + +/** + * block_job_complete: + * @job: The job being completed. + * @ret: The status code. + * + * Call the completion function that was registered at creation time, and + * free @job. + */ +void block_job_complete(BlockJob *job, int ret); + +/** + * block_job_set_speed: + * @job: The job to set the speed for. + * @speed: The new value + * @errp: Error object. + * + * Set a rate-limiting parameter for the job; the actual meaning may + * vary depending on the job type. + */ +void block_job_set_speed(BlockJob *job, int64_t speed, Error **errp); + +/** + * block_job_cancel: + * @job: The job to be canceled. + * + * Asynchronously cancel the specified job. + */ +void block_job_cancel(BlockJob *job); + +/** + * block_job_is_cancelled: + * @job: The job being queried. + * + * Returns whether the job is scheduled for cancellation. + */ +bool block_job_is_cancelled(BlockJob *job); + +/** + * block_job_query: + * @job: The job to get information about. + * + * Return information about a job. + */ +BlockJobInfo *block_job_query(BlockJob *job); + +/** + * block_job_pause: + * @job: The job to be paused. + * + * Asynchronously pause the specified job. + */ +void block_job_pause(BlockJob *job); + +/** + * block_job_resume: + * @job: The job to be resumed. + * + * Resume the specified job. + */ +void block_job_resume(BlockJob *job); + +/** + * block_job_is_paused: + * @job: The job being queried. + * + * Returns whether the job is currently paused, or will pause + * as soon as it reaches a sleeping point. + */ +bool block_job_is_paused(BlockJob *job); + +/** + * block_job_cancel_sync: + * @job: The job to be canceled. + * + * Synchronously cancel the job. The completion callback is called + * before the function returns. The job may actually complete + * instead of canceling itself; the circumstances under which this + * happens depend on the kind of job that is active. + * + * Returns the return value from the job if the job actually completed + * during the call, or -ECANCELED if it was canceled. + */ +int block_job_cancel_sync(BlockJob *job); + +/** + * block_job_iostatus_reset: + * @job: The job whose I/O status should be reset. + * + * Reset I/O status on @job. + */ +void block_job_iostatus_reset(BlockJob *job); + +/** + * block_job_error_action: + * @job: The job to signal an error for. + * @bs: The block device on which to set an I/O error. + * @on_err: The error action setting. + * @is_read: Whether the operation was a read. + * @error: The error that was reported. + * + * Report an I/O error for a block job and possibly stop the VM. Return the + * action that was selected based on @on_err and @error. + */ +BlockErrorAction block_job_error_action(BlockJob *job, BlockDriverState *bs, + BlockdevOnError on_err, + int is_read, int error); +#endif diff --git a/configure b/configure index c5e5bb2e81..e58846d5e2 100755 --- a/configure +++ b/configure @@ -219,6 +219,7 @@ want_tools="yes" libiscsi="" coroutine="" seccomp="" +glusterfs="" # parse CC options first for opt do @@ -856,6 +857,10 @@ for opt do ;; --disable-seccomp) seccomp="no" ;; + --disable-glusterfs) glusterfs="no" + ;; + --enable-glusterfs) glusterfs="yes" + ;; *) echo "ERROR: unknown option $opt"; show_help="yes" ;; esac @@ -1128,6 +1133,8 @@ echo " --disable-seccomp disable seccomp support" echo " --enable-seccomp enables seccomp support" echo " --with-coroutine=BACKEND coroutine backend. Supported options:" echo " gthread, ucontext, sigaltstack, windows" +echo " --enable-glusterfs enable GlusterFS backend" +echo " --disable-glusterfs disable GlusterFS backend" echo "" echo "NOTE: The object files are built at the place where configure is launched" exit 1 @@ -2303,6 +2310,29 @@ EOF fi fi +########################################## +# glusterfs probe +if test "$glusterfs" != "no" ; then + cat > $TMPC < +int main(void) { + (void) glfs_new("volume"); + return 0; +} +EOF + glusterfs_libs="-lgfapi -lgfrpc -lgfxdr" + if compile_prog "" "$glusterfs_libs" ; then + glusterfs=yes + libs_tools="$glusterfs_libs $libs_tools" + libs_softmmu="$glusterfs_libs $libs_softmmu" + else + if test "$glusterfs" = "yes" ; then + feature_not_found "GlusterFS backend support" + fi + glusterfs=no + fi +fi + # # Check for xxxat() functions when we are building linux-user # emulator. This is done because older glibc versions don't @@ -3170,6 +3200,7 @@ echo "libiscsi support $libiscsi" echo "build guest agent $guest_agent" echo "seccomp support $seccomp" echo "coroutine backend $coroutine_backend" +echo "GlusterFS support $glusterfs" if test "$sdl_too_old" = "yes"; then echo "-> Your SDL version is too old - please upgrade to have SDL support" @@ -3516,6 +3547,10 @@ if test "$has_environ" = "yes" ; then echo "CONFIG_HAS_ENVIRON=y" >> $config_host_mak fi +if test "$glusterfs" = "yes" ; then + echo "CONFIG_GLUSTERFS=y" >> $config_host_mak +fi + # USB host support case "$usb" in linux) diff --git a/hmp-commands.hx b/hmp-commands.hx index 0302458df2..e0b537d0cc 100644 --- a/hmp-commands.hx +++ b/hmp-commands.hx @@ -99,9 +99,10 @@ ETEXI { .name = "block_job_cancel", - .args_type = "device:B", - .params = "device", - .help = "stop an active background block operation", + .args_type = "force:-f,device:B", + .params = "[-f] device", + .help = "stop an active background block operation (use -f" + "\n\t\t\t if the operation is currently paused)", .mhandler.cmd = hmp_block_job_cancel, }, @@ -109,6 +110,34 @@ STEXI @item block_job_cancel @findex block_job_cancel Stop an active block streaming operation. +ETEXI + + { + .name = "block_job_pause", + .args_type = "device:B", + .params = "device", + .help = "pause an active background block operation", + .mhandler.cmd = hmp_block_job_pause, + }, + +STEXI +@item block_job_pause +@findex block_job_pause +Pause an active block streaming operation. +ETEXI + + { + .name = "block_job_resume", + .args_type = "device:B", + .params = "device", + .help = "resume a paused background block operation", + .mhandler.cmd = hmp_block_job_resume, + }, + +STEXI +@item block_job_resume +@findex block_job_resume +Resume a paused block streaming operation. ETEXI { diff --git a/hmp.c b/hmp.c index 3306bcdbb4..70bdec2433 100644 --- a/hmp.c +++ b/hmp.c @@ -930,7 +930,8 @@ void hmp_block_stream(Monitor *mon, const QDict *qdict) int64_t speed = qdict_get_try_int(qdict, "speed", 0); qmp_block_stream(device, base != NULL, base, - qdict_haskey(qdict, "speed"), speed, &error); + qdict_haskey(qdict, "speed"), speed, + BLOCKDEV_ON_ERROR_REPORT, true, &error); hmp_handle_error(mon, &error); } @@ -950,8 +951,29 @@ void hmp_block_job_cancel(Monitor *mon, const QDict *qdict) { Error *error = NULL; const char *device = qdict_get_str(qdict, "device"); + bool force = qdict_get_try_bool(qdict, "force", 0); - qmp_block_job_cancel(device, &error); + qmp_block_job_cancel(device, true, force, &error); + + hmp_handle_error(mon, &error); +} + +void hmp_block_job_pause(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_pause(device, &error); + + hmp_handle_error(mon, &error); +} + +void hmp_block_job_resume(Monitor *mon, const QDict *qdict) +{ + Error *error = NULL; + const char *device = qdict_get_str(qdict, "device"); + + qmp_block_job_resume(device, &error); hmp_handle_error(mon, &error); } diff --git a/hmp.h b/hmp.h index 48b9c59f8a..71ea384523 100644 --- a/hmp.h +++ b/hmp.h @@ -64,6 +64,8 @@ void hmp_block_set_io_throttle(Monitor *mon, const QDict *qdict); void hmp_block_stream(Monitor *mon, const QDict *qdict); void hmp_block_job_set_speed(Monitor *mon, const QDict *qdict); void hmp_block_job_cancel(Monitor *mon, const QDict *qdict); +void hmp_block_job_pause(Monitor *mon, const QDict *qdict); +void hmp_block_job_resume(Monitor *mon, const QDict *qdict); void hmp_migrate(Monitor *mon, const QDict *qdict); void hmp_device_del(Monitor *mon, const QDict *qdict); void hmp_dump_guest_memory(Monitor *mon, const QDict *qdict); diff --git a/hw/fdc.c b/hw/fdc.c index 08830c1ba2..43b0f20503 100644 --- a/hw/fdc.c +++ b/hw/fdc.c @@ -1994,11 +1994,11 @@ static int fdctrl_connect_drives(FDCtrl *fdctrl) drive->fdctrl = fdctrl; if (drive->bs) { - if (bdrv_get_on_error(drive->bs, 0) != BLOCK_ERR_STOP_ENOSPC) { + if (bdrv_get_on_error(drive->bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) { error_report("fdc doesn't support drive option werror"); return -1; } - if (bdrv_get_on_error(drive->bs, 1) != BLOCK_ERR_REPORT) { + if (bdrv_get_on_error(drive->bs, 1) != BLOCKDEV_ON_ERROR_REPORT) { error_report("fdc doesn't support drive option rerror"); return -1; } diff --git a/hw/ide/core.c b/hw/ide/core.c index d6fb69c634..d683a8cc84 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -556,32 +556,22 @@ void ide_dma_error(IDEState *s) static int ide_handle_rw_error(IDEState *s, int error, int op) { - int is_read = (op & BM_STATUS_RETRY_READ); - BlockErrorAction action = bdrv_get_on_error(s->bs, is_read); + bool is_read = (op & BM_STATUS_RETRY_READ) != 0; + BlockErrorAction action = bdrv_get_error_action(s->bs, is_read, error); - if (action == BLOCK_ERR_IGNORE) { - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read); - return 0; - } - - if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) - || action == BLOCK_ERR_STOP_ANY) { + if (action == BDRV_ACTION_STOP) { s->bus->dma->ops->set_unit(s->bus->dma, s->unit); s->bus->error_status = op; - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read); - vm_stop(RUN_STATE_IO_ERROR); - bdrv_iostatus_set_err(s->bs, error); - } else { + } else if (action == BDRV_ACTION_REPORT) { if (op & BM_STATUS_DMA_RETRY) { dma_buf_commit(s); ide_dma_error(s); } else { ide_rw_error(s); } - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read); } - - return 1; + bdrv_error_action(s->bs, action, is_read, error); + return action != BDRV_ACTION_IGNORE; } void ide_dma_cb(void *opaque, int ret) diff --git a/hw/ide/pci.c b/hw/ide/pci.c index 88c0942e34..644533f777 100644 --- a/hw/ide/pci.c +++ b/hw/ide/pci.c @@ -188,7 +188,7 @@ static void bmdma_restart_bh(void *opaque) { BMDMAState *bm = opaque; IDEBus *bus = bm->bus; - int is_read; + bool is_read; int error_status; qemu_bh_delete(bm->bh); @@ -198,7 +198,7 @@ static void bmdma_restart_bh(void *opaque) return; } - is_read = !!(bus->error_status & BM_STATUS_RETRY_READ); + is_read = (bus->error_status & BM_STATUS_RETRY_READ) != 0; /* The error status must be cleared before resubmitting the request: The * request may fail again, and this case can only be distinguished if the diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 95e91585e6..99bb02ebf8 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -386,23 +386,11 @@ static void scsi_read_data(SCSIRequest *req) */ static int scsi_handle_rw_error(SCSIDiskReq *r, int error) { - int is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV); + bool is_read = (r->req.cmd.xfer == SCSI_XFER_FROM_DEV); SCSIDiskState *s = DO_UPCAST(SCSIDiskState, qdev, r->req.dev); - BlockErrorAction action = bdrv_get_on_error(s->qdev.conf.bs, is_read); + BlockErrorAction action = bdrv_get_error_action(s->qdev.conf.bs, is_read, error); - if (action == BLOCK_ERR_IGNORE) { - bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_IGNORE, is_read); - return 0; - } - - if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) - || action == BLOCK_ERR_STOP_ANY) { - - bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_STOP, is_read); - vm_stop(RUN_STATE_IO_ERROR); - bdrv_iostatus_set_err(s->qdev.conf.bs, error); - scsi_req_retry(&r->req); - } else { + if (action == BDRV_ACTION_REPORT) { switch (error) { case ENOMEDIUM: scsi_check_condition(r, SENSE_CODE(NO_MEDIUM)); @@ -417,9 +405,12 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error) scsi_check_condition(r, SENSE_CODE(IO_ERROR)); break; } - bdrv_emit_qmp_error_event(s->qdev.conf.bs, BDRV_ACTION_REPORT, is_read); } - return 1; + bdrv_error_action(s->qdev.conf.bs, action, is_read, error); + if (action == BDRV_ACTION_STOP) { + scsi_req_retry(&r->req); + } + return action != BDRV_ACTION_IGNORE; } static void scsi_write_complete(void * opaque, int ret) diff --git a/hw/scsi-generic.c b/hw/scsi-generic.c index a5eb663ecf..d9045341ba 100644 --- a/hw/scsi-generic.c +++ b/hw/scsi-generic.c @@ -400,11 +400,11 @@ static int scsi_generic_initfn(SCSIDevice *s) return -1; } - if (bdrv_get_on_error(s->conf.bs, 0) != BLOCK_ERR_STOP_ENOSPC) { + if (bdrv_get_on_error(s->conf.bs, 0) != BLOCKDEV_ON_ERROR_ENOSPC) { error_report("Device doesn't support drive option werror"); return -1; } - if (bdrv_get_on_error(s->conf.bs, 1) != BLOCK_ERR_REPORT) { + if (bdrv_get_on_error(s->conf.bs, 1) != BLOCKDEV_ON_ERROR_REPORT) { error_report("Device doesn't support drive option rerror"); return -1; } diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index 6f6d172fd0..e25cc96477 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -64,31 +64,22 @@ static void virtio_blk_req_complete(VirtIOBlockReq *req, int status) } static int virtio_blk_handle_rw_error(VirtIOBlockReq *req, int error, - int is_read) + bool is_read) { - BlockErrorAction action = bdrv_get_on_error(req->dev->bs, is_read); + BlockErrorAction action = bdrv_get_error_action(req->dev->bs, is_read, error); VirtIOBlock *s = req->dev; - if (action == BLOCK_ERR_IGNORE) { - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_IGNORE, is_read); - return 0; - } - - if ((error == ENOSPC && action == BLOCK_ERR_STOP_ENOSPC) - || action == BLOCK_ERR_STOP_ANY) { + if (action == BDRV_ACTION_STOP) { req->next = s->rq; s->rq = req; - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_STOP, is_read); - vm_stop(RUN_STATE_IO_ERROR); - bdrv_iostatus_set_err(s->bs, error); - } else { + } else if (action == BDRV_ACTION_REPORT) { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); bdrv_acct_done(s->bs, &req->acct); g_free(req); - bdrv_emit_qmp_error_event(s->bs, BDRV_ACTION_REPORT, is_read); } - return 1; + bdrv_error_action(s->bs, action, is_read, error); + return action != BDRV_ACTION_IGNORE; } static void virtio_blk_rw_complete(void *opaque, int ret) @@ -98,7 +89,7 @@ static void virtio_blk_rw_complete(void *opaque, int ret) trace_virtio_blk_rw_complete(req, ret); if (ret) { - int is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT); + bool is_read = !(ldl_p(&req->out->type) & VIRTIO_BLK_T_OUT); if (virtio_blk_handle_rw_error(req, -ret, is_read)) return; } diff --git a/monitor.c b/monitor.c index cd735bbbeb..a0e3ffb924 100644 --- a/monitor.c +++ b/monitor.c @@ -450,6 +450,7 @@ static const char *monitor_event_names[] = { [QEVENT_SPICE_DISCONNECTED] = "SPICE_DISCONNECTED", [QEVENT_BLOCK_JOB_COMPLETED] = "BLOCK_JOB_COMPLETED", [QEVENT_BLOCK_JOB_CANCELLED] = "BLOCK_JOB_CANCELLED", + [QEVENT_BLOCK_JOB_ERROR] = "BLOCK_JOB_ERROR", [QEVENT_DEVICE_TRAY_MOVED] = "DEVICE_TRAY_MOVED", [QEVENT_SUSPEND] = "SUSPEND", [QEVENT_SUSPEND_DISK] = "SUSPEND_DISK", diff --git a/monitor.h b/monitor.h index e240c3f42d..b6e7d95a30 100644 --- a/monitor.h +++ b/monitor.h @@ -38,6 +38,7 @@ typedef enum MonitorEvent { QEVENT_SPICE_DISCONNECTED, QEVENT_BLOCK_JOB_COMPLETED, QEVENT_BLOCK_JOB_CANCELLED, + QEVENT_BLOCK_JOB_ERROR, QEVENT_DEVICE_TRAY_MOVED, QEVENT_SUSPEND, QEVENT_SUSPEND_DISK, diff --git a/qapi-schema.json b/qapi-schema.json index f4c21855cf..23abb94a1a 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1112,6 +1112,29 @@ ## { 'command': 'query-pci', 'returns': ['PciInfo'] } +## +# @BlockdevOnError: +# +# An enumeration of possible behaviors for errors on I/O operations. +# The exact meaning depends on whether the I/O was initiated by a guest +# or by a block job +# +# @report: for guest operations, report the error to the guest; +# for jobs, cancel the job +# +# @ignore: ignore the error, only report a QMP event (BLOCK_IO_ERROR +# or BLOCK_JOB_ERROR) +# +# @enospc: same as @stop on ENOSPC, same as @report otherwise. +# +# @stop: for guest operations, stop the virtual machine; +# for jobs, pause the job +# +# Since: 1.3 +## +{ 'enum': 'BlockdevOnError', + 'data': ['report', 'ignore', 'enospc', 'stop'] } + ## # @BlockJobInfo: # @@ -1123,15 +1146,24 @@ # # @len: the maximum progress value # +# @busy: false if the job is known to be in a quiescent state, with +# no pending I/O. Since 1.3. +# +# @paused: whether the job is paused or, if @busy is true, will +# pause itself as soon as possible. Since 1.3. +# # @offset: the current progress value # # @speed: the rate limit, bytes per second # +# @io-status: the status of the job (since 1.3) +# # Since: 1.1 ## { 'type': 'BlockJobInfo', 'data': {'type': 'str', 'device': 'str', 'len': 'int', - 'offset': 'int', 'speed': 'int'} } + 'offset': 'int', 'busy': 'bool', 'paused': 'bool', 'speed': 'int', + 'io-status': 'BlockDeviceIoStatus'} } ## # @query-block-jobs: @@ -1493,6 +1525,40 @@ 'returns': 'str' } ## +# @block-commit +# +# Live commit of data from overlay image nodes into backing nodes - i.e., +# writes data between 'top' and 'base' into 'base'. +# +# @device: the name of the device +# +# @base: #optional The file name of the backing image to write data into. +# If not specified, this is the deepest backing image +# +# @top: The file name of the backing image within the image chain, +# which contains the topmost data to be committed down. +# Note, the active layer as 'top' is currently unsupported. +# +# If top == base, that is an error. +# +# +# @speed: #optional the maximum speed, in bytes per second +# +# Returns: Nothing on success +# If commit or stream is already active on this device, DeviceInUse +# If @device does not exist, DeviceNotFound +# If image commit is not supported by this device, NotSupported +# If @base or @top is invalid, a generic error is returned +# If @top is the active layer, or omitted, a generic error is returned +# If @speed is invalid, InvalidParameter +# +# Since: 1.3 +# +## +{ 'command': 'block-commit', + 'data': { 'device': 'str', '*base': 'str', 'top': 'str', + '*speed': 'int' } } + # @migrate_cancel # # Cancel the current executing migration process. @@ -1828,13 +1894,18 @@ # # @speed: #optional the maximum speed, in bytes per second # +# @on-error: #optional the action to take on an error (default report). +# 'stop' and 'enospc' can only be used if the block device +# supports io-status (see BlockInfo). Since 1.3. +# # Returns: Nothing on success # If @device does not exist, DeviceNotFound # # Since: 1.1 ## -{ 'command': 'block-stream', 'data': { 'device': 'str', '*base': 'str', - '*speed': 'int' } } +{ 'command': 'block-stream', + 'data': { 'device': 'str', '*base': 'str', '*speed': 'int', + '*on-error': 'BlockdevOnError' } } ## # @block-job-set-speed: @@ -1878,12 +1949,58 @@ # # @device: the device name # +# @force: #optional whether to allow cancellation of a paused job (default +# false). Since 1.3. +# # Returns: Nothing on success # If no background operation is active on this device, DeviceNotActive # # Since: 1.1 ## -{ 'command': 'block-job-cancel', 'data': { 'device': 'str' } } +{ 'command': 'block-job-cancel', 'data': { 'device': 'str', '*force': 'bool' } } + +## +# @block-job-pause: +# +# Pause an active background block operation. +# +# This command returns immediately after marking the active background block +# operation for pausing. It is an error to call this command if no +# operation is in progress. Pausing an already paused job has no cumulative +# effect; a single block-job-resume command will resume the job. +# +# The operation will pause as soon as possible. No event is emitted when +# the operation is actually paused. Cancelling a paused job automatically +# resumes it. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-pause', 'data': { 'device': 'str' } } + +## +# @block-job-resume: +# +# Resume an active background block operation. +# +# This command returns immediately after resuming a paused background block +# operation. It is an error to call this command if no operation is in +# progress. Resuming an already running job is not an error. +# +# This command also clears the error status of the job. +# +# @device: the device name +# +# Returns: Nothing on success +# If no background operation is active on this device, DeviceNotActive +# +# Since: 1.3 +## +{ 'command': 'block-job-resume', 'data': { 'device': 'str' } } ## # @ObjectTypeInfo: diff --git a/qemu-tool.c b/qemu-tool.c index 18205babab..f2f98138ce 100644 --- a/qemu-tool.c +++ b/qemu-tool.c @@ -19,6 +19,7 @@ #include "qemu-log.h" #include "migration.h" #include "main-loop.h" +#include "sysemu.h" #include "qemu_socket.h" #include "slirp/libslirp.h" @@ -37,6 +38,11 @@ const char *qemu_get_vm_name(void) Monitor *cur_mon; +void vm_stop(RunState state) +{ + abort(); +} + int monitor_cur_is_qmp(void) { return 0; diff --git a/qerror.h b/qerror.h index d0a76a4f71..c91708cc3c 100644 --- a/qerror.h +++ b/qerror.h @@ -48,6 +48,12 @@ void assert_no_error(Error *err); #define QERR_BASE_NOT_FOUND \ ERROR_CLASS_GENERIC_ERROR, "Base '%s' not found" +#define QERR_BLOCK_JOB_NOT_ACTIVE \ + ERROR_CLASS_DEVICE_NOT_ACTIVE, "No active block job on device '%s'" + +#define QERR_BLOCK_JOB_PAUSED \ + ERROR_CLASS_GENERIC_ERROR, "The block job for device '%s' is currently paused" + #define QERR_BLOCK_FORMAT_FEATURE_NOT_SUPPORTED \ ERROR_CLASS_GENERIC_ERROR, "Block format '%s' used by device '%s' does not support feature '%s'" diff --git a/qmp-commands.hx b/qmp-commands.hx index 36e08d9ffc..1aef27ca14 100644 --- a/qmp-commands.hx +++ b/qmp-commands.hx @@ -787,10 +787,16 @@ EQMP { .name = "block-stream", - .args_type = "device:B,base:s?,speed:o?", + .args_type = "device:B,base:s?,speed:o?,on-error:s?", .mhandler.cmd_new = qmp_marshal_input_block_stream, }, + { + .name = "block-commit", + .args_type = "device:B,base:s?,top:s,speed:o?", + .mhandler.cmd_new = qmp_marshal_input_block_commit, + }, + { .name = "block-job-set-speed", .args_type = "device:B,speed:o", @@ -799,9 +805,19 @@ EQMP { .name = "block-job-cancel", - .args_type = "device:B", + .args_type = "device:B,force:b?", .mhandler.cmd_new = qmp_marshal_input_block_job_cancel, }, + { + .name = "block-job-pause", + .args_type = "device:B", + .mhandler.cmd_new = qmp_marshal_input_block_job_pause, + }, + { + .name = "block-job-resume", + .args_type = "device:B", + .mhandler.cmd_new = qmp_marshal_input_block_job_resume, + }, { .name = "transaction", .args_type = "actions:q", diff --git a/tests/qemu-iotests/030 b/tests/qemu-iotests/030 index 55b16f81dd..dd4ef11996 100755 --- a/tests/qemu-iotests/030 +++ b/tests/qemu-iotests/030 @@ -18,6 +18,7 @@ # along with this program. If not, see . # +import time import os import iotests from iotests import qemu_img, qemu_io @@ -98,6 +99,43 @@ class TestSingleDrive(ImageStreamingTestCase): qemu_io('-c', 'map', test_img), 'image file map does not match backing file after streaming') + def test_stream_pause(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('block-job-pause', device='drive0') + self.assert_qmp(result, 'return', {}) + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + offset = self.dictpath(result, 'return[0]/offset') + + time.sleep(1) + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/offset', offset) + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + self.assertEqual(qemu_io('-c', 'map', backing_img), + qemu_io('-c', 'map', test_img), + 'image file map does not match backing file after streaming') + def test_stream_partial(self): self.assert_no_active_streams() @@ -157,6 +195,226 @@ class TestSmallerBackingFile(ImageStreamingTestCase): self.assert_no_active_streams() self.vm.shutdown() +class TestErrors(ImageStreamingTestCase): + image_len = 2 * 1024 * 1024 # MB + + # this should match STREAM_BUFFER_SIZE/512 in block/stream.c + STREAM_BUFFER_SIZE = 512 * 1024 + + def create_blkdebug_file(self, name, event, errno): + file = open(name, 'w') + file.write(''' +[inject-error] +state = "1" +event = "%s" +errno = "%d" +immediately = "off" +once = "on" +sector = "%d" + +[set-state] +state = "1" +event = "%s" +new_state = "2" + +[set-state] +state = "2" +event = "%s" +new_state = "1" +''' % (event, errno, self.STREAM_BUFFER_SIZE / 512, event, event)) + file.close() + +class TestEIO(TestErrors): + def setUp(self): + self.blkdebug_file = backing_img + ".blkdebug" + self.create_image(backing_img, TestErrors.image_len) + self.create_blkdebug_file(self.blkdebug_file, "read_aio", 5) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw' + % (self.blkdebug_file, backing_img), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(self.blkdebug_file) + + def test_report(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0') + self.assert_qmp(result, 'return', {}) + + completed = False + error = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + def test_ignore(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='ignore') + self.assert_qmp(result, 'return', {}) + + error = False + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + def test_stop(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='stop') + self.assert_qmp(result, 'return', {}) + + error = False + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', True) + self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(result, 'return[0]/io-status', 'failed') + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.assert_qmp(result, 'return[0]/io-status', 'ok') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp_absent(event, 'data/error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + + def test_enospc(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='enospc') + self.assert_qmp(result, 'return', {}) + + completed = False + error = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/error', 'Input/output error') + self.assert_qmp(event, 'data/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() + +class TestENOSPC(TestErrors): + def setUp(self): + self.blkdebug_file = backing_img + ".blkdebug" + self.create_image(backing_img, TestErrors.image_len) + self.create_blkdebug_file(self.blkdebug_file, "read_aio", 28) + qemu_img('create', '-f', iotests.imgfmt, + '-o', 'backing_file=blkdebug:%s:%s,backing_fmt=raw' + % (self.blkdebug_file, backing_img), + test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(backing_img) + os.remove(self.blkdebug_file) + + def test_enospc(self): + self.assert_no_active_streams() + + result = self.vm.qmp('block-stream', device='drive0', on_error='enospc') + self.assert_qmp(result, 'return', {}) + + error = False + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_ERROR': + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/operation', 'read') + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', True) + self.assert_qmp(result, 'return[0]/offset', self.STREAM_BUFFER_SIZE) + self.assert_qmp(result, 'return[0]/io-status', 'nospace') + + result = self.vm.qmp('block-job-resume', device='drive0') + self.assert_qmp(result, 'return', {}) + + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/paused', False) + self.assert_qmp(result, 'return[0]/io-status', 'ok') + error = True + elif event['event'] == 'BLOCK_JOB_COMPLETED': + self.assertTrue(error, 'job completed unexpectedly') + self.assert_qmp(event, 'data/type', 'stream') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp_absent(event, 'data/error') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_streams() + self.vm.shutdown() class TestStreamStop(ImageStreamingTestCase): image_len = 8 * 1024 * 1024 * 1024 # GB @@ -173,8 +431,6 @@ class TestStreamStop(ImageStreamingTestCase): os.remove(backing_img) def test_stream_stop(self): - import time - self.assert_no_active_streams() result = self.vm.qmp('block-stream', device='drive0') diff --git a/tests/qemu-iotests/030.out b/tests/qemu-iotests/030.out index 2f7d3902f2..fa16b5ccef 100644 --- a/tests/qemu-iotests/030.out +++ b/tests/qemu-iotests/030.out @@ -1,5 +1,5 @@ -....... +............. ---------------------------------------------------------------------- -Ran 7 tests +Ran 13 tests OK diff --git a/tests/qemu-iotests/040 b/tests/qemu-iotests/040 new file mode 100755 index 0000000000..258e7eae38 --- /dev/null +++ b/tests/qemu-iotests/040 @@ -0,0 +1,178 @@ +#!/usr/bin/env python +# +# Tests for image block commit. +# +# Copyright (C) 2012 IBM, Corp. +# Copyright (C) 2012 Red Hat, Inc. +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . +# +# Test for live block commit +# Derived from Image Streaming Test 030 + +import time +import os +import iotests +from iotests import qemu_img, qemu_io +import struct + +backing_img = os.path.join(iotests.test_dir, 'backing.img') +mid_img = os.path.join(iotests.test_dir, 'mid.img') +test_img = os.path.join(iotests.test_dir, 'test.img') + +class ImageCommitTestCase(iotests.QMPTestCase): + '''Abstract base class for image commit test cases''' + + def assert_no_active_commit(self): + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return', []) + + def cancel_and_wait(self, drive='drive0'): + '''Cancel a block job and wait for it to finish''' + result = self.vm.qmp('block-job-cancel', device=drive) + self.assert_qmp(result, 'return', {}) + + cancelled = False + while not cancelled: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_CANCELLED': + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp(event, 'data/device', drive) + cancelled = True + + self.assert_no_active_commit() + + def create_image(self, name, size): + file = open(name, 'w') + i = 0 + while i < size: + sector = struct.pack('>l504xl', i / 512, i / 512) + file.write(sector) + i = i + 512 + file.close() + + +class TestSingleDrive(ImageCommitTestCase): + image_len = 1 * 1024 * 1024 + test_len = 1 * 1024 * 256 + + def setUp(self): + self.create_image(backing_img, TestSingleDrive.image_len) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) + qemu_io('-c', 'write -P 0xab 0 524288', backing_img) + qemu_io('-c', 'write -P 0xef 524288 524288', mid_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(mid_img) + os.remove(backing_img) + + def test_commit(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img) + self.assert_qmp(result, 'return', {}) + + completed = False + while not completed: + for event in self.vm.get_qmp_events(wait=True): + if event['event'] == 'BLOCK_JOB_COMPLETED': + self.assert_qmp(event, 'data/type', 'commit') + self.assert_qmp(event, 'data/device', 'drive0') + self.assert_qmp(event, 'data/offset', self.image_len) + self.assert_qmp(event, 'data/len', self.image_len) + completed = True + + self.assert_no_active_commit() + self.vm.shutdown() + + self.assertEqual(-1, qemu_io('-c', 'read -P 0xab 0 524288', backing_img).find("verification failed")) + self.assertEqual(-1, qemu_io('-c', 'read -P 0xef 524288 524288', backing_img).find("verification failed")) + + def test_device_not_found(self): + result = self.vm.qmp('block-commit', device='nonexistent', top='%s' % mid_img) + self.assert_qmp(result, 'error/class', 'DeviceNotFound') + + def test_top_same_base(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Invalid files for merge: top and base are the same') + + def test_top_invalid(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='badfile', base='%s' % backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Top image file badfile not found') + + def test_base_invalid(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % mid_img, base='badfile') + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base \'badfile\' not found') + + def test_top_is_active(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % test_img, base='%s' % backing_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Top image as the active layer is currently unsupported') + + def test_top_and_base_reversed(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0', top='%s' % backing_img, base='%s' % mid_img) + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', 'Base (%(1)s) is not reachable from top (%(2)s)' % {"1" : mid_img, "2" : backing_img}) + + def test_top_omitted(self): + self.assert_no_active_commit() + result = self.vm.qmp('block-commit', device='drive0') + self.assert_qmp(result, 'error/class', 'GenericError') + self.assert_qmp(result, 'error/desc', "Parameter 'top' is missing") + + +class TestSetSpeed(ImageCommitTestCase): + image_len = 80 * 1024 * 1024 # MB + + def setUp(self): + qemu_img('create', backing_img, str(TestSetSpeed.image_len)) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % backing_img, mid_img) + qemu_img('create', '-f', iotests.imgfmt, '-o', 'backing_file=%s' % mid_img, test_img) + self.vm = iotests.VM().add_drive(test_img) + self.vm.launch() + + def tearDown(self): + self.vm.shutdown() + os.remove(test_img) + os.remove(mid_img) + os.remove(backing_img) + + def test_set_speed(self): + self.assert_no_active_commit() + + result = self.vm.qmp('block-commit', device='drive0', top=mid_img, speed=1024 * 1024) + self.assert_qmp(result, 'return', {}) + + # Ensure the speed we set was accepted + result = self.vm.qmp('query-block-jobs') + self.assert_qmp(result, 'return[0]/device', 'drive0') + self.assert_qmp(result, 'return[0]/speed', 1024 * 1024) + + self.cancel_and_wait() + + +if __name__ == '__main__': + iotests.main(supported_fmts=['qcow2', 'qed']) diff --git a/tests/qemu-iotests/040.out b/tests/qemu-iotests/040.out new file mode 100644 index 0000000000..dae404e278 --- /dev/null +++ b/tests/qemu-iotests/040.out @@ -0,0 +1,5 @@ +......... +---------------------------------------------------------------------- +Ran 9 tests + +OK diff --git a/tests/qemu-iotests/group b/tests/qemu-iotests/group index ebb5ca4b41..66d2ba9689 100644 --- a/tests/qemu-iotests/group +++ b/tests/qemu-iotests/group @@ -36,7 +36,7 @@ 027 rw auto quick 028 rw backing auto 029 rw auto quick -030 rw auto +030 rw auto backing 031 rw auto quick 032 rw auto 033 rw auto @@ -46,3 +46,4 @@ 037 rw auto backing 038 rw auto backing 039 rw auto +040 rw auto diff --git a/tests/qemu-iotests/iotests.py b/tests/qemu-iotests/iotests.py index e05b1d640b..3c60b2d163 100644 --- a/tests/qemu-iotests/iotests.py +++ b/tests/qemu-iotests/iotests.py @@ -19,6 +19,7 @@ import os import re import subprocess +import string import unittest import sys; sys.path.append(os.path.join(os.path.dirname(__file__), '..', '..', 'QMP')) import qmp @@ -96,9 +97,14 @@ class VM(object): os.remove(self._qemu_log_path) self._popen = None + underscore_to_dash = string.maketrans('_', '-') def qmp(self, cmd, **args): '''Invoke a QMP command and return the result dict''' - return self._qmp.cmd(cmd, args=args) + qmp_args = dict() + for k in args.keys(): + qmp_args[k.translate(self.underscore_to_dash)] = args[k] + + return self._qmp.cmd(cmd, args=qmp_args) def get_qmp_events(self, wait=False): '''Poll for queued QMP events and return a list of dicts''' @@ -132,6 +138,13 @@ class QMPTestCase(unittest.TestCase): self.fail('invalid index "%s" in path "%s" in "%s"' % (idx, path, str(d))) return d + def assert_qmp_absent(self, d, path): + try: + result = self.dictpath(d, path) + except AssertionError: + return + self.fail('path "%s" has value "%s"' % (path, str(result))) + def assert_qmp(self, d, path, value): '''Assert that the value for a specific path in a QMP dict matches''' result = self.dictpath(d, path) diff --git a/trace-events b/trace-events index f5b5097552..42b66f19f4 100644 --- a/trace-events +++ b/trace-events @@ -74,10 +74,14 @@ bdrv_co_do_copy_on_readv(void *bs, int64_t sector_num, int nb_sectors, int64_t c # block/stream.c stream_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" stream_start(void *bs, void *base, void *s, void *co, void *opaque) "bs %p base %p s %p co %p opaque %p" +commit_one_iteration(void *s, int64_t sector_num, int nb_sectors, int is_allocated) "s %p sector_num %"PRId64" nb_sectors %d is_allocated %d" +commit_start(void *bs, void *base, void *top, void *s, void *co, void *opaque) "bs %p base %p top %p s %p co %p opaque %p" # blockdev.c qmp_block_job_cancel(void *job) "job %p" -block_stream_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" +qmp_block_job_pause(void *job) "job %p" +qmp_block_job_resume(void *job) "job %p" +block_job_cb(void *bs, void *job, int ret) "bs %p job %p ret %d" qmp_block_stream(void *bs, void *job) "bs %p job %p" # hw/virtio-blk.c diff --git a/uri.c b/uri.c new file mode 100644 index 0000000000..dd922de339 --- /dev/null +++ b/uri.c @@ -0,0 +1,2249 @@ +/** + * uri.c: set of generic URI related routines + * + * Reference: RFCs 3986, 2732 and 2373 + * + * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Daniel Veillard shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization from him. + * + * daniel@veillard.com + * + ** + * + * Copyright (C) 2007, 2009-2010 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: + * Richard W.M. Jones + * + */ + +#include +#include +#include + +#include "uri.h" + +static void uri_clean(URI *uri); + +/* + * Old rule from 2396 used in legacy handling code + * alpha = lowalpha | upalpha + */ +#define IS_ALPHA(x) (IS_LOWALPHA(x) || IS_UPALPHA(x)) + + +/* + * lowalpha = "a" | "b" | "c" | "d" | "e" | "f" | "g" | "h" | "i" | "j" | + * "k" | "l" | "m" | "n" | "o" | "p" | "q" | "r" | "s" | "t" | + * "u" | "v" | "w" | "x" | "y" | "z" + */ + +#define IS_LOWALPHA(x) (((x) >= 'a') && ((x) <= 'z')) + +/* + * upalpha = "A" | "B" | "C" | "D" | "E" | "F" | "G" | "H" | "I" | "J" | + * "K" | "L" | "M" | "N" | "O" | "P" | "Q" | "R" | "S" | "T" | + * "U" | "V" | "W" | "X" | "Y" | "Z" + */ +#define IS_UPALPHA(x) (((x) >= 'A') && ((x) <= 'Z')) + +#ifdef IS_DIGIT +#undef IS_DIGIT +#endif +/* + * digit = "0" | "1" | "2" | "3" | "4" | "5" | "6" | "7" | "8" | "9" + */ +#define IS_DIGIT(x) (((x) >= '0') && ((x) <= '9')) + +/* + * alphanum = alpha | digit + */ + +#define IS_ALPHANUM(x) (IS_ALPHA(x) || IS_DIGIT(x)) + +/* + * mark = "-" | "_" | "." | "!" | "~" | "*" | "'" | "(" | ")" + */ + +#define IS_MARK(x) (((x) == '-') || ((x) == '_') || ((x) == '.') || \ + ((x) == '!') || ((x) == '~') || ((x) == '*') || ((x) == '\'') || \ + ((x) == '(') || ((x) == ')')) + +/* + * unwise = "{" | "}" | "|" | "\" | "^" | "`" + */ + +#define IS_UNWISE(p) \ + (((*(p) == '{')) || ((*(p) == '}')) || ((*(p) == '|')) || \ + ((*(p) == '\\')) || ((*(p) == '^')) || ((*(p) == '[')) || \ + ((*(p) == ']')) || ((*(p) == '`'))) +/* + * reserved = ";" | "/" | "?" | ":" | "@" | "&" | "=" | "+" | "$" | "," | + * "[" | "]" + */ + +#define IS_RESERVED(x) (((x) == ';') || ((x) == '/') || ((x) == '?') || \ + ((x) == ':') || ((x) == '@') || ((x) == '&') || ((x) == '=') || \ + ((x) == '+') || ((x) == '$') || ((x) == ',') || ((x) == '[') || \ + ((x) == ']')) + +/* + * unreserved = alphanum | mark + */ + +#define IS_UNRESERVED(x) (IS_ALPHANUM(x) || IS_MARK(x)) + +/* + * Skip to next pointer char, handle escaped sequences + */ + +#define NEXT(p) ((*p == '%')? p += 3 : p++) + +/* + * Productions from the spec. + * + * authority = server | reg_name + * reg_name = 1*( unreserved | escaped | "$" | "," | + * ";" | ":" | "@" | "&" | "=" | "+" ) + * + * path = [ abs_path | opaque_part ] + */ + + +/************************************************************************ + * * + * RFC 3986 parser * + * * + ************************************************************************/ + +#define ISA_DIGIT(p) ((*(p) >= '0') && (*(p) <= '9')) +#define ISA_ALPHA(p) (((*(p) >= 'a') && (*(p) <= 'z')) || \ + ((*(p) >= 'A') && (*(p) <= 'Z'))) +#define ISA_HEXDIG(p) \ + (ISA_DIGIT(p) || ((*(p) >= 'a') && (*(p) <= 'f')) || \ + ((*(p) >= 'A') && (*(p) <= 'F'))) + +/* + * sub-delims = "!" / "$" / "&" / "'" / "(" / ")" + * / "*" / "+" / "," / ";" / "=" + */ +#define ISA_SUB_DELIM(p) \ + (((*(p) == '!')) || ((*(p) == '$')) || ((*(p) == '&')) || \ + ((*(p) == '(')) || ((*(p) == ')')) || ((*(p) == '*')) || \ + ((*(p) == '+')) || ((*(p) == ',')) || ((*(p) == ';')) || \ + ((*(p) == '=')) || ((*(p) == '\''))) + +/* + * gen-delims = ":" / "/" / "?" / "#" / "[" / "]" / "@" + */ +#define ISA_GEN_DELIM(p) \ + (((*(p) == ':')) || ((*(p) == '/')) || ((*(p) == '?')) || \ + ((*(p) == '#')) || ((*(p) == '[')) || ((*(p) == ']')) || \ + ((*(p) == '@'))) + +/* + * reserved = gen-delims / sub-delims + */ +#define ISA_RESERVED(p) (ISA_GEN_DELIM(p) || (ISA_SUB_DELIM(p))) + +/* + * unreserved = ALPHA / DIGIT / "-" / "." / "_" / "~" + */ +#define ISA_UNRESERVED(p) \ + ((ISA_ALPHA(p)) || (ISA_DIGIT(p)) || ((*(p) == '-')) || \ + ((*(p) == '.')) || ((*(p) == '_')) || ((*(p) == '~'))) + +/* + * pct-encoded = "%" HEXDIG HEXDIG + */ +#define ISA_PCT_ENCODED(p) \ + ((*(p) == '%') && (ISA_HEXDIG(p + 1)) && (ISA_HEXDIG(p + 2))) + +/* + * pchar = unreserved / pct-encoded / sub-delims / ":" / "@" + */ +#define ISA_PCHAR(p) \ + (ISA_UNRESERVED(p) || ISA_PCT_ENCODED(p) || ISA_SUB_DELIM(p) || \ + ((*(p) == ':')) || ((*(p) == '@'))) + +/** + * rfc3986_parse_scheme: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse an URI scheme + * + * ALPHA *( ALPHA / DIGIT / "+" / "-" / "." ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_scheme(URI *uri, const char **str) { + const char *cur; + + if (str == NULL) + return(-1); + + cur = *str; + if (!ISA_ALPHA(cur)) + return(2); + cur++; + while (ISA_ALPHA(cur) || ISA_DIGIT(cur) || + (*cur == '+') || (*cur == '-') || (*cur == '.')) cur++; + if (uri != NULL) { + if (uri->scheme != NULL) g_free(uri->scheme); + uri->scheme = g_strndup(*str, cur - *str); + } + *str = cur; + return(0); +} + +/** + * rfc3986_parse_fragment: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the query part of an URI + * + * fragment = *( pchar / "/" / "?" ) + * NOTE: the strict syntax as defined by 3986 does not allow '[' and ']' + * in the fragment identifier but this is used very broadly for + * xpointer scheme selection, so we are allowing it here to not break + * for example all the DocBook processing chains. + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_fragment(URI *uri, const char **str) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + + while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || + (*cur == '[') || (*cur == ']') || + ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + if (uri->fragment != NULL) + g_free(uri->fragment); + if (uri->cleanup & 2) + uri->fragment = g_strndup(*str, cur - *str); + else + uri->fragment = uri_string_unescape(*str, cur - *str, NULL); + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_query: + * @uri: pointer to an URI structure + * @str: pointer to the string to analyze + * + * Parse the query part of an URI + * + * query = *uric + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_query(URI *uri, const char **str) +{ + const char *cur; + + if (str == NULL) + return (-1); + + cur = *str; + + while ((ISA_PCHAR(cur)) || (*cur == '/') || (*cur == '?') || + ((uri != NULL) && (uri->cleanup & 1) && (IS_UNWISE(cur)))) + NEXT(cur); + if (uri != NULL) { + if (uri->query != NULL) + g_free (uri->query); + uri->query = g_strndup (*str, cur - *str); + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_port: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse a port part and fills in the appropriate fields + * of the @uri structure + * + * port = *DIGIT + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_port(URI *uri, const char **str) +{ + const char *cur = *str; + + if (ISA_DIGIT(cur)) { + if (uri != NULL) + uri->port = 0; + while (ISA_DIGIT(cur)) { + if (uri != NULL) + uri->port = uri->port * 10 + (*cur - '0'); + cur++; + } + *str = cur; + return(0); + } + return(1); +} + +/** + * rfc3986_parse_user_info: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an user informations part and fills in the appropriate fields + * of the @uri structure + * + * userinfo = *( unreserved / pct-encoded / sub-delims / ":" ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_user_info(URI *uri, const char **str) +{ + const char *cur; + + cur = *str; + while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || + ISA_SUB_DELIM(cur) || (*cur == ':')) + NEXT(cur); + if (*cur == '@') { + if (uri != NULL) { + if (uri->user != NULL) g_free(uri->user); + if (uri->cleanup & 2) + uri->user = g_strndup(*str, cur - *str); + else + uri->user = uri_string_unescape(*str, cur - *str, NULL); + } + *str = cur; + return(0); + } + return(1); +} + +/** + * rfc3986_parse_dec_octet: + * @str: the string to analyze + * + * dec-octet = DIGIT ; 0-9 + * / %x31-39 DIGIT ; 10-99 + * / "1" 2DIGIT ; 100-199 + * / "2" %x30-34 DIGIT ; 200-249 + * / "25" %x30-35 ; 250-255 + * + * Skip a dec-octet. + * + * Returns 0 if found and skipped, 1 otherwise + */ +static int +rfc3986_parse_dec_octet(const char **str) { + const char *cur = *str; + + if (!(ISA_DIGIT(cur))) + return(1); + if (!ISA_DIGIT(cur+1)) + cur++; + else if ((*cur != '0') && (ISA_DIGIT(cur + 1)) && (!ISA_DIGIT(cur+2))) + cur += 2; + else if ((*cur == '1') && (ISA_DIGIT(cur + 1)) && (ISA_DIGIT(cur + 2))) + cur += 3; + else if ((*cur == '2') && (*(cur + 1) >= '0') && + (*(cur + 1) <= '4') && (ISA_DIGIT(cur + 2))) + cur += 3; + else if ((*cur == '2') && (*(cur + 1) == '5') && + (*(cur + 2) >= '0') && (*(cur + 1) <= '5')) + cur += 3; + else + return(1); + *str = cur; + return(0); +} +/** + * rfc3986_parse_host: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an host part and fills in the appropriate fields + * of the @uri structure + * + * host = IP-literal / IPv4address / reg-name + * IP-literal = "[" ( IPv6address / IPvFuture ) "]" + * IPv4address = dec-octet "." dec-octet "." dec-octet "." dec-octet + * reg-name = *( unreserved / pct-encoded / sub-delims ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_host(URI *uri, const char **str) +{ + const char *cur = *str; + const char *host; + + host = cur; + /* + * IPv6 and future adressing scheme are enclosed between brackets + */ + if (*cur == '[') { + cur++; + while ((*cur != ']') && (*cur != 0)) + cur++; + if (*cur != ']') + return(1); + cur++; + goto found; + } + /* + * try to parse an IPv4 + */ + if (ISA_DIGIT(cur)) { + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + if (*cur != '.') + goto not_ipv4; + cur++; + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + if (*cur != '.') + goto not_ipv4; + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + if (*cur != '.') + goto not_ipv4; + if (rfc3986_parse_dec_octet(&cur) != 0) + goto not_ipv4; + goto found; +not_ipv4: + cur = *str; + } + /* + * then this should be a hostname which can be empty + */ + while (ISA_UNRESERVED(cur) || ISA_PCT_ENCODED(cur) || ISA_SUB_DELIM(cur)) + NEXT(cur); +found: + if (uri != NULL) { + if (uri->authority != NULL) g_free(uri->authority); + uri->authority = NULL; + if (uri->server != NULL) g_free(uri->server); + if (cur != host) { + if (uri->cleanup & 2) + uri->server = g_strndup(host, cur - host); + else + uri->server = uri_string_unescape(host, cur - host, NULL); + } else + uri->server = NULL; + } + *str = cur; + return(0); +} + +/** + * rfc3986_parse_authority: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an authority part and fills in the appropriate fields + * of the @uri structure + * + * authority = [ userinfo "@" ] host [ ":" port ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_authority(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + /* + * try to parse an userinfo and check for the trailing @ + */ + ret = rfc3986_parse_user_info(uri, &cur); + if ((ret != 0) || (*cur != '@')) + cur = *str; + else + cur++; + ret = rfc3986_parse_host(uri, &cur); + if (ret != 0) return(ret); + if (*cur == ':') { + cur++; + ret = rfc3986_parse_port(uri, &cur); + if (ret != 0) return(ret); + } + *str = cur; + return(0); +} + +/** + * rfc3986_parse_segment: + * @str: the string to analyze + * @forbid: an optional forbidden character + * @empty: allow an empty segment + * + * Parse a segment and fills in the appropriate fields + * of the @uri structure + * + * segment = *pchar + * segment-nz = 1*pchar + * segment-nz-nc = 1*( unreserved / pct-encoded / sub-delims / "@" ) + * ; non-zero-length segment without any colon ":" + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_segment(const char **str, char forbid, int empty) +{ + const char *cur; + + cur = *str; + if (!ISA_PCHAR(cur)) { + if (empty) + return(0); + return(1); + } + while (ISA_PCHAR(cur) && (*cur != forbid)) + NEXT(cur); + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_ab_empty: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path absolute or empty and fills in the appropriate fields + * of the @uri structure + * + * path-abempty = *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_ab_empty(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (*str != cur) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_absolute: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path absolute and fills in the appropriate fields + * of the @uri structure + * + * path-absolute = "/" [ segment-nz *( "/" segment ) ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_absolute(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + if (*cur != '/') + return(1); + cur++; + ret = rfc3986_parse_segment(&cur, 0, 0); + if (ret == 0) { + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_rootless: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path without root and fills in the appropriate fields + * of the @uri structure + * + * path-rootless = segment-nz *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_rootless(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + ret = rfc3986_parse_segment(&cur, 0, 0); + if (ret != 0) return(ret); + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_path_no_scheme: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an path which is not a scheme and fills in the appropriate fields + * of the @uri structure + * + * path-noscheme = segment-nz-nc *( "/" segment ) + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_path_no_scheme(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + ret = rfc3986_parse_segment(&cur, ':', 0); + if (ret != 0) return(ret); + while (*cur == '/') { + cur++; + ret = rfc3986_parse_segment(&cur, 0, 1); + if (ret != 0) return(ret); + } + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + if (cur != *str) { + if (uri->cleanup & 2) + uri->path = g_strndup(*str, cur - *str); + else + uri->path = uri_string_unescape(*str, cur - *str, NULL); + } else { + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_hier_part: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an hierarchical part and fills in the appropriate fields + * of the @uri structure + * + * hier-part = "//" authority path-abempty + * / path-absolute + * / path-rootless + * / path-empty + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_hier_part(URI *uri, const char **str) +{ + const char *cur; + int ret; + + cur = *str; + + if ((*cur == '/') && (*(cur + 1) == '/')) { + cur += 2; + ret = rfc3986_parse_authority(uri, &cur); + if (ret != 0) return(ret); + ret = rfc3986_parse_path_ab_empty(uri, &cur); + if (ret != 0) return(ret); + *str = cur; + return(0); + } else if (*cur == '/') { + ret = rfc3986_parse_path_absolute(uri, &cur); + if (ret != 0) return(ret); + } else if (ISA_PCHAR(cur)) { + ret = rfc3986_parse_path_rootless(uri, &cur); + if (ret != 0) return(ret); + } else { + /* path-empty is effectively empty */ + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + uri->path = NULL; + } + } + *str = cur; + return (0); +} + +/** + * rfc3986_parse_relative_ref: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI string and fills in the appropriate fields + * of the @uri structure + * + * relative-ref = relative-part [ "?" query ] [ "#" fragment ] + * relative-part = "//" authority path-abempty + * / path-absolute + * / path-noscheme + * / path-empty + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_relative_ref(URI *uri, const char *str) { + int ret; + + if ((*str == '/') && (*(str + 1) == '/')) { + str += 2; + ret = rfc3986_parse_authority(uri, &str); + if (ret != 0) return(ret); + ret = rfc3986_parse_path_ab_empty(uri, &str); + if (ret != 0) return(ret); + } else if (*str == '/') { + ret = rfc3986_parse_path_absolute(uri, &str); + if (ret != 0) return(ret); + } else if (ISA_PCHAR(str)) { + ret = rfc3986_parse_path_no_scheme(uri, &str); + if (ret != 0) return(ret); + } else { + /* path-empty is effectively empty */ + if (uri != NULL) { + if (uri->path != NULL) g_free(uri->path); + uri->path = NULL; + } + } + + if (*str == '?') { + str++; + ret = rfc3986_parse_query(uri, &str); + if (ret != 0) return(ret); + } + if (*str == '#') { + str++; + ret = rfc3986_parse_fragment(uri, &str); + if (ret != 0) return(ret); + } + if (*str != 0) { + uri_clean(uri); + return(1); + } + return(0); +} + + +/** + * rfc3986_parse: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI string and fills in the appropriate fields + * of the @uri structure + * + * scheme ":" hier-part [ "?" query ] [ "#" fragment ] + * + * Returns 0 or the error code + */ +static int +rfc3986_parse(URI *uri, const char *str) { + int ret; + + ret = rfc3986_parse_scheme(uri, &str); + if (ret != 0) return(ret); + if (*str != ':') { + return(1); + } + str++; + ret = rfc3986_parse_hier_part(uri, &str); + if (ret != 0) return(ret); + if (*str == '?') { + str++; + ret = rfc3986_parse_query(uri, &str); + if (ret != 0) return(ret); + } + if (*str == '#') { + str++; + ret = rfc3986_parse_fragment(uri, &str); + if (ret != 0) return(ret); + } + if (*str != 0) { + uri_clean(uri); + return(1); + } + return(0); +} + +/** + * rfc3986_parse_uri_reference: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI reference string and fills in the appropriate fields + * of the @uri structure + * + * URI-reference = URI / relative-ref + * + * Returns 0 or the error code + */ +static int +rfc3986_parse_uri_reference(URI *uri, const char *str) { + int ret; + + if (str == NULL) + return(-1); + uri_clean(uri); + + /* + * Try first to parse absolute refs, then fallback to relative if + * it fails. + */ + ret = rfc3986_parse(uri, str); + if (ret != 0) { + uri_clean(uri); + ret = rfc3986_parse_relative_ref(uri, str); + if (ret != 0) { + uri_clean(uri); + return(ret); + } + } + return(0); +} + +/** + * uri_parse: + * @str: the URI string to analyze + * + * Parse an URI based on RFC 3986 + * + * URI-reference = [ absoluteURI | relativeURI ] [ "#" fragment ] + * + * Returns a newly built URI or NULL in case of error + */ +URI * +uri_parse(const char *str) { + URI *uri; + int ret; + + if (str == NULL) + return(NULL); + uri = uri_new(); + if (uri != NULL) { + ret = rfc3986_parse_uri_reference(uri, str); + if (ret) { + uri_free(uri); + return(NULL); + } + } + return(uri); +} + +/** + * uri_parse_into: + * @uri: pointer to an URI structure + * @str: the string to analyze + * + * Parse an URI reference string based on RFC 3986 and fills in the + * appropriate fields of the @uri structure + * + * URI-reference = URI / relative-ref + * + * Returns 0 or the error code + */ +int +uri_parse_into(URI *uri, const char *str) { + return(rfc3986_parse_uri_reference(uri, str)); +} + +/** + * uri_parse_raw: + * @str: the URI string to analyze + * @raw: if 1 unescaping of URI pieces are disabled + * + * Parse an URI but allows to keep intact the original fragments. + * + * URI-reference = URI / relative-ref + * + * Returns a newly built URI or NULL in case of error + */ +URI * +uri_parse_raw(const char *str, int raw) { + URI *uri; + int ret; + + if (str == NULL) + return(NULL); + uri = uri_new(); + if (uri != NULL) { + if (raw) { + uri->cleanup |= 2; + } + ret = uri_parse_into(uri, str); + if (ret) { + uri_free(uri); + return(NULL); + } + } + return(uri); +} + +/************************************************************************ + * * + * Generic URI structure functions * + * * + ************************************************************************/ + +/** + * uri_new: + * + * Simply creates an empty URI + * + * Returns the new structure or NULL in case of error + */ +URI * +uri_new(void) { + URI *ret; + + ret = (URI *) g_malloc(sizeof(URI)); + memset(ret, 0, sizeof(URI)); + return(ret); +} + +/** + * realloc2n: + * + * Function to handle properly a reallocation when saving an URI + * Also imposes some limit on the length of an URI string output + */ +static char * +realloc2n(char *ret, int *max) { + char *temp; + int tmp; + + tmp = *max * 2; + temp = g_realloc(ret, (tmp + 1)); + *max = tmp; + return(temp); +} + +/** + * uri_to_string: + * @uri: pointer to an URI + * + * Save the URI as an escaped string + * + * Returns a new string (to be deallocated by caller) + */ +char * +uri_to_string(URI *uri) { + char *ret = NULL; + char *temp; + const char *p; + int len; + int max; + + if (uri == NULL) return(NULL); + + + max = 80; + ret = g_malloc(max + 1); + len = 0; + + if (uri->scheme != NULL) { + p = uri->scheme; + while (*p != 0) { + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + } + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = ':'; + } + if (uri->opaque != NULL) { + p = uri->opaque; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if (IS_RESERVED(*(p)) || IS_UNRESERVED(*(p))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } else { + if (uri->server != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '/'; + ret[len++] = '/'; + if (uri->user != NULL) { + p = uri->user; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == ';')) || ((*(p) == ':')) || + ((*(p) == '&')) || ((*(p) == '=')) || + ((*(p) == '+')) || ((*(p) == '$')) || + ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '@'; + } + p = uri->server; + while (*p != 0) { + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + } + if (uri->port > 0) { + if (len + 10 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + len += snprintf(&ret[len], max - len, ":%d", uri->port); + } + } else if (uri->authority != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '/'; + ret[len++] = '/'; + p = uri->authority; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || + ((*(p) == '$')) || ((*(p) == ',')) || ((*(p) == ';')) || + ((*(p) == ':')) || ((*(p) == '@')) || ((*(p) == '&')) || + ((*(p) == '=')) || ((*(p) == '+'))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } else if (uri->scheme != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '/'; + ret[len++] = '/'; + } + if (uri->path != NULL) { + p = uri->path; + /* + * the colon in file:///d: should not be escaped or + * Windows accesses fail later. + */ + if ((uri->scheme != NULL) && + (p[0] == '/') && + (((p[1] >= 'a') && (p[1] <= 'z')) || + ((p[1] >= 'A') && (p[1] <= 'Z'))) && + (p[2] == ':') && + (!strcmp(uri->scheme, "file"))) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + ret[len++] = *p++; + ret[len++] = *p++; + } + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || ((*(p) == '/')) || + ((*(p) == ';')) || ((*(p) == '@')) || ((*(p) == '&')) || + ((*(p) == '=')) || ((*(p) == '+')) || ((*(p) == '$')) || + ((*(p) == ','))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } + if (uri->query != NULL) { + if (len + 1 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '?'; + p = uri->query; + while (*p != 0) { + if (len + 1 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = *p++; + } + } + } + if (uri->fragment != NULL) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len++] = '#'; + p = uri->fragment; + while (*p != 0) { + if (len + 3 >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + if ((IS_UNRESERVED(*(p))) || (IS_RESERVED(*(p)))) + ret[len++] = *p++; + else { + int val = *(unsigned char *)p++; + int hi = val / 0x10, lo = val % 0x10; + ret[len++] = '%'; + ret[len++] = hi + (hi > 9? 'A'-10 : '0'); + ret[len++] = lo + (lo > 9? 'A'-10 : '0'); + } + } + } + if (len >= max) { + temp = realloc2n(ret, &max); + if (temp == NULL) goto mem_error; + ret = temp; + } + ret[len] = 0; + return(ret); + +mem_error: + g_free(ret); + return(NULL); +} + +/** + * uri_clean: + * @uri: pointer to an URI + * + * Make sure the URI struct is free of content + */ +static void +uri_clean(URI *uri) { + if (uri == NULL) return; + + if (uri->scheme != NULL) g_free(uri->scheme); + uri->scheme = NULL; + if (uri->server != NULL) g_free(uri->server); + uri->server = NULL; + if (uri->user != NULL) g_free(uri->user); + uri->user = NULL; + if (uri->path != NULL) g_free(uri->path); + uri->path = NULL; + if (uri->fragment != NULL) g_free(uri->fragment); + uri->fragment = NULL; + if (uri->opaque != NULL) g_free(uri->opaque); + uri->opaque = NULL; + if (uri->authority != NULL) g_free(uri->authority); + uri->authority = NULL; + if (uri->query != NULL) g_free(uri->query); + uri->query = NULL; +} + +/** + * uri_free: + * @uri: pointer to an URI + * + * Free up the URI struct + */ +void +uri_free(URI *uri) { + uri_clean(uri); + g_free(uri); +} + +/************************************************************************ + * * + * Helper functions * + * * + ************************************************************************/ + +/** + * normalize_uri_path: + * @path: pointer to the path string + * + * Applies the 5 normalization steps to a path string--that is, RFC 2396 + * Section 5.2, steps 6.c through 6.g. + * + * Normalization occurs directly on the string, no new allocation is done + * + * Returns 0 or an error code + */ +static int +normalize_uri_path(char *path) { + char *cur, *out; + + if (path == NULL) + return(-1); + + /* Skip all initial "/" chars. We want to get to the beginning of the + * first non-empty segment. + */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* Keep everything we've seen so far. */ + out = cur; + + /* + * Analyze each segment in sequence for cases (c) and (d). + */ + while (cur[0] != '\0') { + /* + * c) All occurrences of "./", where "." is a complete path segment, + * are removed from the buffer string. + */ + if ((cur[0] == '.') && (cur[1] == '/')) { + cur += 2; + /* '//' normalization should be done at this point too */ + while (cur[0] == '/') + cur++; + continue; + } + + /* + * d) If the buffer string ends with "." as a complete path segment, + * that "." is removed. + */ + if ((cur[0] == '.') && (cur[1] == '\0')) + break; + + /* Otherwise keep the segment. */ + while (cur[0] != '/') { + if (cur[0] == '\0') + goto done_cd; + (out++)[0] = (cur++)[0]; + } + /* nomalize // */ + while ((cur[0] == '/') && (cur[1] == '/')) + cur++; + + (out++)[0] = (cur++)[0]; + } + done_cd: + out[0] = '\0'; + + /* Reset to the beginning of the first segment for the next sequence. */ + cur = path; + while (cur[0] == '/') + ++cur; + if (cur[0] == '\0') + return(0); + + /* + * Analyze each segment in sequence for cases (e) and (f). + * + * e) All occurrences of "/../", where is a + * complete path segment not equal to "..", are removed from the + * buffer string. Removal of these path segments is performed + * iteratively, removing the leftmost matching pattern on each + * iteration, until no matching pattern remains. + * + * f) If the buffer string ends with "/..", where + * is a complete path segment not equal to "..", that + * "/.." is removed. + * + * To satisfy the "iterative" clause in (e), we need to collapse the + * string every time we find something that needs to be removed. Thus, + * we don't need to keep two pointers into the string: we only need a + * "current position" pointer. + */ + while (1) { + char *segp, *tmp; + + /* At the beginning of each iteration of this loop, "cur" points to + * the first character of the segment we want to examine. + */ + + /* Find the end of the current segment. */ + segp = cur; + while ((segp[0] != '/') && (segp[0] != '\0')) + ++segp; + + /* If this is the last segment, we're done (we need at least two + * segments to meet the criteria for the (e) and (f) cases). + */ + if (segp[0] == '\0') + break; + + /* If the first segment is "..", or if the next segment _isn't_ "..", + * keep this segment and try the next one. + */ + ++segp; + if (((cur[0] == '.') && (cur[1] == '.') && (segp == cur+3)) + || ((segp[0] != '.') || (segp[1] != '.') + || ((segp[2] != '/') && (segp[2] != '\0')))) { + cur = segp; + continue; + } + + /* If we get here, remove this segment and the next one and back up + * to the previous segment (if there is one), to implement the + * "iteratively" clause. It's pretty much impossible to back up + * while maintaining two pointers into the buffer, so just compact + * the whole buffer now. + */ + + /* If this is the end of the buffer, we're done. */ + if (segp[2] == '\0') { + cur[0] = '\0'; + break; + } + /* Valgrind complained, strcpy(cur, segp + 3); */ + /* string will overlap, do not use strcpy */ + tmp = cur; + segp += 3; + while ((*tmp++ = *segp++) != 0) + ; + + /* If there are no previous segments, then keep going from here. */ + segp = cur; + while ((segp > path) && ((--segp)[0] == '/')) + ; + if (segp == path) + continue; + + /* "segp" is pointing to the end of a previous segment; find it's + * start. We need to back up to the previous segment and start + * over with that to handle things like "foo/bar/../..". If we + * don't do this, then on the first pass we'll remove the "bar/..", + * but be pointing at the second ".." so we won't realize we can also + * remove the "foo/..". + */ + cur = segp; + while ((cur > path) && (cur[-1] != '/')) + --cur; + } + out[0] = '\0'; + + /* + * g) If the resulting buffer string still begins with one or more + * complete path segments of "..", then the reference is + * considered to be in error. Implementations may handle this + * error by retaining these components in the resolved path (i.e., + * treating them as part of the final URI), by removing them from + * the resolved path (i.e., discarding relative levels above the + * root), or by avoiding traversal of the reference. + * + * We discard them from the final path. + */ + if (path[0] == '/') { + cur = path; + while ((cur[0] == '/') && (cur[1] == '.') && (cur[2] == '.') + && ((cur[3] == '/') || (cur[3] == '\0'))) + cur += 3; + + if (cur != path) { + out = path; + while (cur[0] != '\0') + (out++)[0] = (cur++)[0]; + out[0] = 0; + } + } + + return(0); +} + +static int is_hex(char c) { + if (((c >= '0') && (c <= '9')) || + ((c >= 'a') && (c <= 'f')) || + ((c >= 'A') && (c <= 'F'))) + return(1); + return(0); +} + + +/** + * uri_string_unescape: + * @str: the string to unescape + * @len: the length in bytes to unescape (or <= 0 to indicate full string) + * @target: optional destination buffer + * + * Unescaping routine, but does not check that the string is an URI. The + * output is a direct unsigned char translation of %XX values (no encoding) + * Note that the length of the result can only be smaller or same size as + * the input string. + * + * Returns a copy of the string, but unescaped, will return NULL only in case + * of error + */ +char * +uri_string_unescape(const char *str, int len, char *target) { + char *ret, *out; + const char *in; + + if (str == NULL) + return(NULL); + if (len <= 0) len = strlen(str); + if (len < 0) return(NULL); + + if (target == NULL) { + ret = g_malloc(len + 1); + } else + ret = target; + in = str; + out = ret; + while(len > 0) { + if ((len > 2) && (*in == '%') && (is_hex(in[1])) && (is_hex(in[2]))) { + in++; + if ((*in >= '0') && (*in <= '9')) + *out = (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = (*in - 'A') + 10; + in++; + if ((*in >= '0') && (*in <= '9')) + *out = *out * 16 + (*in - '0'); + else if ((*in >= 'a') && (*in <= 'f')) + *out = *out * 16 + (*in - 'a') + 10; + else if ((*in >= 'A') && (*in <= 'F')) + *out = *out * 16 + (*in - 'A') + 10; + in++; + len -= 3; + out++; + } else { + *out++ = *in++; + len--; + } + } + *out = 0; + return(ret); +} + +/** + * uri_string_escape: + * @str: string to escape + * @list: exception list string of chars not to escape + * + * This routine escapes a string to hex, ignoring reserved characters (a-z) + * and the characters in the exception list. + * + * Returns a new escaped string or NULL in case of error. + */ +char * +uri_string_escape(const char *str, const char *list) { + char *ret, ch; + char *temp; + const char *in; + int len, out; + + if (str == NULL) + return(NULL); + if (str[0] == 0) + return(g_strdup(str)); + len = strlen(str); + if (!(len > 0)) return(NULL); + + len += 20; + ret = g_malloc(len); + in = str; + out = 0; + while(*in != 0) { + if (len - out <= 3) { + temp = realloc2n(ret, &len); + ret = temp; + } + + ch = *in; + + if ((ch != '@') && (!IS_UNRESERVED(ch)) && (!strchr(list, ch))) { + unsigned char val; + ret[out++] = '%'; + val = ch >> 4; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + val = ch & 0xF; + if (val <= 9) + ret[out++] = '0' + val; + else + ret[out++] = 'A' + val - 0xA; + in++; + } else { + ret[out++] = *in++; + } + + } + ret[out] = 0; + return(ret); +} + +/************************************************************************ + * * + * Public functions * + * * + ************************************************************************/ + +/** + * uri_resolve: + * @URI: the URI instance found in the document + * @base: the base value + * + * Computes he final URI of the reference done by checking that + * the given URI is valid, and building the final URI using the + * base URI. This is processed according to section 5.2 of the + * RFC 2396 + * + * 5.2. Resolving Relative References to Absolute Form + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * of error. + */ +char * +uri_resolve(const char *uri, const char *base) { + char *val = NULL; + int ret, len, indx, cur, out; + URI *ref = NULL; + URI *bas = NULL; + URI *res = NULL; + + /* + * 1) The URI reference is parsed into the potential four components and + * fragment identifier, as described in Section 4.3. + * + * NOTE that a completely empty URI is treated by modern browsers + * as a reference to "." rather than as a synonym for the current + * URI. Should we do that here? + */ + if (uri == NULL) + ret = -1; + else { + if (*uri) { + ref = uri_new(); + if (ref == NULL) + goto done; + ret = uri_parse_into(ref, uri); + } + else + ret = 0; + } + if (ret != 0) + goto done; + if ((ref != NULL) && (ref->scheme != NULL)) { + /* + * The URI is absolute don't modify. + */ + val = g_strdup(uri); + goto done; + } + if (base == NULL) + ret = -1; + else { + bas = uri_new(); + if (bas == NULL) + goto done; + ret = uri_parse_into(bas, base); + } + if (ret != 0) { + if (ref) + val = uri_to_string(ref); + goto done; + } + if (ref == NULL) { + /* + * the base fragment must be ignored + */ + if (bas->fragment != NULL) { + g_free(bas->fragment); + bas->fragment = NULL; + } + val = uri_to_string(bas); + goto done; + } + + /* + * 2) If the path component is empty and the scheme, authority, and + * query components are undefined, then it is a reference to the + * current document and we are done. Otherwise, the reference URI's + * query and fragment components are defined as found (or not found) + * within the URI reference and not inherited from the base URI. + * + * NOTE that in modern browsers, the parsing differs from the above + * in the following aspect: the query component is allowed to be + * defined while still treating this as a reference to the current + * document. + */ + res = uri_new(); + if (res == NULL) + goto done; + if ((ref->scheme == NULL) && (ref->path == NULL) && + ((ref->authority == NULL) && (ref->server == NULL))) { + if (bas->scheme != NULL) + res->scheme = g_strdup(bas->scheme); + if (bas->authority != NULL) + res->authority = g_strdup(bas->authority); + else if (bas->server != NULL) { + res->server = g_strdup(bas->server); + if (bas->user != NULL) + res->user = g_strdup(bas->user); + res->port = bas->port; + } + if (bas->path != NULL) + res->path = g_strdup(bas->path); + if (ref->query != NULL) + res->query = g_strdup (ref->query); + else if (bas->query != NULL) + res->query = g_strdup(bas->query); + if (ref->fragment != NULL) + res->fragment = g_strdup(ref->fragment); + goto step_7; + } + + /* + * 3) If the scheme component is defined, indicating that the reference + * starts with a scheme name, then the reference is interpreted as an + * absolute URI and we are done. Otherwise, the reference URI's + * scheme is inherited from the base URI's scheme component. + */ + if (ref->scheme != NULL) { + val = uri_to_string(ref); + goto done; + } + if (bas->scheme != NULL) + res->scheme = g_strdup(bas->scheme); + + if (ref->query != NULL) + res->query = g_strdup(ref->query); + if (ref->fragment != NULL) + res->fragment = g_strdup(ref->fragment); + + /* + * 4) If the authority component is defined, then the reference is a + * network-path and we skip to step 7. Otherwise, the reference + * URI's authority is inherited from the base URI's authority + * component, which will also be undefined if the URI scheme does not + * use an authority component. + */ + if ((ref->authority != NULL) || (ref->server != NULL)) { + if (ref->authority != NULL) + res->authority = g_strdup(ref->authority); + else { + res->server = g_strdup(ref->server); + if (ref->user != NULL) + res->user = g_strdup(ref->user); + res->port = ref->port; + } + if (ref->path != NULL) + res->path = g_strdup(ref->path); + goto step_7; + } + if (bas->authority != NULL) + res->authority = g_strdup(bas->authority); + else if (bas->server != NULL) { + res->server = g_strdup(bas->server); + if (bas->user != NULL) + res->user = g_strdup(bas->user); + res->port = bas->port; + } + + /* + * 5) If the path component begins with a slash character ("/"), then + * the reference is an absolute-path and we skip to step 7. + */ + if ((ref->path != NULL) && (ref->path[0] == '/')) { + res->path = g_strdup(ref->path); + goto step_7; + } + + + /* + * 6) If this step is reached, then we are resolving a relative-path + * reference. The relative path needs to be merged with the base + * URI's path. Although there are many ways to do this, we will + * describe a simple method using a separate string buffer. + * + * Allocate a buffer large enough for the result string. + */ + len = 2; /* extra / and 0 */ + if (ref->path != NULL) + len += strlen(ref->path); + if (bas->path != NULL) + len += strlen(bas->path); + res->path = g_malloc(len); + res->path[0] = 0; + + /* + * a) All but the last segment of the base URI's path component is + * copied to the buffer. In other words, any characters after the + * last (right-most) slash character, if any, are excluded. + */ + cur = 0; + out = 0; + if (bas->path != NULL) { + while (bas->path[cur] != 0) { + while ((bas->path[cur] != 0) && (bas->path[cur] != '/')) + cur++; + if (bas->path[cur] == 0) + break; + + cur++; + while (out < cur) { + res->path[out] = bas->path[out]; + out++; + } + } + } + res->path[out] = 0; + + /* + * b) The reference's path component is appended to the buffer + * string. + */ + if (ref->path != NULL && ref->path[0] != 0) { + indx = 0; + /* + * Ensure the path includes a '/' + */ + if ((out == 0) && (bas->server != NULL)) + res->path[out++] = '/'; + while (ref->path[indx] != 0) { + res->path[out++] = ref->path[indx++]; + } + } + res->path[out] = 0; + + /* + * Steps c) to h) are really path normalization steps + */ + normalize_uri_path(res->path); + +step_7: + + /* + * 7) The resulting URI components, including any inherited from the + * base URI, are recombined to give the absolute form of the URI + * reference. + */ + val = uri_to_string(res); + +done: + if (ref != NULL) + uri_free(ref); + if (bas != NULL) + uri_free(bas); + if (res != NULL) + uri_free(res); + return(val); +} + +/** + * uri_resolve_relative: + * @URI: the URI reference under consideration + * @base: the base value + * + * Expresses the URI of the reference in terms relative to the + * base. Some examples of this operation include: + * base = "http://site1.com/docs/book1.html" + * URI input URI returned + * docs/pic1.gif pic1.gif + * docs/img/pic1.gif img/pic1.gif + * img/pic1.gif ../img/pic1.gif + * http://site1.com/docs/pic1.gif pic1.gif + * http://site2.com/docs/pic1.gif http://site2.com/docs/pic1.gif + * + * base = "docs/book1.html" + * URI input URI returned + * docs/pic1.gif pic1.gif + * docs/img/pic1.gif img/pic1.gif + * img/pic1.gif ../img/pic1.gif + * http://site1.com/docs/pic1.gif http://site1.com/docs/pic1.gif + * + * + * Note: if the URI reference is really wierd or complicated, it may be + * worthwhile to first convert it into a "nice" one by calling + * uri_resolve (using 'base') before calling this routine, + * since this routine (for reasonable efficiency) assumes URI has + * already been through some validation. + * + * Returns a new URI string (to be freed by the caller) or NULL in case + * error. + */ +char * +uri_resolve_relative (const char *uri, const char * base) +{ + char *val = NULL; + int ret; + int ix; + int pos = 0; + int nbslash = 0; + int len; + URI *ref = NULL; + URI *bas = NULL; + char *bptr, *uptr, *vptr; + int remove_path = 0; + + if ((uri == NULL) || (*uri == 0)) + return NULL; + + /* + * First parse URI into a standard form + */ + ref = uri_new (); + if (ref == NULL) + return NULL; + /* If URI not already in "relative" form */ + if (uri[0] != '.') { + ret = uri_parse_into (ref, uri); + if (ret != 0) + goto done; /* Error in URI, return NULL */ + } else + ref->path = g_strdup(uri); + + /* + * Next parse base into the same standard form + */ + if ((base == NULL) || (*base == 0)) { + val = g_strdup (uri); + goto done; + } + bas = uri_new (); + if (bas == NULL) + goto done; + if (base[0] != '.') { + ret = uri_parse_into (bas, base); + if (ret != 0) + goto done; /* Error in base, return NULL */ + } else + bas->path = g_strdup(base); + + /* + * If the scheme / server on the URI differs from the base, + * just return the URI + */ + if ((ref->scheme != NULL) && + ((bas->scheme == NULL) || + (strcmp (bas->scheme, ref->scheme)) || + (strcmp (bas->server, ref->server)))) { + val = g_strdup (uri); + goto done; + } + if (!strcmp(bas->path, ref->path)) { + val = g_strdup(""); + goto done; + } + if (bas->path == NULL) { + val = g_strdup(ref->path); + goto done; + } + if (ref->path == NULL) { + ref->path = (char *) "/"; + remove_path = 1; + } + + /* + * At this point (at last!) we can compare the two paths + * + * First we take care of the special case where either of the + * two path components may be missing (bug 316224) + */ + if (bas->path == NULL) { + if (ref->path != NULL) { + uptr = ref->path; + if (*uptr == '/') + uptr++; + /* exception characters from uri_to_string */ + val = uri_string_escape(uptr, "/;&=+$,"); + } + goto done; + } + bptr = bas->path; + if (ref->path == NULL) { + for (ix = 0; bptr[ix] != 0; ix++) { + if (bptr[ix] == '/') + nbslash++; + } + uptr = NULL; + len = 1; /* this is for a string terminator only */ + } else { + /* + * Next we compare the two strings and find where they first differ + */ + if ((ref->path[pos] == '.') && (ref->path[pos+1] == '/')) + pos += 2; + if ((*bptr == '.') && (bptr[1] == '/')) + bptr += 2; + else if ((*bptr == '/') && (ref->path[pos] != '/')) + bptr++; + while ((bptr[pos] == ref->path[pos]) && (bptr[pos] != 0)) + pos++; + + if (bptr[pos] == ref->path[pos]) { + val = g_strdup(""); + goto done; /* (I can't imagine why anyone would do this) */ + } + + /* + * In URI, "back up" to the last '/' encountered. This will be the + * beginning of the "unique" suffix of URI + */ + ix = pos; + if ((ref->path[ix] == '/') && (ix > 0)) + ix--; + else if ((ref->path[ix] == 0) && (ix > 1) && (ref->path[ix - 1] == '/')) + ix -= 2; + for (; ix > 0; ix--) { + if (ref->path[ix] == '/') + break; + } + if (ix == 0) { + uptr = ref->path; + } else { + ix++; + uptr = &ref->path[ix]; + } + + /* + * In base, count the number of '/' from the differing point + */ + if (bptr[pos] != ref->path[pos]) {/* check for trivial URI == base */ + for (; bptr[ix] != 0; ix++) { + if (bptr[ix] == '/') + nbslash++; + } + } + len = strlen (uptr) + 1; + } + + if (nbslash == 0) { + if (uptr != NULL) + /* exception characters from uri_to_string */ + val = uri_string_escape(uptr, "/;&=+$,"); + goto done; + } + + /* + * Allocate just enough space for the returned string - + * length of the remainder of the URI, plus enough space + * for the "../" groups, plus one for the terminator + */ + val = g_malloc (len + 3 * nbslash); + vptr = val; + /* + * Put in as many "../" as needed + */ + for (; nbslash>0; nbslash--) { + *vptr++ = '.'; + *vptr++ = '.'; + *vptr++ = '/'; + } + /* + * Finish up with the end of the URI + */ + if (uptr != NULL) { + if ((vptr > val) && (len > 0) && + (uptr[0] == '/') && (vptr[-1] == '/')) { + memcpy (vptr, uptr + 1, len - 1); + vptr[len - 2] = 0; + } else { + memcpy (vptr, uptr, len); + vptr[len - 1] = 0; + } + } else { + vptr[len - 1] = 0; + } + + /* escape the freshly-built path */ + vptr = val; + /* exception characters from uri_to_string */ + val = uri_string_escape(vptr, "/;&=+$,"); + g_free(vptr); + +done: + /* + * Free the working variables + */ + if (remove_path != 0) + ref->path = NULL; + if (ref != NULL) + uri_free (ref); + if (bas != NULL) + uri_free (bas); + + return val; +} + +/* + * Utility functions to help parse and assemble query strings. + */ + +struct QueryParams * +query_params_new (int init_alloc) +{ + struct QueryParams *ps; + + if (init_alloc <= 0) init_alloc = 1; + + ps = g_new(QueryParams, 1); + ps->n = 0; + ps->alloc = init_alloc; + ps->p = g_new(QueryParam, ps->alloc); + + return ps; +} + +/* Ensure there is space to store at least one more parameter + * at the end of the set. + */ +static int +query_params_append (struct QueryParams *ps, + const char *name, const char *value) +{ + if (ps->n >= ps->alloc) { + ps->p = g_renew(QueryParam, ps->p, ps->alloc * 2); + ps->alloc *= 2; + } + + ps->p[ps->n].name = g_strdup(name); + ps->p[ps->n].value = value ? g_strdup(value) : NULL; + ps->p[ps->n].ignore = 0; + ps->n++; + + return 0; +} + +void +query_params_free (struct QueryParams *ps) +{ + int i; + + for (i = 0; i < ps->n; ++i) { + g_free (ps->p[i].name); + g_free (ps->p[i].value); + } + g_free (ps->p); + g_free (ps); +} + +struct QueryParams * +query_params_parse (const char *query) +{ + struct QueryParams *ps; + const char *end, *eq; + + ps = query_params_new (0); + if (!query || query[0] == '\0') return ps; + + while (*query) { + char *name = NULL, *value = NULL; + + /* Find the next separator, or end of the string. */ + end = strchr (query, '&'); + if (!end) + end = strchr (query, ';'); + if (!end) + end = query + strlen (query); + + /* Find the first '=' character between here and end. */ + eq = strchr (query, '='); + if (eq && eq >= end) eq = NULL; + + /* Empty section (eg. "&&"). */ + if (end == query) + goto next; + + /* If there is no '=' character, then we have just "name" + * and consistent with CGI.pm we assume value is "". + */ + else if (!eq) { + name = uri_string_unescape (query, end - query, NULL); + value = NULL; + } + /* Or if we have "name=" here (works around annoying + * problem when calling uri_string_unescape with len = 0). + */ + else if (eq+1 == end) { + name = uri_string_unescape (query, eq - query, NULL); + value = g_new0(char, 1); + } + /* If the '=' character is at the beginning then we have + * "=value" and consistent with CGI.pm we _ignore_ this. + */ + else if (query == eq) + goto next; + + /* Otherwise it's "name=value". */ + else { + name = uri_string_unescape (query, eq - query, NULL); + value = uri_string_unescape (eq+1, end - (eq+1), NULL); + } + + /* Append to the parameter set. */ + query_params_append (ps, name, value); + g_free(name); + g_free(value); + + next: + query = end; + if (*query) query ++; /* skip '&' separator */ + } + + return ps; +} diff --git a/uri.h b/uri.h new file mode 100644 index 0000000000..de99b3bd4b --- /dev/null +++ b/uri.h @@ -0,0 +1,113 @@ +/** + * Summary: library of generic URI related routines + * Description: library of generic URI related routines + * Implements RFC 2396 + * + * Copyright (C) 1998-2003 Daniel Veillard. All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to deal + * in the Software without restriction, including without limitation the rights + * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell + * copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER + * IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * Except as contained in this notice, the name of Daniel Veillard shall not + * be used in advertising or otherwise to promote the sale, use or other + * dealings in this Software without prior written authorization from him. + * + * Author: Daniel Veillard + ** + * Copyright (C) 2007 Red Hat, Inc. + * + * This library is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * This library is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with this library; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * Authors: + * Richard W.M. Jones + * + * Utility functions to help parse and assemble query strings. + */ + +#ifndef QEMU_URI_H +#define QEMU_URI_H + +#ifdef __cplusplus +extern "C" { +#endif + +/** + * URI: + * + * A parsed URI reference. This is a struct containing the various fields + * as described in RFC 2396 but separated for further processing. + */ +typedef struct URI { + char *scheme; /* the URI scheme */ + char *opaque; /* opaque part */ + char *authority; /* the authority part */ + char *server; /* the server part */ + char *user; /* the user part */ + int port; /* the port number */ + char *path; /* the path string */ + char *fragment; /* the fragment identifier */ + int cleanup; /* parsing potentially unclean URI */ + char *query; /* the query string (as it appears in the URI) */ +} URI; + +URI *uri_new(void); +char *uri_resolve(const char *URI, const char *base); +char *uri_resolve_relative(const char *URI, const char *base); +URI *uri_parse(const char *str); +URI *uri_parse_raw(const char *str, int raw); +int uri_parse_into(URI *uri, const char *str); +char *uri_to_string(URI *uri); +char *uri_string_escape(const char *str, const char *list); +char *uri_string_unescape(const char *str, int len, char *target); +void uri_free(URI *uri); + +/* Single web service query parameter 'name=value'. */ +typedef struct QueryParam { + char *name; /* Name (unescaped). */ + char *value; /* Value (unescaped). */ + int ignore; /* Ignore this field in qparam_get_query */ +} QueryParam; + +/* Set of parameters. */ +typedef struct QueryParams { + int n; /* number of parameters used */ + int alloc; /* allocated space */ + QueryParam *p; /* array of parameters */ +} QueryParams; + +struct QueryParams *query_params_new (int init_alloc); +int query_param_append (QueryParams *ps, const char *name, const char *value); +extern char *query_param_to_string (const QueryParams *ps); +extern QueryParams *query_params_parse (const char *query); +extern void query_params_free (QueryParams *ps); + +#ifdef __cplusplus +} +#endif +#endif /* QEMU_URI_H */