From fc1084aad72297cb6dbccf4c78473b5390ff6c87 Mon Sep 17 00:00:00 2001 From: Peter Crosthwaite Date: Wed, 18 Jun 2014 18:36:03 -0700 Subject: [PATCH 1/6] block: m25p80: sync_page(): Deindent function body. sync_page() was conditionalizing it's whole fn body on the bdrv being non-null. Just return for the function immediately on NULL brdv and get rid of the big if. Makes implementation consistent with flash_zynq_area(). Signed-off-by: Peter Crosthwaite Signed-off-by: Stefan Hajnoczi --- hw/block/m25p80.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index 4076114b32..e4ef733cc1 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -288,18 +288,20 @@ static void bdrv_sync_complete(void *opaque, int ret) static void flash_sync_page(Flash *s, int page) { - if (s->bdrv) { - int bdrv_sector, nb_sectors; - QEMUIOVector iov; + int bdrv_sector, nb_sectors; + QEMUIOVector iov; - bdrv_sector = (page * s->pi->page_size) / BDRV_SECTOR_SIZE; - nb_sectors = DIV_ROUND_UP(s->pi->page_size, BDRV_SECTOR_SIZE); - qemu_iovec_init(&iov, 1); - qemu_iovec_add(&iov, s->storage + bdrv_sector * BDRV_SECTOR_SIZE, - nb_sectors * BDRV_SECTOR_SIZE); - bdrv_aio_writev(s->bdrv, bdrv_sector, &iov, nb_sectors, - bdrv_sync_complete, NULL); + if (!s->bdrv) { + return; } + + bdrv_sector = (page * s->pi->page_size) / BDRV_SECTOR_SIZE; + nb_sectors = DIV_ROUND_UP(s->pi->page_size, BDRV_SECTOR_SIZE); + qemu_iovec_init(&iov, 1); + qemu_iovec_add(&iov, s->storage + bdrv_sector * BDRV_SECTOR_SIZE, + nb_sectors * BDRV_SECTOR_SIZE); + bdrv_aio_writev(s->bdrv, bdrv_sector, &iov, nb_sectors, bdrv_sync_complete, + NULL); } static inline void flash_sync_area(Flash *s, int64_t off, int64_t len) From 999e5aa5cec71d7138c4b5271092f59ca85f9f6b Mon Sep 17 00:00:00 2001 From: Peter Crosthwaite Date: Wed, 18 Jun 2014 18:36:37 -0700 Subject: [PATCH 2/6] block: m25p80: Support read only bdrvs. By just never doing write-backs. This is completely invisible to the guest, as the entire storage area is implemented as device state (at realize time the entire drive is read in). Signed-off-by: Peter Crosthwaite Signed-off-by: Stefan Hajnoczi --- hw/block/m25p80.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/hw/block/m25p80.c b/hw/block/m25p80.c index e4ef733cc1..5893773f0c 100644 --- a/hw/block/m25p80.c +++ b/hw/block/m25p80.c @@ -291,7 +291,7 @@ static void flash_sync_page(Flash *s, int page) int bdrv_sector, nb_sectors; QEMUIOVector iov; - if (!s->bdrv) { + if (!s->bdrv || bdrv_is_read_only(s->bdrv)) { return; } @@ -309,7 +309,7 @@ static inline void flash_sync_area(Flash *s, int64_t off, int64_t len) int64_t start, end, nb_sectors; QEMUIOVector iov; - if (!s->bdrv) { + if (!s->bdrv || bdrv_is_read_only(s->bdrv)) { return; } @@ -627,10 +627,6 @@ static int m25p80_init(SSISlave *ss) if (dinfo && dinfo->bdrv) { DB_PRINT_L(0, "Binding to IF_MTD drive\n"); s->bdrv = dinfo->bdrv; - if (bdrv_is_read_only(s->bdrv)) { - fprintf(stderr, "Can't use a read-only drive"); - return 1; - } /* FIXME: Move to late init */ if (bdrv_read(s->bdrv, 0, s->storage, DIV_ROUND_UP(s->size, From 435db4cf29b88b6612e30acda01cd18788dff458 Mon Sep 17 00:00:00 2001 From: Chunyan Liu Date: Wed, 18 Jun 2014 10:47:26 +0800 Subject: [PATCH 3/6] QemuOpts: check NULL opts in qemu_opt_get functions Some places will call bdrv_create_file(filename, NULL, &local_err), where opts is NULL. Check NULL in qemu_opt_get and qemu_opt_get_*_del functions, to avoid extra effort of checking opts before calling them every time. Signed-off-by: Chunyan Liu Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- util/qemu-option.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) diff --git a/util/qemu-option.c b/util/qemu-option.c index 836055a4d2..43de3add29 100644 --- a/util/qemu-option.c +++ b/util/qemu-option.c @@ -310,8 +310,13 @@ static void qemu_opt_del_all(QemuOpts *opts, const char *name) const char *qemu_opt_get(QemuOpts *opts, const char *name) { - QemuOpt *opt = qemu_opt_find(opts, name); + QemuOpt *opt; + if (opts == NULL) { + return NULL; + } + + opt = qemu_opt_find(opts, name); if (!opt) { const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); if (desc && desc->def_value_str) { @@ -364,9 +369,14 @@ bool qemu_opt_has_help_opt(QemuOpts *opts) static bool qemu_opt_get_bool_helper(QemuOpts *opts, const char *name, bool defval, bool del) { - QemuOpt *opt = qemu_opt_find(opts, name); + QemuOpt *opt; bool ret = defval; + if (opts == NULL) { + return ret; + } + + opt = qemu_opt_find(opts, name); if (opt == NULL) { const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); if (desc && desc->def_value_str) { @@ -395,9 +405,14 @@ bool qemu_opt_get_bool_del(QemuOpts *opts, const char *name, bool defval) static uint64_t qemu_opt_get_number_helper(QemuOpts *opts, const char *name, uint64_t defval, bool del) { - QemuOpt *opt = qemu_opt_find(opts, name); + QemuOpt *opt; uint64_t ret = defval; + if (opts == NULL) { + return ret; + } + + opt = qemu_opt_find(opts, name); if (opt == NULL) { const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); if (desc && desc->def_value_str) { @@ -427,9 +442,14 @@ uint64_t qemu_opt_get_number_del(QemuOpts *opts, const char *name, static uint64_t qemu_opt_get_size_helper(QemuOpts *opts, const char *name, uint64_t defval, bool del) { - QemuOpt *opt = qemu_opt_find(opts, name); + QemuOpt *opt; uint64_t ret = defval; + if (opts == NULL) { + return ret; + } + + opt = qemu_opt_find(opts, name); if (opt == NULL) { const QemuOptDesc *desc = find_desc_by_name(opts->list->desc, name); if (desc && desc->def_value_str) { From 5d5da114b39d3cf187a69dcf5eaca7eaf886c041 Mon Sep 17 00:00:00 2001 From: Liu Yuan Date: Tue, 17 Jun 2014 13:45:35 +0800 Subject: [PATCH 4/6] sheepdog: fix NULL dereference in sd_create Following command qemu-img create -f qcow2 sheepdog:test 20g will cause core dump because aio_context is NULL in sd_create. We should initialize it by qemu_get_aio_context() to avoid NULL dereference. Cc: qemu-devel@nongnu.org Cc: Kevin Wolf Cc: Stefan Hajnoczi Signed-off-by: Liu Yuan Signed-off-by: Stefan Hajnoczi --- block/sheepdog.c | 1 + 1 file changed, 1 insertion(+) diff --git a/block/sheepdog.c b/block/sheepdog.c index 2dcc5959f4..8d9350c26d 100644 --- a/block/sheepdog.c +++ b/block/sheepdog.c @@ -1756,6 +1756,7 @@ static int sd_create(const char *filename, QemuOpts *opts, bdrv_unref(bs); } + s->aio_context = qemu_get_aio_context(); ret = do_sd_create(s, &vid, 0, errp); if (ret) { goto out; From 74892d2468b9f0c56b915ce94848d6f7fac39740 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jun 2014 14:53:58 +0200 Subject: [PATCH 5/6] vl: allow other threads to do qemu_system_vmstop_request There patch protects vmstop_requested with a lock and introduces qemu_system_vmstop_request_prepare. Together with the new call to qemu_vmstop_requested in vm_start, qemu_system_vmstop_request_prepare avoids a race where the VM could remain stopped even though the iostatus of a block device has already been set (for example). qemu_system_vmstop_request_prepare however also lets the caller thread delay observation of the state change until it has itself communicated that change to the user. This delay avoids any possibility of a wrong reordering of the BLOCK_IO_ERROR event and the subsequent STOP event. Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- cpus.c | 1 + include/sysemu/sysemu.h | 1 + target-lm32/op_helper.c | 2 +- vl.c | 85 ++++++++++++++++++++++++++--------------- 4 files changed, 57 insertions(+), 32 deletions(-) diff --git a/cpus.c b/cpus.c index 1ec3a9edd4..06da4e7665 100644 --- a/cpus.c +++ b/cpus.c @@ -1206,6 +1206,7 @@ void cpu_stop_current(void) int vm_stop(RunState state) { if (qemu_in_vcpu_thread()) { + qemu_system_vmstop_request_prepare(); qemu_system_vmstop_request(state); /* * FIXME: should not return to device code in case diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 277230db49..6b4cc133c5 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -62,6 +62,7 @@ void qemu_system_powerdown_request(void); void qemu_register_powerdown_notifier(Notifier *notifier); void qemu_system_debug_request(void); void qemu_system_vmstop_request(RunState reason); +void qemu_system_vmstop_request_prepare(void); int qemu_shutdown_requested_get(void); int qemu_reset_requested_get(void); void qemu_system_killed(int signal, pid_t pid); diff --git a/target-lm32/op_helper.c b/target-lm32/op_helper.c index 308742a74e..61209c19b2 100644 --- a/target-lm32/op_helper.c +++ b/target-lm32/op_helper.c @@ -42,7 +42,7 @@ void HELPER(ill)(CPULM32State *env) fprintf(stderr, "VM paused due to illegal instruction. " "Connect a debugger or switch to the monitor console " "to find out more.\n"); - qemu_system_vmstop_request(RUN_STATE_PAUSED); + vm_stop(RUN_STATE_PAUSED); cs->halted = 1; raise_exception(env, EXCP_HALTED); #endif diff --git a/vl.c b/vl.c index 54b46271c2..ab8f15243b 100644 --- a/vl.c +++ b/vl.c @@ -574,6 +574,10 @@ static int default_driver_check(QemuOpts *opts, void *opaque) static RunState current_run_state = RUN_STATE_PRELAUNCH; +/* We use RUN_STATE_MAX but any invalid value will do */ +static RunState vmstop_requested = RUN_STATE_MAX; +static QemuMutex vmstop_lock; + typedef struct { RunState from; RunState to; @@ -650,10 +654,11 @@ static void runstate_init(void) const RunStateTransition *p; memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions)); - for (p = &runstate_transitions_def[0]; p->from != RUN_STATE_MAX; p++) { runstate_valid_transitions[p->from][p->to] = true; } + + qemu_mutex_init(&vmstop_lock); } /* This function will abort() on invalid state transitions */ @@ -693,6 +698,54 @@ StatusInfo *qmp_query_status(Error **errp) return info; } +static bool qemu_vmstop_requested(RunState *r) +{ + qemu_mutex_lock(&vmstop_lock); + *r = vmstop_requested; + vmstop_requested = RUN_STATE_MAX; + qemu_mutex_unlock(&vmstop_lock); + return *r < RUN_STATE_MAX; +} + +void qemu_system_vmstop_request_prepare(void) +{ + qemu_mutex_lock(&vmstop_lock); +} + +void qemu_system_vmstop_request(RunState state) +{ + vmstop_requested = state; + qemu_mutex_unlock(&vmstop_lock); + qemu_notify_event(); +} + +void vm_start(void) +{ + RunState requested; + + qemu_vmstop_requested(&requested); + if (runstate_is_running() && requested == RUN_STATE_MAX) { + return; + } + + /* Ensure that a STOP/RESUME pair of events is emitted if a + * vmstop request was pending. The BLOCK_IO_ERROR event, for + * example, according to documentation is always followed by + * the STOP event. + */ + if (runstate_is_running()) { + monitor_protocol_event(QEVENT_STOP, NULL); + } else { + cpu_enable_ticks(); + runstate_set(RUN_STATE_RUNNING); + vm_state_notify(1, RUN_STATE_RUNNING); + resume_all_vcpus(); + } + + monitor_protocol_event(QEVENT_RESUME, NULL); +} + + /***********************************************************/ /* real time host monotonic timer */ @@ -1658,17 +1711,6 @@ void vm_state_notify(int running, RunState state) } } -void vm_start(void) -{ - if (!runstate_is_running()) { - cpu_enable_ticks(); - runstate_set(RUN_STATE_RUNNING); - vm_state_notify(1, RUN_STATE_RUNNING); - resume_all_vcpus(); - monitor_protocol_event(QEVENT_RESUME, NULL); - } -} - /* reset/shutdown handler */ typedef struct QEMUResetEntry { @@ -1693,7 +1735,6 @@ static NotifierList suspend_notifiers = static NotifierList wakeup_notifiers = NOTIFIER_LIST_INITIALIZER(wakeup_notifiers); static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE); -static RunState vmstop_requested = RUN_STATE_MAX; int qemu_shutdown_requested_get(void) { @@ -1761,18 +1802,6 @@ static int qemu_debug_requested(void) return r; } -/* We use RUN_STATE_MAX but any invalid value will do */ -static bool qemu_vmstop_requested(RunState *r) -{ - if (vmstop_requested < RUN_STATE_MAX) { - *r = vmstop_requested; - vmstop_requested = RUN_STATE_MAX; - return true; - } - - return false; -} - void qemu_register_reset(QEMUResetHandler *func, void *opaque) { QEMUResetEntry *re = g_malloc0(sizeof(QEMUResetEntry)); @@ -1922,12 +1951,6 @@ void qemu_system_debug_request(void) qemu_notify_event(); } -void qemu_system_vmstop_request(RunState state) -{ - vmstop_requested = state; - qemu_notify_event(); -} - static bool main_loop_should_exit(void) { RunState r; From 2bd3bce8efebe86b031beab5c0e3b9bbaec0b502 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 5 Jun 2014 14:53:59 +0200 Subject: [PATCH 6/6] block: asynchronously stop the VM on I/O errors With virtio-blk dataplane, I/O errors might occur while QEMU is not in the main I/O thread. However, it's invalid to call vm_stop when we're neither in a VCPU thread nor in the main I/O thread, even if we were to take the iothread mutex around it. To avoid this problem, we can raise a request to the main I/O thread, similar to what QEMU does when vm_stop is called from a CPU thread. We know that bdrv_error_action is called from an AIO callback, and the moment at which the callback will fire is not well-defined; it depends on the moment at which the disk or OS finishes the operation, which can happen at any time. Note that QEMU is certainly not in a CPU thread and we do not need to call cpu_stop_current() like vm_stop() does. However, we need to ensure that any action taken by management will result in correct detection of the error _and_ a running VM. In particular: - the event must be raised after the iostatus has been set, so that "info block" will return an iostatus that matches the event. - the VM must be stopped after the iostatus has been set, so that "info block" will return an iostatus that matches the runstate. The ordering between the STOP and BLOCK_IO_ERROR events is preserved; BLOCK_IO_ERROR is documented to come first. This makes bdrv_error_action() thread safe (assuming QMP events are, which is attacked by a separate series). Signed-off-by: Paolo Bonzini Reviewed-by: Eric Blake Signed-off-by: Stefan Hajnoczi --- block.c | 21 +++++++++++++++++++-- docs/qmp/qmp-events.txt | 2 +- stubs/vm-stop.c | 7 ++++++- 3 files changed, 26 insertions(+), 4 deletions(-) diff --git a/block.c b/block.c index 43abe96db6..ff44e76c87 100644 --- a/block.c +++ b/block.c @@ -3626,10 +3626,27 @@ void bdrv_error_action(BlockDriverState *bs, BlockErrorAction action, bool is_read, int error) { assert(error >= 0); - bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read); + if (action == BDRV_ACTION_STOP) { - vm_stop(RUN_STATE_IO_ERROR); + /* First set the iostatus, so that "info block" returns an iostatus + * that matches the events raised so far (an additional error iostatus + * is fine, but not a lost one). + */ bdrv_iostatus_set_err(bs, error); + + /* Then raise the request to stop the VM and the event. + * qemu_system_vmstop_request_prepare has two effects. First, + * it ensures that the STOP event always comes after the + * BLOCK_IO_ERROR event. Second, it ensures that even if management + * can observe the STOP event and do a "cont" before the STOP + * event is issued, the VM will not stop. In this case, vm_start() + * also ensures that the STOP/RESUME pair of events is emitted. + */ + qemu_system_vmstop_request_prepare(); + bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read); + qemu_system_vmstop_request(RUN_STATE_IO_ERROR); + } else { + bdrv_emit_qmp_error_event(bs, QEVENT_BLOCK_IO_ERROR, action, is_read); } } diff --git a/docs/qmp/qmp-events.txt b/docs/qmp/qmp-events.txt index 019db53ec8..22fea580a9 100644 --- a/docs/qmp/qmp-events.txt +++ b/docs/qmp/qmp-events.txt @@ -62,7 +62,7 @@ Data: - "action": action that has been taken, it's one of the following (json-string): "ignore": error has been ignored "report": error has been reported to the device - "stop": error caused VM to be stopped + "stop": the VM is going to stop because of the error Example: diff --git a/stubs/vm-stop.c b/stubs/vm-stop.c index f82c897dfe..69fd86b2e8 100644 --- a/stubs/vm-stop.c +++ b/stubs/vm-stop.c @@ -1,7 +1,12 @@ #include "qemu-common.h" #include "sysemu/sysemu.h" -int vm_stop(RunState state) +void qemu_system_vmstop_request_prepare(void) +{ + abort(); +} + +void qemu_system_vmstop_request(RunState state) { abort(); }