qemu-e2k/softmmu/runstate.c
Fiona Ebner ca2a5e630d qemu_cleanup: begin drained section after vm_shutdown()
in order to avoid requests being stuck in a BlockBackend's request
queue during cleanup. Having such requests can lead to a deadlock [0]
with a virtio-scsi-pci device using iothread that's busy with IO when
initiating a shutdown with QMP 'quit'.

There is a race where such a queued request can continue sometime
(maybe after bdrv_child_free()?) during bdrv_root_unref_child() [1].
The completion will hold the AioContext lock and wait for the BQL
during SCSI completion, but the main thread will hold the BQL and
wait for the AioContext as part of bdrv_root_unref_child(), leading to
the deadlock [0].

[0]:

> Thread 3 (Thread 0x7f3bbd87b700 (LWP 135952) "qemu-system-x86"):
> #0  __lll_lock_wait (futex=futex@entry=0x564183365f00 <qemu_global_mutex>, private=0) at lowlevellock.c:52
> #1  0x00007f3bc1c0d843 in __GI___pthread_mutex_lock (mutex=0x564183365f00 <qemu_global_mutex>) at ../nptl/pthread_mutex_lock.c:80
> #2  0x0000564182939f2e in qemu_mutex_lock_impl (mutex=0x564183365f00 <qemu_global_mutex>, file=0x564182b7f774 "../softmmu/physmem.c", line=2593) at ../util/qemu-thread-posix.c:94
> #3  0x000056418247cc2a in qemu_mutex_lock_iothread_impl (file=0x564182b7f774 "../softmmu/physmem.c", line=2593) at ../softmmu/cpus.c:504
> #4  0x00005641826d5325 in prepare_mmio_access (mr=0x5641856148a0) at ../softmmu/physmem.c:2593
> #5  0x00005641826d6fe7 in address_space_stl_internal (as=0x56418679b310, addr=4276113408, val=16418, attrs=..., result=0x0, endian=DEVICE_LITTLE_ENDIAN) at /home/febner/repos/qemu/memory_ldst.c.inc:318
> #6  0x00005641826d7154 in address_space_stl_le (as=0x56418679b310, addr=4276113408, val=16418, attrs=..., result=0x0) at /home/febner/repos/qemu/memory_ldst.c.inc:357
> #7  0x0000564182374b07 in pci_msi_trigger (dev=0x56418679b0d0, msg=...) at ../hw/pci/pci.c:359
> #8  0x000056418237118b in msi_send_message (dev=0x56418679b0d0, msg=...) at ../hw/pci/msi.c:379
> #9  0x0000564182372c10 in msix_notify (dev=0x56418679b0d0, vector=8) at ../hw/pci/msix.c:542
> #10 0x000056418243719c in virtio_pci_notify (d=0x56418679b0d0, vector=8) at ../hw/virtio/virtio-pci.c:77
> #11 0x00005641826933b0 in virtio_notify_vector (vdev=0x5641867a34a0, vector=8) at ../hw/virtio/virtio.c:1985
> #12 0x00005641826948d6 in virtio_irq (vq=0x5641867ac078) at ../hw/virtio/virtio.c:2461
> #13 0x0000564182694978 in virtio_notify (vdev=0x5641867a34a0, vq=0x5641867ac078) at ../hw/virtio/virtio.c:2473
> #14 0x0000564182665b83 in virtio_scsi_complete_req (req=0x7f3bb000e5d0) at ../hw/scsi/virtio-scsi.c:115
> #15 0x00005641826670ce in virtio_scsi_complete_cmd_req (req=0x7f3bb000e5d0) at ../hw/scsi/virtio-scsi.c:641
> #16 0x000056418266736b in virtio_scsi_command_complete (r=0x7f3bb0010560, resid=0) at ../hw/scsi/virtio-scsi.c:712
> #17 0x000056418239aac6 in scsi_req_complete (req=0x7f3bb0010560, status=2) at ../hw/scsi/scsi-bus.c:1526
> #18 0x000056418239e090 in scsi_handle_rw_error (r=0x7f3bb0010560, ret=-123, acct_failed=false) at ../hw/scsi/scsi-disk.c:242
> #19 0x000056418239e13f in scsi_disk_req_check_error (r=0x7f3bb0010560, ret=-123, acct_failed=false) at ../hw/scsi/scsi-disk.c:265
> #20 0x000056418239e482 in scsi_dma_complete_noio (r=0x7f3bb0010560, ret=-123) at ../hw/scsi/scsi-disk.c:340
> #21 0x000056418239e5d9 in scsi_dma_complete (opaque=0x7f3bb0010560, ret=-123) at ../hw/scsi/scsi-disk.c:371
> #22 0x00005641824809ad in dma_complete (dbs=0x7f3bb000d9d0, ret=-123) at ../softmmu/dma-helpers.c:107
> #23 0x0000564182480a72 in dma_blk_cb (opaque=0x7f3bb000d9d0, ret=-123) at ../softmmu/dma-helpers.c:127
> #24 0x00005641827bf78a in blk_aio_complete (acb=0x7f3bb00021a0) at ../block/block-backend.c:1563
> #25 0x00005641827bfa5e in blk_aio_write_entry (opaque=0x7f3bb00021a0) at ../block/block-backend.c:1630
> #26 0x000056418295638a in coroutine_trampoline (i0=-1342102448, i1=32571) at ../util/coroutine-ucontext.c:177
> #27 0x00007f3bc0caed40 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
> #28 0x00007f3bbd8757f0 in ?? ()
> #29 0x0000000000000000 in ?? ()
>
> Thread 1 (Thread 0x7f3bbe3e9280 (LWP 135944) "qemu-system-x86"):
> #0  __lll_lock_wait (futex=futex@entry=0x5641856f2a00, private=0) at lowlevellock.c:52
> #1  0x00007f3bc1c0d8d1 in __GI___pthread_mutex_lock (mutex=0x5641856f2a00) at ../nptl/pthread_mutex_lock.c:115
> #2  0x0000564182939f2e in qemu_mutex_lock_impl (mutex=0x5641856f2a00, file=0x564182c0e319 "../util/async.c", line=728) at ../util/qemu-thread-posix.c:94
> #3  0x000056418293a140 in qemu_rec_mutex_lock_impl (mutex=0x5641856f2a00, file=0x564182c0e319 "../util/async.c", line=728) at ../util/qemu-thread-posix.c:149
> #4  0x00005641829532d5 in aio_context_acquire (ctx=0x5641856f29a0) at ../util/async.c:728
> #5  0x000056418279d5df in bdrv_set_aio_context_commit (opaque=0x5641856e6e50) at ../block.c:7493
> #6  0x000056418294e288 in tran_commit (tran=0x56418630bfe0) at ../util/transactions.c:87
> #7  0x000056418279d880 in bdrv_try_change_aio_context (bs=0x5641856f7130, ctx=0x56418548f810, ignore_child=0x0, errp=0x0) at ../block.c:7626
> #8  0x0000564182793f39 in bdrv_root_unref_child (child=0x5641856f47d0) at ../block.c:3242
> #9  0x00005641827be137 in blk_remove_bs (blk=0x564185709880) at ../block/block-backend.c:914
> #10 0x00005641827bd689 in blk_remove_all_bs () at ../block/block-backend.c:583
> #11 0x0000564182798699 in bdrv_close_all () at ../block.c:5117
> #12 0x000056418248a5b2 in qemu_cleanup () at ../softmmu/runstate.c:821
> #13 0x0000564182738603 in qemu_default_main () at ../softmmu/main.c:38
> #14 0x0000564182738631 in main (argc=30, argv=0x7ffd675a8a48) at ../softmmu/main.c:48
>
> (gdb) p *((QemuMutex*)0x5641856f2a00)
> $1 = {lock = {__data = {__lock = 2, __count = 2, __owner = 135952, ...
> (gdb) p *((QemuMutex*)0x564183365f00)
> $2 = {lock = {__data = {__lock = 2, __count = 0, __owner = 135944, ...

[1]:

> Thread 1 "qemu-system-x86" hit Breakpoint 5, bdrv_drain_all_end () at ../block/io.c:551
> #0  bdrv_drain_all_end () at ../block/io.c:551
> #1  0x00005569810f0376 in bdrv_graph_wrlock (bs=0x0) at ../block/graph-lock.c:156
> #2  0x00005569810bd3e0 in bdrv_replace_child_noperm (child=0x556982e2d7d0, new_bs=0x0) at ../block.c:2897
> #3  0x00005569810bdef2 in bdrv_root_unref_child (child=0x556982e2d7d0) at ../block.c:3227
> #4  0x00005569810e8137 in blk_remove_bs (blk=0x556982e42880) at ../block/block-backend.c:914
> #5  0x00005569810e7689 in blk_remove_all_bs () at ../block/block-backend.c:583
> #6  0x00005569810c2699 in bdrv_close_all () at ../block.c:5117
> #7  0x0000556980db45b2 in qemu_cleanup () at ../softmmu/runstate.c:821
> #8  0x0000556981062603 in qemu_default_main () at ../softmmu/main.c:38
> #9  0x0000556981062631 in main (argc=30, argv=0x7ffd7a82a418) at ../softmmu/main.c:48
> [Switching to Thread 0x7fe76dab2700 (LWP 103649)]
>
> Thread 3 "qemu-system-x86" hit Breakpoint 4, blk_inc_in_flight (blk=0x556982e42880) at ../block/block-backend.c:1505
> #0  blk_inc_in_flight (blk=0x556982e42880) at ../block/block-backend.c:1505
> #1  0x00005569810e8f36 in blk_wait_while_drained (blk=0x556982e42880) at ../block/block-backend.c:1312
> #2  0x00005569810e9231 in blk_co_do_pwritev_part (blk=0x556982e42880, offset=3422961664, bytes=4096, qiov=0x556983028060, qiov_offset=0, flags=0) at ../block/block-backend.c:1402
> #3  0x00005569810e9a4b in blk_aio_write_entry (opaque=0x556982e2cfa0) at ../block/block-backend.c:1628
> #4  0x000055698128038a in coroutine_trampoline (i0=-2090057872, i1=21865) at ../util/coroutine-ucontext.c:177
> #5  0x00007fe770f50d40 in ?? () from /lib/x86_64-linux-gnu/libc.so.6
> #6  0x00007ffd7a829570 in ?? ()
> #7  0x0000000000000000 in ?? ()

Signed-off-by: Fiona Ebner <f.ebner@proxmox.com>
Message-ID: <20230706131418.423713-1-f.ebner@proxmox.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
2023-07-07 12:49:22 +02:00

832 lines
24 KiB
C

/*
* QEMU main system emulation loop
*
* Copyright (c) 2003-2020 QEMU contributors
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*/
#include "qemu/osdep.h"
#include "audio/audio.h"
#include "block/block.h"
#include "block/export.h"
#include "chardev/char.h"
#include "crypto/cipher.h"
#include "crypto/init.h"
#include "exec/cpu-common.h"
#include "gdbstub/syscalls.h"
#include "hw/boards.h"
#include "migration/misc.h"
#include "migration/postcopy-ram.h"
#include "monitor/monitor.h"
#include "net/net.h"
#include "net/vhost_net.h"
#include "qapi/error.h"
#include "qapi/qapi-commands-run-state.h"
#include "qapi/qapi-events-run-state.h"
#include "qemu/accel.h"
#include "qemu/error-report.h"
#include "qemu/job.h"
#include "qemu/log.h"
#include "qemu/module.h"
#include "qemu/plugin.h"
#include "qemu/sockets.h"
#include "qemu/timer.h"
#include "qemu/thread.h"
#include "qom/object.h"
#include "qom/object_interfaces.h"
#include "sysemu/cpus.h"
#include "sysemu/qtest.h"
#include "sysemu/replay.h"
#include "sysemu/reset.h"
#include "sysemu/runstate.h"
#include "sysemu/runstate-action.h"
#include "sysemu/sysemu.h"
#include "sysemu/tpm.h"
#include "trace.h"
static NotifierList exit_notifiers =
NOTIFIER_LIST_INITIALIZER(exit_notifiers);
static RunState current_run_state = RUN_STATE_PRELAUNCH;
/* We use RUN_STATE__MAX but any invalid value will do */
static RunState vmstop_requested = RUN_STATE__MAX;
static QemuMutex vmstop_lock;
typedef struct {
RunState from;
RunState to;
} RunStateTransition;
static const RunStateTransition runstate_transitions_def[] = {
{ RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
{ RUN_STATE_DEBUG, RUN_STATE_RUNNING },
{ RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_DEBUG, RUN_STATE_PRELAUNCH },
{ RUN_STATE_INMIGRATE, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_INMIGRATE, RUN_STATE_IO_ERROR },
{ RUN_STATE_INMIGRATE, RUN_STATE_PAUSED },
{ RUN_STATE_INMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_INMIGRATE, RUN_STATE_SHUTDOWN },
{ RUN_STATE_INMIGRATE, RUN_STATE_SUSPENDED },
{ RUN_STATE_INMIGRATE, RUN_STATE_WATCHDOG },
{ RUN_STATE_INMIGRATE, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_INMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_INMIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_INMIGRATE, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_INMIGRATE, RUN_STATE_COLO },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PAUSED },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_INTERNAL_ERROR, RUN_STATE_PRELAUNCH },
{ RUN_STATE_IO_ERROR, RUN_STATE_RUNNING },
{ RUN_STATE_IO_ERROR, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_IO_ERROR, RUN_STATE_PRELAUNCH },
{ RUN_STATE_PAUSED, RUN_STATE_RUNNING },
{ RUN_STATE_PAUSED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PAUSED, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_PAUSED, RUN_STATE_PRELAUNCH },
{ RUN_STATE_PAUSED, RUN_STATE_COLO},
{ RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_POSTMIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_PRELAUNCH, RUN_STATE_RUNNING },
{ RUN_STATE_PRELAUNCH, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_RUNNING },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_PAUSED },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_POSTMIGRATE },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_PRELAUNCH },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_COLO },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_IO_ERROR },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_SHUTDOWN },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_SUSPENDED },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_WATCHDOG },
{ RUN_STATE_FINISH_MIGRATE, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH },
{ RUN_STATE_COLO, RUN_STATE_RUNNING },
{ RUN_STATE_COLO, RUN_STATE_PRELAUNCH },
{ RUN_STATE_COLO, RUN_STATE_SHUTDOWN},
{ RUN_STATE_RUNNING, RUN_STATE_DEBUG },
{ RUN_STATE_RUNNING, RUN_STATE_INTERNAL_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_IO_ERROR },
{ RUN_STATE_RUNNING, RUN_STATE_PAUSED },
{ RUN_STATE_RUNNING, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_RUNNING, RUN_STATE_RESTORE_VM },
{ RUN_STATE_RUNNING, RUN_STATE_SAVE_VM },
{ RUN_STATE_RUNNING, RUN_STATE_SHUTDOWN },
{ RUN_STATE_RUNNING, RUN_STATE_WATCHDOG },
{ RUN_STATE_RUNNING, RUN_STATE_GUEST_PANICKED },
{ RUN_STATE_RUNNING, RUN_STATE_COLO},
{ RUN_STATE_SAVE_VM, RUN_STATE_RUNNING },
{ RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED },
{ RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_SHUTDOWN, RUN_STATE_PRELAUNCH },
{ RUN_STATE_SHUTDOWN, RUN_STATE_COLO },
{ RUN_STATE_DEBUG, RUN_STATE_SUSPENDED },
{ RUN_STATE_RUNNING, RUN_STATE_SUSPENDED },
{ RUN_STATE_SUSPENDED, RUN_STATE_RUNNING },
{ RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH },
{ RUN_STATE_SUSPENDED, RUN_STATE_COLO},
{ RUN_STATE_WATCHDOG, RUN_STATE_RUNNING },
{ RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_WATCHDOG, RUN_STATE_PRELAUNCH },
{ RUN_STATE_WATCHDOG, RUN_STATE_COLO},
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_RUNNING },
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_FINISH_MIGRATE },
{ RUN_STATE_GUEST_PANICKED, RUN_STATE_PRELAUNCH },
{ RUN_STATE__MAX, RUN_STATE__MAX },
};
static bool runstate_valid_transitions[RUN_STATE__MAX][RUN_STATE__MAX];
bool runstate_check(RunState state)
{
return current_run_state == state;
}
static void runstate_init(void)
{
const RunStateTransition *p;
memset(&runstate_valid_transitions, 0, sizeof(runstate_valid_transitions));
for (p = &runstate_transitions_def[0]; p->from != RUN_STATE__MAX; p++) {
runstate_valid_transitions[p->from][p->to] = true;
}
qemu_mutex_init(&vmstop_lock);
}
/* This function will abort() on invalid state transitions */
void runstate_set(RunState new_state)
{
assert(new_state < RUN_STATE__MAX);
trace_runstate_set(current_run_state, RunState_str(current_run_state),
new_state, RunState_str(new_state));
if (current_run_state == new_state) {
return;
}
if (!runstate_valid_transitions[current_run_state][new_state]) {
error_report("invalid runstate transition: '%s' -> '%s'",
RunState_str(current_run_state),
RunState_str(new_state));
abort();
}
current_run_state = new_state;
}
RunState runstate_get(void)
{
return current_run_state;
}
bool runstate_is_running(void)
{
return runstate_check(RUN_STATE_RUNNING);
}
bool runstate_needs_reset(void)
{
return runstate_check(RUN_STATE_INTERNAL_ERROR) ||
runstate_check(RUN_STATE_SHUTDOWN);
}
StatusInfo *qmp_query_status(Error **errp)
{
StatusInfo *info = g_malloc0(sizeof(*info));
AccelState *accel = current_accel();
/*
* We ignore errors, which will happen if the accelerator
* is not TCG. "singlestep" is meaningless for other accelerators,
* so we will set the StatusInfo field to false for those.
*/
info->singlestep = object_property_get_bool(OBJECT(accel),
"one-insn-per-tb", NULL);
info->running = runstate_is_running();
info->status = current_run_state;
return info;
}
bool qemu_vmstop_requested(RunState *r)
{
qemu_mutex_lock(&vmstop_lock);
*r = vmstop_requested;
vmstop_requested = RUN_STATE__MAX;
qemu_mutex_unlock(&vmstop_lock);
return *r < RUN_STATE__MAX;
}
void qemu_system_vmstop_request_prepare(void)
{
qemu_mutex_lock(&vmstop_lock);
}
void qemu_system_vmstop_request(RunState state)
{
vmstop_requested = state;
qemu_mutex_unlock(&vmstop_lock);
qemu_notify_event();
}
struct VMChangeStateEntry {
VMChangeStateHandler *cb;
void *opaque;
QTAILQ_ENTRY(VMChangeStateEntry) entries;
int priority;
};
static QTAILQ_HEAD(, VMChangeStateEntry) vm_change_state_head =
QTAILQ_HEAD_INITIALIZER(vm_change_state_head);
/**
* qemu_add_vm_change_state_handler_prio:
* @cb: the callback to invoke
* @opaque: user data passed to the callback
* @priority: low priorities execute first when the vm runs and the reverse is
* true when the vm stops
*
* Register a callback function that is invoked when the vm starts or stops
* running.
*
* Returns: an entry to be freed using qemu_del_vm_change_state_handler()
*/
VMChangeStateEntry *qemu_add_vm_change_state_handler_prio(
VMChangeStateHandler *cb, void *opaque, int priority)
{
VMChangeStateEntry *e;
VMChangeStateEntry *other;
e = g_malloc0(sizeof(*e));
e->cb = cb;
e->opaque = opaque;
e->priority = priority;
/* Keep list sorted in ascending priority order */
QTAILQ_FOREACH(other, &vm_change_state_head, entries) {
if (priority < other->priority) {
QTAILQ_INSERT_BEFORE(other, e, entries);
return e;
}
}
QTAILQ_INSERT_TAIL(&vm_change_state_head, e, entries);
return e;
}
VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb,
void *opaque)
{
return qemu_add_vm_change_state_handler_prio(cb, opaque, 0);
}
void qemu_del_vm_change_state_handler(VMChangeStateEntry *e)
{
QTAILQ_REMOVE(&vm_change_state_head, e, entries);
g_free(e);
}
void vm_state_notify(bool running, RunState state)
{
VMChangeStateEntry *e, *next;
trace_vm_state_notify(running, state, RunState_str(state));
if (running) {
QTAILQ_FOREACH_SAFE(e, &vm_change_state_head, entries, next) {
e->cb(e->opaque, running, state);
}
} else {
QTAILQ_FOREACH_REVERSE_SAFE(e, &vm_change_state_head, entries, next) {
e->cb(e->opaque, running, state);
}
}
}
static ShutdownCause reset_requested;
static ShutdownCause shutdown_requested;
static int shutdown_signal;
static pid_t shutdown_pid;
static int powerdown_requested;
static int debug_requested;
static int suspend_requested;
static WakeupReason wakeup_reason;
static NotifierList powerdown_notifiers =
NOTIFIER_LIST_INITIALIZER(powerdown_notifiers);
static NotifierList suspend_notifiers =
NOTIFIER_LIST_INITIALIZER(suspend_notifiers);
static NotifierList wakeup_notifiers =
NOTIFIER_LIST_INITIALIZER(wakeup_notifiers);
static NotifierList shutdown_notifiers =
NOTIFIER_LIST_INITIALIZER(shutdown_notifiers);
static uint32_t wakeup_reason_mask = ~(1 << QEMU_WAKEUP_REASON_NONE);
ShutdownCause qemu_shutdown_requested_get(void)
{
return shutdown_requested;
}
ShutdownCause qemu_reset_requested_get(void)
{
return reset_requested;
}
static int qemu_shutdown_requested(void)
{
return qatomic_xchg(&shutdown_requested, SHUTDOWN_CAUSE_NONE);
}
static void qemu_kill_report(void)
{
if (!qtest_driver() && shutdown_signal) {
if (shutdown_pid == 0) {
/* This happens for eg ^C at the terminal, so it's worth
* avoiding printing an odd message in that case.
*/
error_report("terminating on signal %d", shutdown_signal);
} else {
char *shutdown_cmd = qemu_get_pid_name(shutdown_pid);
error_report("terminating on signal %d from pid " FMT_pid " (%s)",
shutdown_signal, shutdown_pid,
shutdown_cmd ? shutdown_cmd : "<unknown process>");
g_free(shutdown_cmd);
}
shutdown_signal = 0;
}
}
static ShutdownCause qemu_reset_requested(void)
{
ShutdownCause r = reset_requested;
if (r && replay_checkpoint(CHECKPOINT_RESET_REQUESTED)) {
reset_requested = SHUTDOWN_CAUSE_NONE;
return r;
}
return SHUTDOWN_CAUSE_NONE;
}
static int qemu_suspend_requested(void)
{
int r = suspend_requested;
if (r && replay_checkpoint(CHECKPOINT_SUSPEND_REQUESTED)) {
suspend_requested = 0;
return r;
}
return false;
}
static WakeupReason qemu_wakeup_requested(void)
{
return wakeup_reason;
}
static int qemu_powerdown_requested(void)
{
int r = powerdown_requested;
powerdown_requested = 0;
return r;
}
static int qemu_debug_requested(void)
{
int r = debug_requested;
debug_requested = 0;
return r;
}
/*
* Reset the VM. Issue an event unless @reason is SHUTDOWN_CAUSE_NONE.
*/
void qemu_system_reset(ShutdownCause reason)
{
MachineClass *mc;
mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL;
cpu_synchronize_all_states();
if (mc && mc->reset) {
mc->reset(current_machine, reason);
} else {
qemu_devices_reset(reason);
}
switch (reason) {
case SHUTDOWN_CAUSE_NONE:
case SHUTDOWN_CAUSE_SUBSYSTEM_RESET:
case SHUTDOWN_CAUSE_SNAPSHOT_LOAD:
break;
default:
qapi_event_send_reset(shutdown_caused_by_guest(reason), reason);
}
cpu_synchronize_all_post_reset();
}
/*
* Wake the VM after suspend.
*/
static void qemu_system_wakeup(void)
{
MachineClass *mc;
mc = current_machine ? MACHINE_GET_CLASS(current_machine) : NULL;
if (mc && mc->wakeup) {
mc->wakeup(current_machine);
}
}
void qemu_system_guest_panicked(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crashed");
if (current_cpu) {
current_cpu->crash_occurred = true;
}
/*
* TODO: Currently the available panic actions are: none, pause, and
* shutdown, but in principle debug and reset could be supported as well.
* Investigate any potential use cases for the unimplemented actions.
*/
if (panic_action == PANIC_ACTION_PAUSE
|| (panic_action == PANIC_ACTION_SHUTDOWN && shutdown_action == SHUTDOWN_ACTION_PAUSE)) {
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_PAUSE, info);
vm_stop(RUN_STATE_GUEST_PANICKED);
} else if (panic_action == PANIC_ACTION_SHUTDOWN ||
panic_action == PANIC_ACTION_EXIT_FAILURE) {
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_POWEROFF, info);
vm_stop(RUN_STATE_GUEST_PANICKED);
qemu_system_shutdown_request(SHUTDOWN_CAUSE_GUEST_PANIC);
} else {
qapi_event_send_guest_panicked(GUEST_PANIC_ACTION_RUN, info);
}
if (info) {
if (info->type == GUEST_PANIC_INFORMATION_TYPE_HYPER_V) {
qemu_log_mask(LOG_GUEST_ERROR, "\nHV crash parameters: (%#"PRIx64
" %#"PRIx64" %#"PRIx64" %#"PRIx64" %#"PRIx64")\n",
info->u.hyper_v.arg1,
info->u.hyper_v.arg2,
info->u.hyper_v.arg3,
info->u.hyper_v.arg4,
info->u.hyper_v.arg5);
} else if (info->type == GUEST_PANIC_INFORMATION_TYPE_S390) {
qemu_log_mask(LOG_GUEST_ERROR, " on cpu %d: %s\n"
"PSW: 0x%016" PRIx64 " 0x%016" PRIx64"\n",
info->u.s390.core,
S390CrashReason_str(info->u.s390.reason),
info->u.s390.psw_mask,
info->u.s390.psw_addr);
}
qapi_free_GuestPanicInformation(info);
}
}
void qemu_system_guest_crashloaded(GuestPanicInformation *info)
{
qemu_log_mask(LOG_GUEST_ERROR, "Guest crash loaded");
qapi_event_send_guest_crashloaded(GUEST_PANIC_ACTION_RUN, info);
qapi_free_GuestPanicInformation(info);
}
void qemu_system_reset_request(ShutdownCause reason)
{
if (reboot_action == REBOOT_ACTION_SHUTDOWN &&
reason != SHUTDOWN_CAUSE_SUBSYSTEM_RESET) {
shutdown_requested = reason;
} else if (!cpus_are_resettable()) {
error_report("cpus are not resettable, terminating");
shutdown_requested = reason;
} else {
reset_requested = reason;
}
cpu_stop_current();
qemu_notify_event();
}
static void qemu_system_suspend(void)
{
pause_all_vcpus();
notifier_list_notify(&suspend_notifiers, NULL);
runstate_set(RUN_STATE_SUSPENDED);
qapi_event_send_suspend();
}
void qemu_system_suspend_request(void)
{
if (runstate_check(RUN_STATE_SUSPENDED)) {
return;
}
suspend_requested = 1;
cpu_stop_current();
qemu_notify_event();
}
void qemu_register_suspend_notifier(Notifier *notifier)
{
notifier_list_add(&suspend_notifiers, notifier);
}
void qemu_system_wakeup_request(WakeupReason reason, Error **errp)
{
trace_system_wakeup_request(reason);
if (!runstate_check(RUN_STATE_SUSPENDED)) {
error_setg(errp,
"Unable to wake up: guest is not in suspended state");
return;
}
if (!(wakeup_reason_mask & (1 << reason))) {
return;
}
runstate_set(RUN_STATE_RUNNING);
wakeup_reason = reason;
qemu_notify_event();
}
void qemu_system_wakeup_enable(WakeupReason reason, bool enabled)
{
if (enabled) {
wakeup_reason_mask |= (1 << reason);
} else {
wakeup_reason_mask &= ~(1 << reason);
}
}
void qemu_register_wakeup_notifier(Notifier *notifier)
{
notifier_list_add(&wakeup_notifiers, notifier);
}
static bool wakeup_suspend_enabled;
void qemu_register_wakeup_support(void)
{
wakeup_suspend_enabled = true;
}
bool qemu_wakeup_suspend_enabled(void)
{
return wakeup_suspend_enabled;
}
void qemu_system_killed(int signal, pid_t pid)
{
shutdown_signal = signal;
shutdown_pid = pid;
shutdown_action = SHUTDOWN_ACTION_POWEROFF;
/* Cannot call qemu_system_shutdown_request directly because
* we are in a signal handler.
*/
shutdown_requested = SHUTDOWN_CAUSE_HOST_SIGNAL;
qemu_notify_event();
}
void qemu_system_shutdown_request(ShutdownCause reason)
{
trace_qemu_system_shutdown_request(reason);
replay_shutdown_request(reason);
shutdown_requested = reason;
qemu_notify_event();
}
static void qemu_system_powerdown(void)
{
qapi_event_send_powerdown();
notifier_list_notify(&powerdown_notifiers, NULL);
}
static void qemu_system_shutdown(ShutdownCause cause)
{
qapi_event_send_shutdown(shutdown_caused_by_guest(cause), cause);
notifier_list_notify(&shutdown_notifiers, &cause);
}
void qemu_system_powerdown_request(void)
{
trace_qemu_system_powerdown_request();
powerdown_requested = 1;
qemu_notify_event();
}
void qemu_register_powerdown_notifier(Notifier *notifier)
{
notifier_list_add(&powerdown_notifiers, notifier);
}
void qemu_register_shutdown_notifier(Notifier *notifier)
{
notifier_list_add(&shutdown_notifiers, notifier);
}
void qemu_system_debug_request(void)
{
debug_requested = 1;
qemu_notify_event();
}
static bool main_loop_should_exit(int *status)
{
RunState r;
ShutdownCause request;
if (qemu_debug_requested()) {
vm_stop(RUN_STATE_DEBUG);
}
if (qemu_suspend_requested()) {
qemu_system_suspend();
}
request = qemu_shutdown_requested();
if (request) {
qemu_kill_report();
qemu_system_shutdown(request);
if (shutdown_action == SHUTDOWN_ACTION_PAUSE) {
vm_stop(RUN_STATE_SHUTDOWN);
} else {
if (request == SHUTDOWN_CAUSE_GUEST_PANIC &&
panic_action == PANIC_ACTION_EXIT_FAILURE) {
*status = EXIT_FAILURE;
}
return true;
}
}
request = qemu_reset_requested();
if (request) {
pause_all_vcpus();
qemu_system_reset(request);
resume_all_vcpus();
/*
* runstate can change in pause_all_vcpus()
* as iothread mutex is unlocked
*/
if (!runstate_check(RUN_STATE_RUNNING) &&
!runstate_check(RUN_STATE_INMIGRATE) &&
!runstate_check(RUN_STATE_FINISH_MIGRATE)) {
runstate_set(RUN_STATE_PRELAUNCH);
}
}
if (qemu_wakeup_requested()) {
pause_all_vcpus();
qemu_system_wakeup();
notifier_list_notify(&wakeup_notifiers, &wakeup_reason);
wakeup_reason = QEMU_WAKEUP_REASON_NONE;
resume_all_vcpus();
qapi_event_send_wakeup();
}
if (qemu_powerdown_requested()) {
qemu_system_powerdown();
}
if (qemu_vmstop_requested(&r)) {
vm_stop(r);
}
return false;
}
int qemu_main_loop(void)
{
int status = EXIT_SUCCESS;
while (!main_loop_should_exit(&status)) {
main_loop_wait(false);
}
return status;
}
void qemu_add_exit_notifier(Notifier *notify)
{
notifier_list_add(&exit_notifiers, notify);
}
void qemu_remove_exit_notifier(Notifier *notify)
{
notifier_remove(notify);
}
static void qemu_run_exit_notifiers(void)
{
notifier_list_notify(&exit_notifiers, NULL);
}
void qemu_init_subsystems(void)
{
Error *err = NULL;
os_set_line_buffering();
module_call_init(MODULE_INIT_TRACE);
qemu_init_cpu_list();
qemu_init_cpu_loop();
qemu_mutex_lock_iothread();
atexit(qemu_run_exit_notifiers);
module_call_init(MODULE_INIT_QOM);
module_call_init(MODULE_INIT_MIGRATION);
runstate_init();
precopy_infrastructure_init();
postcopy_infrastructure_init();
monitor_init_globals();
if (qcrypto_init(&err) < 0) {
error_reportf_err(err, "cannot initialize crypto: ");
exit(1);
}
os_setup_early_signal_handling();
bdrv_init_with_whitelist();
socket_init();
}
void qemu_cleanup(void)
{
gdb_exit(0);
/*
* cleaning up the migration object cancels any existing migration
* try to do this early so that it also stops using devices.
*/
migration_shutdown();
/*
* Close the exports before draining the block layer. The export
* drivers may have coroutines yielding on it, so we need to clean
* them up before the drain, as otherwise they may be get stuck in
* blk_wait_while_drained().
*/
blk_exp_close_all();
/* No more vcpu or device emulation activity beyond this point */
vm_shutdown();
replay_finish();
/*
* We must cancel all block jobs while the block layer is drained,
* or cancelling will be affected by throttling and thus may block
* for an extended period of time.
* Begin the drained section after vm_shutdown() to avoid requests being
* stuck in the BlockBackend's request queue.
* We do not need to end this section, because we do not want any
* requests happening from here on anyway.
*/
bdrv_drain_all_begin();
job_cancel_sync_all();
bdrv_close_all();
/* vhost-user must be cleaned up before chardevs. */
tpm_cleanup();
net_cleanup();
audio_cleanup();
monitor_cleanup();
qemu_chr_cleanup();
user_creatable_cleanup();
/* TODO: unref root container, check all devices are ok */
}