From aa2c364b4cf2fae4d9c8acf53ee4436ed533902d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:42 +0100 Subject: [PATCH 01/37] Prevent abortion on multiple VCPU kicks If we call qemu_cpu_kick more than once before the target was able to process the signal, pthread_kill will fail, and qemu will abort. Prevent this by avoiding the redundant signal. This logic can be found in qemu-kvm as well. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpu-defs.h | 1 + cpus.c | 6 +++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/cpu-defs.h b/cpu-defs.h index 8d4bf86c53..db809ed465 100644 --- a/cpu-defs.h +++ b/cpu-defs.h @@ -205,6 +205,7 @@ typedef struct CPUWatchpoint { uint32_t stopped; /* Artificially stopped */ \ struct QemuThread *thread; \ struct QemuCond *halt_cond; \ + int thread_kicked; \ struct qemu_work_item *queued_work_first, *queued_work_last; \ const char *cpu_model_str; \ struct KVMState *kvm_state; \ diff --git a/cpus.c b/cpus.c index 4c9928e2ce..ab6e40e93d 100644 --- a/cpus.c +++ b/cpus.c @@ -481,6 +481,7 @@ static void qemu_wait_io_event_common(CPUState *env) qemu_cond_signal(&qemu_pause_cond); } flush_queued_work(env); + env->thread_kicked = false; } static void qemu_tcg_wait_io_event(void) @@ -648,7 +649,10 @@ void qemu_cpu_kick(void *_env) { CPUState *env = _env; qemu_cond_broadcast(env->halt_cond); - qemu_thread_signal(env->thread, SIG_IPI); + if (!env->thread_kicked) { + qemu_thread_signal(env->thread, SIG_IPI); + env->thread_kicked = true; + } } int qemu_cpu_self(void *_env) From b4a3d965dee06d52281496bb5fd0a5cb5534b545 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:43 +0100 Subject: [PATCH 02/37] Stop current VCPU on synchronous reset requests If some I/O operation ends up calling qemu_system_reset_request in VCPU context, we record this and inform the io-thread, but we do not terminate the VCPU loop. 
This can lead to fairly unexpected behavior if the triggering reset operation is supposed to work synchronously. Fix this for TCG (when run in deterministic I/O mode) by setting the VCPU on stop and issuing a cpu_exit. KVM requires some more work on its VCPU loop. [ ported from qemu-kvm ] Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 17 +++++++++++++---- cpus.h | 1 + vl.c | 1 + 3 files changed, 15 insertions(+), 4 deletions(-) diff --git a/cpus.c b/cpus.c index ab6e40e93d..f8a88e3bb4 100644 --- a/cpus.c +++ b/cpus.c @@ -312,6 +312,10 @@ void qemu_notify_event(void) void qemu_mutex_lock_iothread(void) {} void qemu_mutex_unlock_iothread(void) {} +void cpu_stop_current(void) +{ +} + void vm_stop(int reason) { do_vm_stop(reason); @@ -852,6 +856,14 @@ static void qemu_system_vmstop_request(int reason) qemu_notify_event(); } +void cpu_stop_current(void) +{ + if (cpu_single_env) { + cpu_single_env->stopped = 1; + cpu_exit(cpu_single_env); + } +} + void vm_stop(int reason) { QemuThread me; @@ -863,10 +875,7 @@ void vm_stop(int reason) * FIXME: should not return to device code in case * vm_stop() has been requested. 
*/ - if (cpu_single_env) { - cpu_exit(cpu_single_env); - cpu_single_env->stop = 1; - } + cpu_stop_current(); return; } do_vm_stop(reason); diff --git a/cpus.h b/cpus.h index bf4d9bb87a..4cadb641c0 100644 --- a/cpus.h +++ b/cpus.h @@ -6,6 +6,7 @@ int qemu_init_main_loop(void); void qemu_main_loop_start(void); void resume_all_vcpus(void); void pause_all_vcpus(void); +void cpu_stop_current(void); /* vl.c */ extern int smp_cores; diff --git a/vl.c b/vl.c index ed2cdfae42..5e3f7f24e9 100644 --- a/vl.c +++ b/vl.c @@ -1296,6 +1296,7 @@ void qemu_system_reset_request(void) } else { reset_requested = 1; } + cpu_stop_current(); qemu_notify_event(); } From 1745eaaa7c53c6090d53090d239d0234a7ecfd2d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:44 +0100 Subject: [PATCH 03/37] Process vmstop requests in IO thread A pending vmstop request is also a reason to leave the inner main loop. So far we ignored it, and pending stop requests issued over VCPU threads were simply ignored. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- vl.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/vl.c b/vl.c index 5e3f7f24e9..30263d6349 100644 --- a/vl.c +++ b/vl.c @@ -1391,15 +1391,11 @@ void main_loop_wait(int nonblocking) static int vm_can_run(void) { - if (powerdown_requested) - return 0; - if (reset_requested) - return 0; - if (shutdown_requested) - return 0; - if (debug_requested) - return 0; - return 1; + return !(powerdown_requested || + reset_requested || + shutdown_requested || + debug_requested || + vmstop_requested); } qemu_irq qemu_system_powerdown; From 38145df24c55bffe8ba63cfa28173c9ddd5a2c2d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:45 +0100 Subject: [PATCH 04/37] Trigger exit from cpu_exec_all on pending IO events Except for timer events, we currently do not leave the loop over all VCPUs if an IO event was filed. 
That may cause unexpected IO latencies under !CONFIG_IOTHREAD in SMP scenarios. Fix it by setting the global exit_request which breaks the loop. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 1 + 1 file changed, 1 insertion(+) diff --git a/cpus.c b/cpus.c index f8a88e3bb4..0abc0092dd 100644 --- a/cpus.c +++ b/cpus.c @@ -307,6 +307,7 @@ void qemu_notify_event(void) if (next_cpu && env != next_cpu) { cpu_exit(next_cpu); } + exit_request = 1; } void qemu_mutex_lock_iothread(void) {} From 8e1b90ecc59573c4c5e9fc4934b4e30476b43e2f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:46 +0100 Subject: [PATCH 05/37] Leave inner main_loop faster on pending requests If there is any pending request that requires us to leave the inner loop of main_loop, make sure we do this as soon as possible by enforcing non-blocking IO processing. While at it, move variable definitions out of the inner loop to improve readability. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- vl.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/vl.c b/vl.c index 30263d6349..9c628f01d6 100644 --- a/vl.c +++ b/vl.c @@ -1402,18 +1402,21 @@ qemu_irq qemu_system_powerdown; static void main_loop(void) { + bool nonblocking = false; +#ifdef CONFIG_PROFILER + int64_t ti; +#endif int r; qemu_main_loop_start(); for (;;) { do { - bool nonblocking = false; -#ifdef CONFIG_PROFILER - int64_t ti; -#endif #ifndef CONFIG_IOTHREAD nonblocking = cpu_exec_all(); + if (!vm_can_run()) { + nonblocking = true; + } #endif #ifdef CONFIG_PROFILER ti = profile_getclock(); From 46481d3921b86ef5ded7be7b5ce8194608f30e6b Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:47 +0100 Subject: [PATCH 06/37] Flatten the main loop First of all, vm_can_run is a misnomer, it actually means "no request pending". 
Moreover, there is no need to check all pending requests twice, the first time via the inner loop check and then again when actually processing the requests. We can simply remove the inner loop and do the checks directly. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- vl.c | 30 +++++++++++++++--------------- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/vl.c b/vl.c index 9c628f01d6..c9fa266157 100644 --- a/vl.c +++ b/vl.c @@ -1389,14 +1389,16 @@ void main_loop_wait(int nonblocking) } -static int vm_can_run(void) +#ifndef CONFIG_IOTHREAD +static int vm_request_pending(void) { - return !(powerdown_requested || - reset_requested || - shutdown_requested || - debug_requested || - vmstop_requested); + return powerdown_requested || + reset_requested || + shutdown_requested || + debug_requested || + vmstop_requested; } +#endif qemu_irq qemu_system_powerdown; @@ -1411,21 +1413,19 @@ static void main_loop(void) qemu_main_loop_start(); for (;;) { - do { #ifndef CONFIG_IOTHREAD - nonblocking = cpu_exec_all(); - if (!vm_can_run()) { - nonblocking = true; - } + nonblocking = cpu_exec_all(); + if (vm_request_pending()) { + nonblocking = true; + } #endif #ifdef CONFIG_PROFILER - ti = profile_getclock(); + ti = profile_getclock(); #endif - main_loop_wait(nonblocking); + main_loop_wait(nonblocking); #ifdef CONFIG_PROFILER - dev_time += profile_getclock() - ti; + dev_time += profile_getclock() - ti; #endif - } while (vm_can_run()); if ((r = qemu_debug_requested())) { vm_stop(r); From 748a680b966c65cca9292ba0b9cbb6a04d063b21 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:48 +0100 Subject: [PATCH 07/37] kvm: Report proper error on GET_VCPU_MMAP_SIZE failures Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 1 + 1 file changed, 1 insertion(+) diff --git a/kvm-all.c b/kvm-all.c index 2ec9e0980d..55f6ac35ed 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -219,6 +219,7 @@ int kvm_init_vcpu(CPUState *env) mmap_size = 
kvm_ioctl(s, KVM_GET_VCPU_MMAP_SIZE, 0); if (mmap_size < 0) { + ret = mmap_size; DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n"); goto err; } From d31ae052a913e8e41a9f22429c28decbdb660568 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:49 +0100 Subject: [PATCH 08/37] kvm: Drop redundant kvm_enabled from kvm_cpu_thread_fn Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpus.c b/cpus.c index 0abc0092dd..8232d441ad 100644 --- a/cpus.c +++ b/cpus.c @@ -603,8 +603,8 @@ static void *kvm_cpu_thread_fn(void *arg) qemu_mutex_lock(&qemu_global_mutex); qemu_thread_self(env->thread); - if (kvm_enabled()) - kvm_init_vcpu(env); + + kvm_init_vcpu(env); kvm_init_ipi(env); From 84b4915dd2c0eaa86c970ffc42a68ea8ba9e48b5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:50 +0100 Subject: [PATCH 09/37] kvm: Handle kvm_init_vcpu errors Do not ignore errors of kvm_init_vcpu, they are fatal. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/cpus.c b/cpus.c index 8232d441ad..3a72d06791 100644 --- a/cpus.c +++ b/cpus.c @@ -265,12 +265,18 @@ void qemu_main_loop_start(void) void qemu_init_vcpu(void *_env) { CPUState *env = _env; + int r; env->nr_cores = smp_cores; env->nr_threads = smp_threads; - if (kvm_enabled()) - kvm_init_vcpu(env); - return; + + if (kvm_enabled()) { + r = kvm_init_vcpu(env); + if (r < 0) { + fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r)); + exit(1); + } + } } int qemu_cpu_self(void *env) @@ -600,11 +606,16 @@ static int qemu_cpu_exec(CPUState *env); static void *kvm_cpu_thread_fn(void *arg) { CPUState *env = arg; + int r; qemu_mutex_lock(&qemu_global_mutex); qemu_thread_self(env->thread); - kvm_init_vcpu(env); + r = kvm_init_vcpu(env); + if (r < 0) { + fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r)); + exit(1); + } kvm_init_ipi(env); From a1b87fe046bbb5a332e51906053c7e0307f26d89 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:51 +0100 Subject: [PATCH 10/37] kvm: Provide sigbus services arch-independently Provide arch-independent kvm_on_sigbus* stubs to remove the #ifdef'ery from cpus.c. This patch also fixes --disable-kvm build by providing the missing kvm_on_sigbus_vcpu kvm-stub. 
Signed-off-by: Jan Kiszka Reviewed-by: Paolo Bonzini Acked-by: Alexander Graf Signed-off-by: Marcelo Tosatti --- cpus.c | 10 ++++------ kvm-all.c | 10 ++++++++++ kvm-stub.c | 5 +++++ kvm.h | 7 +++++-- target-i386/kvm.c | 4 ++-- target-ppc/kvm.c | 10 ++++++++++ target-s390x/kvm.c | 10 ++++++++++ 7 files changed, 46 insertions(+), 10 deletions(-) diff --git a/cpus.c b/cpus.c index 3a72d06791..4975317df7 100644 --- a/cpus.c +++ b/cpus.c @@ -539,10 +539,9 @@ static void sigbus_reraise(void) static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, void *ctx) { -#if defined(TARGET_I386) - if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr)) -#endif + if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr)) { sigbus_reraise(); + } } static void qemu_kvm_eat_signal(CPUState *env, int timeout) @@ -575,10 +574,9 @@ static void qemu_kvm_eat_signal(CPUState *env, int timeout) switch (r) { case SIGBUS: -#ifdef TARGET_I386 - if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) -#endif + if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) { sigbus_reraise(); + } break; default: break; diff --git a/kvm-all.c b/kvm-all.c index 55f6ac35ed..a83aff2f66 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1366,3 +1366,13 @@ int kvm_set_ioeventfd_pio_word(int fd, uint16_t addr, uint16_t val, bool assign) return -ENOSYS; #endif } + +int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) +{ + return kvm_arch_on_sigbus_vcpu(env, code, addr); +} + +int kvm_on_sigbus(int code, void *addr) +{ + return kvm_arch_on_sigbus(code, addr); +} diff --git a/kvm-stub.c b/kvm-stub.c index 88682f288b..d6b6c8e794 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -147,6 +147,11 @@ int kvm_set_ioeventfd_mmio_long(int fd, uint32_t adr, uint32_t val, bool assign) return -ENOSYS; } +int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) +{ + return 1; +} + int kvm_on_sigbus(int code, void *addr) { return 1; diff --git a/kvm.h b/kvm.h index 
ca57517af2..b2fb5c61e7 100644 --- a/kvm.h +++ b/kvm.h @@ -81,6 +81,9 @@ int kvm_set_signal_mask(CPUState *env, const sigset_t *sigset); int kvm_pit_in_kernel(void); int kvm_irqchip_in_kernel(void); +int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr); +int kvm_on_sigbus(int code, void *addr); + /* internal API */ struct KVMState; @@ -121,8 +124,8 @@ int kvm_arch_init_vcpu(CPUState *env); void kvm_arch_reset_vcpu(CPUState *env); -int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr); -int kvm_on_sigbus(int code, void *addr); +int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr); +int kvm_arch_on_sigbus(int code, void *addr); struct kvm_guest_debug; struct kvm_debug_exit_arch; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 05010bbc38..9df8ff85d2 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1839,7 +1839,7 @@ static void kvm_mce_inj_srao_memscrub2(CPUState *env, target_phys_addr_t paddr) #endif -int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) +int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr) { #if defined(KVM_CAP_MCE) void *vaddr; @@ -1889,7 +1889,7 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) return 0; } -int kvm_on_sigbus(int code, void *addr) +int kvm_arch_on_sigbus(int code, void *addr) { #if defined(KVM_CAP_MCE) if ((first_cpu->mcg_cap & MCG_SER_P) && addr && code == BUS_MCEERR_AO) { diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 710eca1dca..93ecc57470 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -404,3 +404,13 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env) { return true; } + +int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr) +{ + return 1; +} + +int kvm_arch_on_sigbus(int code, void *addr) +{ + return 1; +} diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 38823f54f7..1702c4601d 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -505,3 +505,13 @@ bool kvm_arch_stop_on_emulation_error(CPUState *env) { return 
true; } + +int kvm_arch_on_sigbus_vcpu(CPUState *env, int code, void *addr) +{ + return 1; +} + +int kvm_arch_on_sigbus(int code, void *addr) +{ + return 1; +} From 55f8d6ac3e03d2859393c281737f60c65dfc9ab3 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:52 +0100 Subject: [PATCH 11/37] Refactor signal setup functions in cpus.c Move {tcg,kvm}_init_ipi and block_io_signals to avoid prototypes, rename the former two to clarify that they deal with more than SIG_IPI. No functional changes - except for the tiny fixup of strerror usage. The forward declaration of sigbus_handler is just temporarily, it will be moved in a succeeding patch. dummy_signal is moved into the !_WIN32 block as we will soon need it also for !CONFIG_IOTHREAD. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 162 +++++++++++++++++++++++++++++---------------------------- 1 file changed, 83 insertions(+), 79 deletions(-) diff --git a/cpus.c b/cpus.c index 4975317df7..62120c4506 100644 --- a/cpus.c +++ b/cpus.c @@ -222,7 +222,15 @@ fail: close(fds[1]); return err; } -#else + +#ifdef CONFIG_IOTHREAD +static void dummy_signal(int sig) +{ +} +#endif + +#else /* _WIN32 */ + HANDLE qemu_event_handle; static void dummy_event_handler(void *opaque) @@ -248,7 +256,7 @@ static void qemu_event_increment(void) exit (1); } } -#endif +#endif /* _WIN32 */ #ifndef CONFIG_IOTHREAD int qemu_init_main_loop(void) @@ -348,10 +356,6 @@ static QemuCond qemu_system_cond; static QemuCond qemu_pause_cond; static QemuCond qemu_work_cond; -static void tcg_init_ipi(void); -static void kvm_init_ipi(CPUState *env); -static sigset_t block_io_signals(void); - /* If we have signalfd, we mask out the signals we want to handle and then * use signalfd to listen for them. We rely on whatever the current signal * handler is to dispatch the signals when we receive them. 
@@ -387,6 +391,77 @@ static void sigfd_handler(void *opaque) } } +static void cpu_signal(int sig) +{ + if (cpu_single_env) { + cpu_exit(cpu_single_env); + } + exit_request = 1; +} + +static void qemu_kvm_init_cpu_signals(CPUState *env) +{ + int r; + sigset_t set; + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = dummy_signal; + sigaction(SIG_IPI, &sigact, NULL); + + pthread_sigmask(SIG_BLOCK, NULL, &set); + sigdelset(&set, SIG_IPI); + sigdelset(&set, SIGBUS); + r = kvm_set_signal_mask(env, &set); + if (r) { + fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); + exit(1); + } +} + +static void qemu_tcg_init_cpu_signals(void) +{ + sigset_t set; + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = cpu_signal; + sigaction(SIG_IPI, &sigact, NULL); + + sigemptyset(&set); + sigaddset(&set, SIG_IPI); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); +} + +static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, + void *ctx); + +static sigset_t block_io_signals(void) +{ + sigset_t set; + struct sigaction action; + + /* SIGUSR2 used by posix-aio-compat.c */ + sigemptyset(&set); + sigaddset(&set, SIGUSR2); + pthread_sigmask(SIG_UNBLOCK, &set, NULL); + + sigemptyset(&set); + sigaddset(&set, SIGIO); + sigaddset(&set, SIGALRM); + sigaddset(&set, SIG_IPI); + sigaddset(&set, SIGBUS); + pthread_sigmask(SIG_BLOCK, &set, NULL); + + memset(&action, 0, sizeof(action)); + action.sa_flags = SA_SIGINFO; + action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler; + sigaction(SIGBUS, &action, NULL); + prctl(PR_MCE_KILL, 1, 1, 0, 0); + + return set; +} + static int qemu_signalfd_init(sigset_t mask) { int sigfd; @@ -615,7 +690,7 @@ static void *kvm_cpu_thread_fn(void *arg) exit(1); } - kvm_init_ipi(env); + qemu_kvm_init_cpu_signals(env); /* signal CPU creation */ env->created = 1; @@ -638,7 +713,7 @@ static void *tcg_cpu_thread_fn(void *arg) { CPUState *env = arg; - tcg_init_ipi(); + 
qemu_tcg_init_cpu_signals(); qemu_thread_self(env->thread); /* signal CPU creation */ @@ -679,77 +754,6 @@ int qemu_cpu_self(void *_env) return qemu_thread_equal(&this, env->thread); } -static void cpu_signal(int sig) -{ - if (cpu_single_env) - cpu_exit(cpu_single_env); - exit_request = 1; -} - -static void tcg_init_ipi(void) -{ - sigset_t set; - struct sigaction sigact; - - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_handler = cpu_signal; - sigaction(SIG_IPI, &sigact, NULL); - - sigemptyset(&set); - sigaddset(&set, SIG_IPI); - pthread_sigmask(SIG_UNBLOCK, &set, NULL); -} - -static void dummy_signal(int sig) -{ -} - -static void kvm_init_ipi(CPUState *env) -{ - int r; - sigset_t set; - struct sigaction sigact; - - memset(&sigact, 0, sizeof(sigact)); - sigact.sa_handler = dummy_signal; - sigaction(SIG_IPI, &sigact, NULL); - - pthread_sigmask(SIG_BLOCK, NULL, &set); - sigdelset(&set, SIG_IPI); - sigdelset(&set, SIGBUS); - r = kvm_set_signal_mask(env, &set); - if (r) { - fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(r)); - exit(1); - } -} - -static sigset_t block_io_signals(void) -{ - sigset_t set; - struct sigaction action; - - /* SIGUSR2 used by posix-aio-compat.c */ - sigemptyset(&set); - sigaddset(&set, SIGUSR2); - pthread_sigmask(SIG_UNBLOCK, &set, NULL); - - sigemptyset(&set); - sigaddset(&set, SIGIO); - sigaddset(&set, SIGALRM); - sigaddset(&set, SIG_IPI); - sigaddset(&set, SIGBUS); - pthread_sigmask(SIG_BLOCK, &set, NULL); - - memset(&action, 0, sizeof(action)); - action.sa_flags = SA_SIGINFO; - action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler; - sigaction(SIGBUS, &action, NULL); - prctl(PR_MCE_KILL, 1, 1, 0, 0); - - return set; -} - void qemu_mutex_lock_iothread(void) { if (kvm_enabled()) { From ff48eb5fe79ad9ce50127965bd42320c7cccc8a1 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:53 +0100 Subject: [PATCH 12/37] kvm: Set up signal mask also for !CONFIG_IOTHREAD Block SIG_IPI, unblock it during KVM_RUN, 
just like in io-thread mode. It's unused so far, but this infrastructure will be required for self-IPIs and to process SIGBUS plus, in KVM mode, SIGIO and SIGALRM. As Windows doesn't support signal services, we need to provide a stub for the init function. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) diff --git a/cpus.c b/cpus.c index 62120c4506..f27dfbdda7 100644 --- a/cpus.c +++ b/cpus.c @@ -223,11 +223,9 @@ fail: return err; } -#ifdef CONFIG_IOTHREAD static void dummy_signal(int sig) { } -#endif #else /* _WIN32 */ @@ -259,6 +257,32 @@ static void qemu_event_increment(void) #endif /* _WIN32 */ #ifndef CONFIG_IOTHREAD +static void qemu_kvm_init_cpu_signals(CPUState *env) +{ +#ifndef _WIN32 + int r; + sigset_t set; + struct sigaction sigact; + + memset(&sigact, 0, sizeof(sigact)); + sigact.sa_handler = dummy_signal; + sigaction(SIG_IPI, &sigact, NULL); + + sigemptyset(&set); + sigaddset(&set, SIG_IPI); + pthread_sigmask(SIG_BLOCK, &set, NULL); + + pthread_sigmask(SIG_BLOCK, NULL, &set); + sigdelset(&set, SIG_IPI); + sigdelset(&set, SIGBUS); + r = kvm_set_signal_mask(env, &set); + if (r) { + fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); + exit(1); + } +#endif +} + int qemu_init_main_loop(void) { cpu_set_debug_excp_handler(cpu_debug_handler); @@ -284,6 +308,7 @@ void qemu_init_vcpu(void *_env) fprintf(stderr, "kvm_init_vcpu failed: %s\n", strerror(-r)); exit(1); } + qemu_kvm_init_cpu_signals(env); } } From 5db5bdacdfb115bb23d57808947f1ec1bcc00ef3 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:54 +0100 Subject: [PATCH 13/37] kvm: Refactor qemu_kvm_eat_signals We do not use the timeout, so drop its logic. As we always poll our signals, we do not need to drop the global lock. Removing those calls allows some further simplifications. Also fix the error processing of sigpending while at it. 
Signed-off-by: Jan Kiszka Reviewed-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti --- cpus.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/cpus.c b/cpus.c index f27dfbdda7..d9c9111f82 100644 --- a/cpus.c +++ b/cpus.c @@ -644,31 +644,22 @@ static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, } } -static void qemu_kvm_eat_signal(CPUState *env, int timeout) +static void qemu_kvm_eat_signals(CPUState *env) { - struct timespec ts; - int r, e; + struct timespec ts = { 0, 0 }; siginfo_t siginfo; sigset_t waitset; sigset_t chkset; - - ts.tv_sec = timeout / 1000; - ts.tv_nsec = (timeout % 1000) * 1000000; + int r; sigemptyset(&waitset); sigaddset(&waitset, SIG_IPI); sigaddset(&waitset, SIGBUS); do { - qemu_mutex_unlock(&qemu_global_mutex); - r = sigtimedwait(&waitset, &siginfo, &ts); - e = errno; - - qemu_mutex_lock(&qemu_global_mutex); - - if (r == -1 && !(e == EAGAIN || e == EINTR)) { - fprintf(stderr, "sigtimedwait: %s\n", strerror(e)); + if (r == -1 && !(errno == EAGAIN || errno == EINTR)) { + perror("sigtimedwait"); exit(1); } @@ -684,7 +675,7 @@ static void qemu_kvm_eat_signal(CPUState *env, int timeout) r = sigpending(&chkset); if (r == -1) { - fprintf(stderr, "sigpending: %s\n", strerror(e)); + perror("sigpending"); exit(1); } } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS)); @@ -695,7 +686,7 @@ static void qemu_kvm_wait_io_event(CPUState *env) while (!cpu_has_work(env)) qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000); - qemu_kvm_eat_signal(env, 0); + qemu_kvm_eat_signals(env); qemu_wait_io_event_common(env); } From 9a36085b866002d8e469f46078f0db5201f44999 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:55 +0100 Subject: [PATCH 14/37] kvm: Call qemu_kvm_eat_signals also under !CONFIG_IOTHREAD Move qemu_kvm_eat_signals around and call it also when the IO-thread is not used. Do not yet process SIGBUS, will be armed in a separate step. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 90 +++++++++++++++++++++++++++++++++------------------------- 1 file changed, 52 insertions(+), 38 deletions(-) diff --git a/cpus.c b/cpus.c index d9c9111f82..f2ec74ce78 100644 --- a/cpus.c +++ b/cpus.c @@ -227,6 +227,47 @@ static void dummy_signal(int sig) { } +static void sigbus_reraise(void); + +static void qemu_kvm_eat_signals(CPUState *env) +{ + struct timespec ts = { 0, 0 }; + siginfo_t siginfo; + sigset_t waitset; + sigset_t chkset; + int r; + + sigemptyset(&waitset); + sigaddset(&waitset, SIG_IPI); + sigaddset(&waitset, SIGBUS); + + do { + r = sigtimedwait(&waitset, &siginfo, &ts); + if (r == -1 && !(errno == EAGAIN || errno == EINTR)) { + perror("sigtimedwait"); + exit(1); + } + + switch (r) { +#ifdef CONFIG_IOTHREAD + case SIGBUS: + if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) { + sigbus_reraise(); + } + break; +#endif + default: + break; + } + + r = sigpending(&chkset); + if (r == -1) { + perror("sigpending"); + exit(1); + } + } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS)); +} + #else /* _WIN32 */ HANDLE qemu_event_handle; @@ -254,6 +295,10 @@ static void qemu_event_increment(void) exit (1); } } + +static void qemu_kvm_eat_signals(CPUState *env) +{ +} #endif /* _WIN32 */ #ifndef CONFIG_IOTHREAD @@ -644,43 +689,6 @@ static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, } } -static void qemu_kvm_eat_signals(CPUState *env) -{ - struct timespec ts = { 0, 0 }; - siginfo_t siginfo; - sigset_t waitset; - sigset_t chkset; - int r; - - sigemptyset(&waitset); - sigaddset(&waitset, SIG_IPI); - sigaddset(&waitset, SIGBUS); - - do { - r = sigtimedwait(&waitset, &siginfo, &ts); - if (r == -1 && !(errno == EAGAIN || errno == EINTR)) { - perror("sigtimedwait"); - exit(1); - } - - switch (r) { - case SIGBUS: - if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) { - sigbus_reraise(); - } - break; - default: - break; - } - - r = 
sigpending(&chkset); - if (r == -1) { - perror("sigpending"); - exit(1); - } - } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS)); -} - static void qemu_kvm_wait_io_event(CPUState *env) { while (!cpu_has_work(env)) @@ -953,6 +961,8 @@ static int qemu_cpu_exec(CPUState *env) bool cpu_exec_all(void) { + int r; + if (next_cpu == NULL) next_cpu = first_cpu; for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) { @@ -964,7 +974,11 @@ bool cpu_exec_all(void) if (qemu_alarm_pending()) break; if (cpu_can_run(env)) { - if (qemu_cpu_exec(env) == EXCP_DEBUG) { + r = qemu_cpu_exec(env); + if (kvm_enabled()) { + qemu_kvm_eat_signals(env); + } + if (r == EXCP_DEBUG) { break; } } else if (env->stop) { From d0f294cec0ee25461a0a15b889929c5c7dc983cd Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:56 +0100 Subject: [PATCH 15/37] Set up signalfd under !CONFIG_IOTHREAD Will be required for SIGBUS handling. For obvious reasons, this will remain a nop on Windows hosts. Signed-off-by: Jan Kiszka Reviewed-by: Paolo Bonzini Signed-off-by: Marcelo Tosatti --- Makefile.objs | 2 +- cpus.c | 117 +++++++++++++++++++++++++++----------------------- 2 files changed, 65 insertions(+), 54 deletions(-) diff --git a/Makefile.objs b/Makefile.objs index adb584234e..b21f9d3894 100644 --- a/Makefile.objs +++ b/Makefile.objs @@ -141,7 +141,7 @@ common-obj-y += $(addprefix ui/, $(ui-obj-y)) common-obj-y += iov.o acl.o common-obj-$(CONFIG_THREAD) += qemu-thread.o -common-obj-$(CONFIG_IOTHREAD) += compatfd.o +common-obj-$(CONFIG_POSIX) += compatfd.o common-obj-y += notify.o event_notifier.o common-obj-y += qemu-timer.o qemu-timer-common.o diff --git a/cpus.c b/cpus.c index f2ec74ce78..84792f0a0e 100644 --- a/cpus.c +++ b/cpus.c @@ -227,6 +227,59 @@ static void dummy_signal(int sig) { } +/* If we have signalfd, we mask out the signals we want to handle and then + * use signalfd to listen for them. 
We rely on whatever the current signal + * handler is to dispatch the signals when we receive them. + */ +static void sigfd_handler(void *opaque) +{ + int fd = (unsigned long) opaque; + struct qemu_signalfd_siginfo info; + struct sigaction action; + ssize_t len; + + while (1) { + do { + len = read(fd, &info, sizeof(info)); + } while (len == -1 && errno == EINTR); + + if (len == -1 && errno == EAGAIN) { + break; + } + + if (len != sizeof(info)) { + printf("read from sigfd returned %zd: %m\n", len); + return; + } + + sigaction(info.ssi_signo, NULL, &action); + if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) { + action.sa_sigaction(info.ssi_signo, + (siginfo_t *)&info, NULL); + } else if (action.sa_handler) { + action.sa_handler(info.ssi_signo); + } + } +} + +static int qemu_signalfd_init(sigset_t mask) +{ + int sigfd; + + sigfd = qemu_signalfd(&mask); + if (sigfd == -1) { + fprintf(stderr, "failed to create signalfd\n"); + return -errno; + } + + fcntl_setfl(sigfd, O_NONBLOCK); + + qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL, + (void *)(unsigned long) sigfd); + + return 0; +} + static void sigbus_reraise(void); static void qemu_kvm_eat_signals(CPUState *env) @@ -330,6 +383,17 @@ static void qemu_kvm_init_cpu_signals(CPUState *env) int qemu_init_main_loop(void) { +#ifndef _WIN32 + sigset_t blocked_signals; + int ret; + + sigemptyset(&blocked_signals); + + ret = qemu_signalfd_init(blocked_signals); + if (ret) { + return ret; + } +#endif cpu_set_debug_excp_handler(cpu_debug_handler); return qemu_event_init(); @@ -426,41 +490,6 @@ static QemuCond qemu_system_cond; static QemuCond qemu_pause_cond; static QemuCond qemu_work_cond; -/* If we have signalfd, we mask out the signals we want to handle and then - * use signalfd to listen for them. We rely on whatever the current signal - * handler is to dispatch the signals when we receive them. 
- */ -static void sigfd_handler(void *opaque) -{ - int fd = (unsigned long) opaque; - struct qemu_signalfd_siginfo info; - struct sigaction action; - ssize_t len; - - while (1) { - do { - len = read(fd, &info, sizeof(info)); - } while (len == -1 && errno == EINTR); - - if (len == -1 && errno == EAGAIN) { - break; - } - - if (len != sizeof(info)) { - printf("read from sigfd returned %zd: %m\n", len); - return; - } - - sigaction(info.ssi_signo, NULL, &action); - if ((action.sa_flags & SA_SIGINFO) && action.sa_sigaction) { - action.sa_sigaction(info.ssi_signo, - (siginfo_t *)&info, NULL); - } else if (action.sa_handler) { - action.sa_handler(info.ssi_signo); - } - } -} - static void cpu_signal(int sig) { if (cpu_single_env) { @@ -532,24 +561,6 @@ static sigset_t block_io_signals(void) return set; } -static int qemu_signalfd_init(sigset_t mask) -{ - int sigfd; - - sigfd = qemu_signalfd(&mask); - if (sigfd == -1) { - fprintf(stderr, "failed to create signalfd\n"); - return -errno; - } - - fcntl_setfl(sigfd, O_NONBLOCK); - - qemu_set_fd_handler2(sigfd, NULL, sigfd_handler, NULL, - (void *)(unsigned long) sigfd); - - return 0; -} - int qemu_init_main_loop(void) { int ret; From de758970b60f129ed9d024cc5c90c1f57fab2fd4 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:57 +0100 Subject: [PATCH 16/37] kvm: Fix race between timer signals and vcpu entry under !IOTHREAD Found by Stefan Hajnoczi: There is a race in kvm_cpu_exec between checking for exit_request on vcpu entry and timer signals arriving before KVM starts to catch them. Plug it by blocking both timer related signals also on !CONFIG_IOTHREAD and process those via signalfd. As this fix depends on real signalfd support (otherwise the timer signals only kick the compat helper thread, and the main thread hangs), we need to detect the invalid constellation and abort configure. 
Signed-off-by: Jan Kiszka CC: Stefan Hajnoczi Signed-off-by: Marcelo Tosatti --- configure | 6 ++++++ cpus.c | 31 ++++++++++++++++++++++++++++++- 2 files changed, 36 insertions(+), 1 deletion(-) diff --git a/configure b/configure index 598e8e1ebb..a3f53456f4 100755 --- a/configure +++ b/configure @@ -2057,6 +2057,12 @@ EOF if compile_prog "" "" ; then signalfd=yes +elif test "$kvm" = "yes" -a "$io_thread" != "yes"; then + echo + echo "ERROR: Host kernel lacks signalfd() support," + echo "but KVM depends on it when the IO thread is disabled." + echo + exit 1 fi # check if eventfd is supported diff --git a/cpus.c b/cpus.c index 84792f0a0e..38dd506b92 100644 --- a/cpus.c +++ b/cpus.c @@ -319,6 +319,12 @@ static void qemu_kvm_eat_signals(CPUState *env) exit(1); } } while (sigismember(&chkset, SIG_IPI) || sigismember(&chkset, SIGBUS)); + +#ifndef CONFIG_IOTHREAD + if (sigismember(&chkset, SIGIO) || sigismember(&chkset, SIGALRM)) { + qemu_notify_event(); + } +#endif } #else /* _WIN32 */ @@ -368,11 +374,15 @@ static void qemu_kvm_init_cpu_signals(CPUState *env) sigemptyset(&set); sigaddset(&set, SIG_IPI); + sigaddset(&set, SIGIO); + sigaddset(&set, SIGALRM); pthread_sigmask(SIG_BLOCK, &set, NULL); pthread_sigmask(SIG_BLOCK, NULL, &set); sigdelset(&set, SIG_IPI); sigdelset(&set, SIGBUS); + sigdelset(&set, SIGIO); + sigdelset(&set, SIGALRM); r = kvm_set_signal_mask(env, &set); if (r) { fprintf(stderr, "kvm_set_signal_mask: %s\n", strerror(-r)); @@ -381,13 +391,32 @@ static void qemu_kvm_init_cpu_signals(CPUState *env) #endif } +#ifndef _WIN32 +static sigset_t block_synchronous_signals(void) +{ + sigset_t set; + + sigemptyset(&set); + if (kvm_enabled()) { + /* + * We need to process timer signals synchronously to avoid a race + * between exit_request check and KVM vcpu entry. 
+ */ + sigaddset(&set, SIGIO); + sigaddset(&set, SIGALRM); + } + + return set; +} +#endif + int qemu_init_main_loop(void) { #ifndef _WIN32 sigset_t blocked_signals; int ret; - sigemptyset(&blocked_signals); + blocked_signals = block_synchronous_signals(); ret = qemu_signalfd_init(blocked_signals); if (ret) { From 6d9cb73c1bf80bfb0b8e7b2b3a23703e621c1405 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:58 +0100 Subject: [PATCH 17/37] kvm: Add MCE signal support for !CONFIG_IOTHREAD Currently, we only configure and process MCE-related SIGBUS events if CONFIG_IOTHREAD is enabled. The groundwork is laid, we just need to factor out the required handler registration and system configuration. Signed-off-by: Jan Kiszka CC: Huang Ying CC: Hidetoshi Seto CC: Jin Dongming Signed-off-by: Marcelo Tosatti --- cpus.c | 107 +++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 65 insertions(+), 42 deletions(-) diff --git a/cpus.c b/cpus.c index 38dd506b92..9a3fc312d3 100644 --- a/cpus.c +++ b/cpus.c @@ -34,9 +34,6 @@ #include "cpus.h" #include "compatfd.h" -#ifdef CONFIG_LINUX -#include -#endif #ifdef SIGRTMIN #define SIG_IPI (SIGRTMIN+4) @@ -44,10 +41,24 @@ #define SIG_IPI SIGUSR1 #endif +#ifdef CONFIG_LINUX + +#include + #ifndef PR_MCE_KILL #define PR_MCE_KILL 33 #endif +#ifndef PR_MCE_KILL_SET +#define PR_MCE_KILL_SET 1 +#endif + +#ifndef PR_MCE_KILL_EARLY +#define PR_MCE_KILL_EARLY 1 +#endif + +#endif /* CONFIG_LINUX */ + static CPUState *next_cpu; /***********************************************************/ @@ -158,6 +169,52 @@ static void cpu_debug_handler(CPUState *env) vm_stop(EXCP_DEBUG); } +#ifdef CONFIG_LINUX +static void sigbus_reraise(void) +{ + sigset_t set; + struct sigaction action; + + memset(&action, 0, sizeof(action)); + action.sa_handler = SIG_DFL; + if (!sigaction(SIGBUS, &action, NULL)) { + raise(SIGBUS); + sigemptyset(&set); + sigaddset(&set, SIGBUS); + sigprocmask(SIG_UNBLOCK, &set, NULL); + } + perror("Failed to 
re-raise SIGBUS!\n"); + abort(); +} + +static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, + void *ctx) +{ + if (kvm_on_sigbus(siginfo->ssi_code, + (void *)(intptr_t)siginfo->ssi_addr)) { + sigbus_reraise(); + } +} + +static void qemu_init_sigbus(void) +{ + struct sigaction action; + + memset(&action, 0, sizeof(action)); + action.sa_flags = SA_SIGINFO; + action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler; + sigaction(SIGBUS, &action, NULL); + + prctl(PR_MCE_KILL, PR_MCE_KILL_SET, PR_MCE_KILL_EARLY, 0, 0); +} + +#else /* !CONFIG_LINUX */ + +static void qemu_init_sigbus(void) +{ +} +#endif /* !CONFIG_LINUX */ + #ifndef _WIN32 static int io_thread_fd = -1; @@ -280,8 +337,6 @@ static int qemu_signalfd_init(sigset_t mask) return 0; } -static void sigbus_reraise(void); - static void qemu_kvm_eat_signals(CPUState *env) { struct timespec ts = { 0, 0 }; @@ -302,13 +357,11 @@ static void qemu_kvm_eat_signals(CPUState *env) } switch (r) { -#ifdef CONFIG_IOTHREAD case SIGBUS: if (kvm_on_sigbus_vcpu(env, siginfo.si_code, siginfo.si_addr)) { sigbus_reraise(); } break; -#endif default: break; } @@ -397,6 +450,7 @@ static sigset_t block_synchronous_signals(void) sigset_t set; sigemptyset(&set); + sigaddset(&set, SIGBUS); if (kvm_enabled()) { /* * We need to process timer signals synchronously to avoid a race @@ -425,6 +479,8 @@ int qemu_init_main_loop(void) #endif cpu_set_debug_excp_handler(cpu_debug_handler); + qemu_init_sigbus(); + return qemu_event_init(); } @@ -561,13 +617,9 @@ static void qemu_tcg_init_cpu_signals(void) pthread_sigmask(SIG_UNBLOCK, &set, NULL); } -static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, - void *ctx); - static sigset_t block_io_signals(void) { sigset_t set; - struct sigaction action; /* SIGUSR2 used by posix-aio-compat.c */ sigemptyset(&set); @@ -581,12 +633,6 @@ static sigset_t block_io_signals(void) sigaddset(&set, SIGBUS); pthread_sigmask(SIG_BLOCK, &set, NULL); - memset(&action, 0, 
sizeof(action)); - action.sa_flags = SA_SIGINFO; - action.sa_sigaction = (void (*)(int, siginfo_t*, void*))sigbus_handler; - sigaction(SIGBUS, &action, NULL); - prctl(PR_MCE_KILL, 1, 1, 0, 0); - return set; } @@ -597,6 +643,8 @@ int qemu_init_main_loop(void) cpu_set_debug_excp_handler(cpu_debug_handler); + qemu_init_sigbus(); + blocked_signals = block_io_signals(); ret = qemu_signalfd_init(blocked_signals); @@ -704,31 +752,6 @@ static void qemu_tcg_wait_io_event(void) } } -static void sigbus_reraise(void) -{ - sigset_t set; - struct sigaction action; - - memset(&action, 0, sizeof(action)); - action.sa_handler = SIG_DFL; - if (!sigaction(SIGBUS, &action, NULL)) { - raise(SIGBUS); - sigemptyset(&set); - sigaddset(&set, SIGBUS); - sigprocmask(SIG_UNBLOCK, &set, NULL); - } - perror("Failed to re-raise SIGBUS!\n"); - abort(); -} - -static void sigbus_handler(int n, struct qemu_signalfd_siginfo *siginfo, - void *ctx) -{ - if (kvm_on_sigbus(siginfo->ssi_code, (void *)(intptr_t)siginfo->ssi_addr)) { - sigbus_reraise(); - } -} - static void qemu_kvm_wait_io_event(CPUState *env) { while (!cpu_has_work(env)) From 46d62fac8a6a43453322b3305ab2fcb8ee594443 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:15:59 +0100 Subject: [PATCH 18/37] Introduce VCPU self-signaling service Introduce qemu_cpu_kick_self to send SIG_IPI to the calling VCPU context. First user will be kvm. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 21 +++++++++++++++++++++ qemu-common.h | 1 + 2 files changed, 22 insertions(+) diff --git a/cpus.c b/cpus.c index 9a3fc312d3..6a85dc8664 100644 --- a/cpus.c +++ b/cpus.c @@ -529,6 +529,17 @@ void qemu_cpu_kick(void *env) return; } +void qemu_cpu_kick_self(void) +{ +#ifndef _WIN32 + assert(cpu_single_env); + + raise(SIG_IPI); +#else + abort(); +#endif +} + void qemu_notify_event(void) { CPUState *env = cpu_single_env; @@ -831,6 +842,16 @@ void qemu_cpu_kick(void *_env) } } +void qemu_cpu_kick_self(void) +{ + assert(cpu_single_env); + + if (!cpu_single_env->thread_kicked) { + qemu_thread_signal(cpu_single_env->thread, SIG_IPI); + cpu_single_env->thread_kicked = true; + } +} + int qemu_cpu_self(void *_env) { CPUState *env = _env; diff --git a/qemu-common.h b/qemu-common.h index c7ff280b95..a4d9c21a39 100644 --- a/qemu-common.h +++ b/qemu-common.h @@ -288,6 +288,7 @@ void qemu_notify_event(void); /* Unblock cpu */ void qemu_cpu_kick(void *env); +void qemu_cpu_kick_self(void); int qemu_cpu_self(void *env); /* work queue */ From 9ccfac9ea4b862a75a4270ed32db1f8e314911c5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:16:00 +0100 Subject: [PATCH 19/37] kvm: Unconditionally reenter kernel after IO exits KVM requires to reenter the kernel after IO exits in order to complete instruction emulation. Failing to do so will leave the kernel state inconsistently behind. To ensure that we will get back ASAP, we issue a self-signal that will cause KVM_RUN to return once the pending operations are completed. We can move kvm_arch_process_irqchip_events out of the inner VCPU loop. The only state that mattered at its old place was a pending INIT request. Catch it in kvm_arch_pre_run and also trigger a self-signal to process the request on next kvm_cpu_exec. This patch also fixes the missing exit_request check in kvm_cpu_exec in the CONFIG_IOTHREAD case. 
Signed-off-by: Jan Kiszka CC: Gleb Natapov Signed-off-by: Marcelo Tosatti --- kvm-all.c | 31 +++++++++++++++++-------------- target-i386/kvm.c | 5 +++++ 2 files changed, 22 insertions(+), 14 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index a83aff2f66..0c20f9e858 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -199,7 +199,6 @@ int kvm_pit_in_kernel(void) return kvm_state->pit_in_kernel; } - int kvm_init_vcpu(CPUState *env) { KVMState *s = kvm_state; @@ -896,29 +895,33 @@ int kvm_cpu_exec(CPUState *env) DPRINTF("kvm_cpu_exec()\n"); + if (kvm_arch_process_irqchip_events(env)) { + env->exit_request = 0; + env->exception_index = EXCP_HLT; + return 0; + } + do { -#ifndef CONFIG_IOTHREAD - if (env->exit_request) { - DPRINTF("interrupt exit requested\n"); - ret = 0; - break; - } -#endif - - if (kvm_arch_process_irqchip_events(env)) { - ret = 0; - break; - } - if (env->kvm_vcpu_dirty) { kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE); env->kvm_vcpu_dirty = 0; } kvm_arch_pre_run(env, run); + if (env->exit_request) { + DPRINTF("interrupt exit requested\n"); + /* + * KVM requires us to reenter the kernel after IO exits to complete + * instruction emulation. This self-signal will ensure that we + * leave ASAP again. 
+ */ + qemu_cpu_kick_self(); + } cpu_single_env = NULL; qemu_mutex_unlock_iothread(); + ret = kvm_vcpu_ioctl(env, KVM_RUN, 0); + qemu_mutex_lock_iothread(); cpu_single_env = env; kvm_arch_post_run(env, run); diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 9df8ff85d2..8a87244167 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1426,6 +1426,11 @@ int kvm_arch_get_registers(CPUState *env) int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) { + /* Force the VCPU out of its inner loop to process the INIT request */ + if (env->interrupt_request & CPU_INTERRUPT_INIT) { + env->exit_request = 1; + } + /* Inject NMI */ if (env->interrupt_request & CPU_INTERRUPT_NMI) { env->interrupt_request &= ~CPU_INTERRUPT_NMI; From b30e93e9ec01c87d53fb0c777e4b0fa258e85ca8 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:16:01 +0100 Subject: [PATCH 20/37] kvm: Remove static return code of kvm_handle_io Improve the readability of the exit dispatcher by moving the static return value of kvm_handle_io to its caller. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 0c20f9e858..4729ec5981 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -774,8 +774,8 @@ err: return ret; } -static int kvm_handle_io(uint16_t port, void *data, int direction, int size, - uint32_t count) +static void kvm_handle_io(uint16_t port, void *data, int direction, int size, + uint32_t count) { int i; uint8_t *ptr = data; @@ -809,8 +809,6 @@ static int kvm_handle_io(uint16_t port, void *data, int direction, int size, ptr += size; } - - return 1; } #ifdef KVM_CAP_INTERNAL_ERROR_DATA @@ -944,11 +942,12 @@ int kvm_cpu_exec(CPUState *env) switch (run->exit_reason) { case KVM_EXIT_IO: DPRINTF("handle_io\n"); - ret = kvm_handle_io(run->io.port, - (uint8_t *)run + run->io.data_offset, - run->io.direction, - run->io.size, - run->io.count); + kvm_handle_io(run->io.port, + (uint8_t *)run + run->io.data_offset, + run->io.direction, + run->io.size, + run->io.count); + ret = 1; break; case KVM_EXIT_MMIO: DPRINTF("handle_mmio\n"); From cdea50ede1b8a2efe989fafc57260053b180219f Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Tue, 1 Feb 2011 22:16:02 +0100 Subject: [PATCH 21/37] kvm: Leave kvm_cpu_exec directly after KVM_EXIT_SHUTDOWN The reset we issue on KVM_EXIT_SHUTDOWN implies that we should also leave the VCPU loop. As we now check for exit_request which is set by qemu_system_reset_request, this bug is no longer critical. Still it's an unneeded extra turn. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 1 - 1 file changed, 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index 4729ec5981..42dfed8f16 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -963,7 +963,6 @@ int kvm_cpu_exec(CPUState *env) case KVM_EXIT_SHUTDOWN: DPRINTF("shutdown\n"); qemu_system_reset_request(); - ret = 1; break; case KVM_EXIT_UNKNOWN: fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n", From b8cc45d6a6f7b6607d5c55817d674f3e5f92ff70 Mon Sep 17 00:00:00 2001 From: Glauber Costa Date: Thu, 3 Feb 2011 14:19:53 -0500 Subject: [PATCH 22/37] kvm: make tsc stable over migration and machine start If the machine is stopped, we should not record two different tsc values upon a save operation. The same problem happens with kvmclock. But kvmclock is taking a different direction, being now seen as a separate device. Since this is unlikely to happen with the tsc, I am taking the approach here of simply registering a handler for state change, and using a per-CPUState variable that prevents double updates for the TSC.
Signed-off-by: Glauber Costa CC: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/cpu.h | 1 + target-i386/kvm.c | 18 +++++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index af701a4412..5f1df8b4d3 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -734,6 +734,7 @@ typedef struct CPUX86State { uint32_t sipi_vector; uint32_t cpuid_kvm_features; uint32_t cpuid_svm_features; + bool tsc_valid; /* in order to simplify APIC support, we leave this pointer to the user */ diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 8a87244167..ba183c4159 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -301,6 +301,15 @@ void kvm_inject_x86_mce(CPUState *cenv, int bank, uint64_t status, #endif } +static void cpu_update_state(void *opaque, int running, int reason) +{ + CPUState *env = opaque; + + if (running) { + env->tsc_valid = false; + } +} + int kvm_arch_init_vcpu(CPUState *env) { struct { @@ -434,6 +443,8 @@ int kvm_arch_init_vcpu(CPUState *env) } #endif + qemu_add_vm_change_state_handler(cpu_update_state, env); + return kvm_vcpu_ioctl(env, KVM_SET_CPUID2, &cpuid_data); } @@ -1061,7 +1072,12 @@ static int kvm_get_msrs(CPUState *env) if (has_msr_hsave_pa) { msrs[n++].index = MSR_VM_HSAVE_PA; } - msrs[n++].index = MSR_IA32_TSC; + + if (!env->tsc_valid) { + msrs[n++].index = MSR_IA32_TSC; + env->tsc_valid = !vm_running; + } + #ifdef TARGET_X86_64 if (lm_capable_kernel) { msrs[n++].index = MSR_CSTAR; From 7e97cd88148876bad36ee7c66d526dcaed328d0d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:12 +0100 Subject: [PATCH 23/37] Refactor kvm&tcg function names in cpus.c Pure interface cosmetics: Ensure that only kvm core services (as declared in kvm.h) start with "kvm_". Prepend "qemu_" to those that violate this rule in cpus.c. Also rename the corresponding tcg functions for the sake of consistency. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/cpus.c b/cpus.c index 6a85dc8664..d54ec7d0e5 100644 --- a/cpus.c +++ b/cpus.c @@ -774,7 +774,7 @@ static void qemu_kvm_wait_io_event(CPUState *env) static int qemu_cpu_exec(CPUState *env); -static void *kvm_cpu_thread_fn(void *arg) +static void *qemu_kvm_cpu_thread_fn(void *arg) { CPUState *env = arg; int r; @@ -807,7 +807,7 @@ static void *kvm_cpu_thread_fn(void *arg) return NULL; } -static void *tcg_cpu_thread_fn(void *arg) +static void *qemu_tcg_cpu_thread_fn(void *arg) { CPUState *env = arg; @@ -926,7 +926,7 @@ void resume_all_vcpus(void) } } -static void tcg_init_vcpu(void *_env) +static void qemu_tcg_init_vcpu(void *_env) { CPUState *env = _env; /* share a single thread for all cpus with TCG */ @@ -934,7 +934,7 @@ static void tcg_init_vcpu(void *_env) env->thread = qemu_mallocz(sizeof(QemuThread)); env->halt_cond = qemu_mallocz(sizeof(QemuCond)); qemu_cond_init(env->halt_cond); - qemu_thread_create(env->thread, tcg_cpu_thread_fn, env); + qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env); while (env->created == 0) qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100); tcg_cpu_thread = env->thread; @@ -945,12 +945,12 @@ static void tcg_init_vcpu(void *_env) } } -static void kvm_start_vcpu(CPUState *env) +static void qemu_kvm_start_vcpu(CPUState *env) { env->thread = qemu_mallocz(sizeof(QemuThread)); env->halt_cond = qemu_mallocz(sizeof(QemuCond)); qemu_cond_init(env->halt_cond); - qemu_thread_create(env->thread, kvm_cpu_thread_fn, env); + qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env); while (env->created == 0) qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100); } @@ -962,9 +962,9 @@ void qemu_init_vcpu(void *_env) env->nr_cores = smp_cores; env->nr_threads = smp_threads; if (kvm_enabled()) - kvm_start_vcpu(env); + qemu_kvm_start_vcpu(env); else - tcg_init_vcpu(env); + 
qemu_tcg_init_vcpu(env); } void qemu_notify_event(void) From 164003228347eb0c27ab6c0e80e2753df55cdbc5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 9 Feb 2011 16:29:37 +0100 Subject: [PATCH 24/37] Refactor cpu_has_work/any_cpu_has_work in cpus.c Avoid duplicate use of the function name cpu_has_work, it's confusing, also their scope. Refactor cpu_has_work to cpu_thread_is_idle and do the same with any_cpu_has_work. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 43 +++++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 20 deletions(-) diff --git a/cpus.c b/cpus.c index d54ec7d0e5..e963208366 100644 --- a/cpus.c +++ b/cpus.c @@ -137,29 +137,30 @@ static int cpu_can_run(CPUState *env) return 1; } -static int cpu_has_work(CPUState *env) +static bool cpu_thread_is_idle(CPUState *env) { - if (env->stop) - return 1; - if (env->queued_work_first) - return 1; - if (env->stopped || !vm_running) - return 0; - if (!env->halted) - return 1; - if (qemu_cpu_has_work(env)) - return 1; - return 0; + if (env->stop || env->queued_work_first) { + return false; + } + if (env->stopped || !vm_running) { + return true; + } + if (!env->halted || qemu_cpu_has_work(env)) { + return false; + } + return true; } -static int any_cpu_has_work(void) +static bool all_cpu_threads_idle(void) { CPUState *env; - for (env = first_cpu; env != NULL; env = env->next_cpu) - if (cpu_has_work(env)) - return 1; - return 0; + for (env = first_cpu; env != NULL; env = env->next_cpu) { + if (!cpu_thread_is_idle(env)) { + return false; + } + } + return true; } static void cpu_debug_handler(CPUState *env) @@ -743,8 +744,9 @@ static void qemu_tcg_wait_io_event(void) { CPUState *env; - while (!any_cpu_has_work()) + while (all_cpu_threads_idle()) { qemu_cond_timedwait(tcg_halt_cond, &qemu_global_mutex, 1000); + } qemu_mutex_unlock(&qemu_global_mutex); @@ -765,8 +767,9 @@ static void qemu_tcg_wait_io_event(void) static void qemu_kvm_wait_io_event(CPUState *env) { - 
while (!cpu_has_work(env)) + while (cpu_thread_is_idle(env)) { qemu_cond_timedwait(env->halt_cond, &qemu_global_mutex, 1000); + } qemu_kvm_eat_signals(env); qemu_wait_io_event_common(env); @@ -1070,7 +1073,7 @@ bool cpu_exec_all(void) } } exit_request = 0; - return any_cpu_has_work(); + return !all_cpu_threads_idle(); } void set_numa_modes(void) From 0ab07c623c629acfbc792e5a174129c19faefbb7 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:14 +0100 Subject: [PATCH 25/37] Fix a few coding style violations in cpus.c No functional changes. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 71 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 27 deletions(-) diff --git a/cpus.c b/cpus.c index e963208366..802d15a977 100644 --- a/cpus.c +++ b/cpus.c @@ -130,10 +130,12 @@ static void do_vm_stop(int reason) static int cpu_can_run(CPUState *env) { - if (env->stop) + if (env->stop) { return 0; - if (env->stopped || !vm_running) + } + if (env->stopped || !vm_running) { return 0; + } return 1; } @@ -225,9 +227,9 @@ static void qemu_event_increment(void) static const uint64_t val = 1; ssize_t ret; - if (io_thread_fd == -1) + if (io_thread_fd == -1) { return; - + } do { ret = write(io_thread_fd, &val, sizeof(val)); } while (ret < 0 && errno == EINTR); @@ -258,17 +260,17 @@ static int qemu_event_init(void) int fds[2]; err = qemu_eventfd(fds); - if (err == -1) + if (err == -1) { return -errno; - + } err = fcntl_setfl(fds[0], O_NONBLOCK); - if (err < 0) + if (err < 0) { goto fail; - + } err = fcntl_setfl(fds[1], O_NONBLOCK); - if (err < 0) + if (err < 0) { goto fail; - + } qemu_set_fd_handler2(fds[0], NULL, qemu_event_read, NULL, (void *)(unsigned long)fds[0]); @@ -527,7 +529,6 @@ void pause_all_vcpus(void) void qemu_cpu_kick(void *env) { - return; } void qemu_cpu_kick_self(void) @@ -660,13 +661,15 @@ int qemu_init_main_loop(void) blocked_signals = block_io_signals(); ret = 
qemu_signalfd_init(blocked_signals); - if (ret) + if (ret) { return ret; + } /* Note eventfd must be drained before signalfd handlers run */ ret = qemu_event_init(); - if (ret) + if (ret) { return ret; + } qemu_cond_init(&qemu_pause_cond); qemu_cond_init(&qemu_system_cond); @@ -696,10 +699,11 @@ void run_on_cpu(CPUState *env, void (*func)(void *data), void *data) wi.func = func; wi.data = data; - if (!env->queued_work_first) + if (!env->queued_work_first) { env->queued_work_first = &wi; - else + } else { env->queued_work_last->next = &wi; + } env->queued_work_last = &wi; wi.next = NULL; wi.done = false; @@ -717,8 +721,9 @@ static void flush_queued_work(CPUState *env) { struct qemu_work_item *wi; - if (!env->queued_work_first) + if (!env->queued_work_first) { return; + } while ((wi = env->queued_work_first)) { env->queued_work_first = wi->next; @@ -798,12 +803,14 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) qemu_cond_signal(&qemu_cpu_cond); /* and wait for machine initialization */ - while (!qemu_system_ready) + while (!qemu_system_ready) { qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100); + } while (1) { - if (cpu_can_run(env)) + if (cpu_can_run(env)) { qemu_cpu_exec(env); + } qemu_kvm_wait_io_event(env); } @@ -819,13 +826,15 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) /* signal CPU creation */ qemu_mutex_lock(&qemu_global_mutex); - for (env = first_cpu; env != NULL; env = env->next_cpu) + for (env = first_cpu; env != NULL; env = env->next_cpu) { env->created = 1; + } qemu_cond_signal(&qemu_cpu_cond); /* and wait for machine initialization */ - while (!qemu_system_ready) + while (!qemu_system_ready) { qemu_cond_timedwait(&qemu_system_cond, &qemu_global_mutex, 100); + } while (1) { cpu_exec_all(); @@ -838,6 +847,7 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) void qemu_cpu_kick(void *_env) { CPUState *env = _env; + qemu_cond_broadcast(env->halt_cond); if (!env->thread_kicked) { qemu_thread_signal(env->thread, SIG_IPI); @@ -889,8 +899,9 
@@ static int all_vcpus_paused(void) CPUState *penv = first_cpu; while (penv) { - if (!penv->stopped) + if (!penv->stopped) { return 0; + } penv = (CPUState *)penv->next_cpu; } @@ -932,14 +943,16 @@ void resume_all_vcpus(void) static void qemu_tcg_init_vcpu(void *_env) { CPUState *env = _env; + /* share a single thread for all cpus with TCG */ if (!tcg_cpu_thread) { env->thread = qemu_mallocz(sizeof(QemuThread)); env->halt_cond = qemu_mallocz(sizeof(QemuCond)); qemu_cond_init(env->halt_cond); qemu_thread_create(env->thread, qemu_tcg_cpu_thread_fn, env); - while (env->created == 0) + while (env->created == 0) { qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100); + } tcg_cpu_thread = env->thread; tcg_halt_cond = env->halt_cond; } else { @@ -954,8 +967,9 @@ static void qemu_kvm_start_vcpu(CPUState *env) env->halt_cond = qemu_mallocz(sizeof(QemuCond)); qemu_cond_init(env->halt_cond); qemu_thread_create(env->thread, qemu_kvm_cpu_thread_fn, env); - while (env->created == 0) + while (env->created == 0) { qemu_cond_timedwait(&qemu_cpu_cond, &qemu_global_mutex, 100); + } } void qemu_init_vcpu(void *_env) @@ -964,10 +978,11 @@ void qemu_init_vcpu(void *_env) env->nr_cores = smp_cores; env->nr_threads = smp_threads; - if (kvm_enabled()) + if (kvm_enabled()) { qemu_kvm_start_vcpu(env); - else + } else { qemu_tcg_init_vcpu(env); + } } void qemu_notify_event(void) @@ -1050,16 +1065,18 @@ bool cpu_exec_all(void) { int r; - if (next_cpu == NULL) + if (next_cpu == NULL) { next_cpu = first_cpu; + } for (; next_cpu != NULL && !exit_request; next_cpu = next_cpu->next_cpu) { CPUState *env = next_cpu; qemu_clock_enable(vm_clock, (env->singlestep_enabled & SSTEP_NOTIMER) == 0); - if (qemu_alarm_pending()) + if (qemu_alarm_pending()) { break; + } if (cpu_can_run(env)) { r = qemu_cpu_exec(env); if (kvm_enabled()) { From e07bbac542d45cb246f393f343eb3b867fed4de1 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Wed, 9 Feb 2011 16:29:40 +0100 Subject: [PATCH 26/37] Improve vm_stop 
reason declarations Define and use dedicated constants for vm_stop reasons, they actually have nothing to do with the EXCP_* defines used so far. At this chance, specify more detailed reasons so that VM state change handlers can evaluate them. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 4 ++-- gdbstub.c | 19 ++++++++++--------- hw/ide/core.c | 2 +- hw/scsi-disk.c | 2 +- hw/virtio-blk.c | 2 +- hw/watchdog.c | 2 +- kvm-all.c | 2 +- migration.c | 2 +- monitor.c | 4 ++-- savevm.c | 4 ++-- sysemu.h | 10 ++++++++++ vl.c | 2 +- 12 files changed, 33 insertions(+), 22 deletions(-) diff --git a/cpus.c b/cpus.c index 802d15a977..ca1f01d9da 100644 --- a/cpus.c +++ b/cpus.c @@ -168,8 +168,8 @@ static bool all_cpu_threads_idle(void) static void cpu_debug_handler(CPUState *env) { gdb_set_stop_cpu(env); - debug_requested = EXCP_DEBUG; - vm_stop(EXCP_DEBUG); + debug_requested = VMSTOP_DEBUG; + vm_stop(VMSTOP_DEBUG); } #ifdef CONFIG_LINUX diff --git a/gdbstub.c b/gdbstub.c index d6556c9a2f..ed51a8a5bb 100644 --- a/gdbstub.c +++ b/gdbstub.c @@ -2194,14 +2194,14 @@ static void gdb_vm_state_change(void *opaque, int running, int reason) const char *type; int ret; - if (running || (reason != EXCP_DEBUG && reason != EXCP_INTERRUPT) || - s->state == RS_INACTIVE || s->state == RS_SYSCALL) + if (running || (reason != VMSTOP_DEBUG && reason != VMSTOP_USER) || + s->state == RS_INACTIVE || s->state == RS_SYSCALL) { return; - + } /* disable single step if it was enable */ cpu_single_step(env, 0); - if (reason == EXCP_DEBUG) { + if (reason == VMSTOP_DEBUG) { if (env->watchpoint_hit) { switch (env->watchpoint_hit->flags & BP_MEM_ACCESS) { case BP_MEM_READ: @@ -2252,7 +2252,7 @@ void gdb_do_syscall(gdb_syscall_complete_cb cb, const char *fmt, ...) 
gdb_current_syscall_cb = cb; s->state = RS_SYSCALL; #ifndef CONFIG_USER_ONLY - vm_stop(EXCP_DEBUG); + vm_stop(VMSTOP_DEBUG); #endif s->state = RS_IDLE; va_start(va, fmt); @@ -2326,7 +2326,7 @@ static void gdb_read_byte(GDBState *s, int ch) if (vm_running) { /* when the CPU is running, we cannot do anything except stop it when receiving a char */ - vm_stop(EXCP_INTERRUPT); + vm_stop(VMSTOP_USER); } else #endif { @@ -2588,7 +2588,7 @@ static void gdb_chr_event(void *opaque, int event) { switch (event) { case CHR_EVENT_OPENED: - vm_stop(EXCP_INTERRUPT); + vm_stop(VMSTOP_USER); gdb_has_xml = 0; break; default: @@ -2628,8 +2628,9 @@ static int gdb_monitor_write(CharDriverState *chr, const uint8_t *buf, int len) #ifndef _WIN32 static void gdb_sigterm_handler(int signal) { - if (vm_running) - vm_stop(EXCP_INTERRUPT); + if (vm_running) { + vm_stop(VMSTOP_USER); + } } #endif diff --git a/hw/ide/core.c b/hw/ide/core.c index dd63664c0d..9c91a49767 100644 --- a/hw/ide/core.c +++ b/hw/ide/core.c @@ -465,7 +465,7 @@ static int ide_handle_rw_error(IDEState *s, int error, int op) s->bus->dma->ops->set_unit(s->bus->dma, s->unit); s->bus->dma->ops->add_status(s->bus->dma, op); bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read); - vm_stop(0); + vm_stop(VMSTOP_DISKFULL); } else { if (op & BM_STATUS_DMA_RETRY) { dma_buf_commit(s, 0); diff --git a/hw/scsi-disk.c b/hw/scsi-disk.c index 488eedd2cd..b05e6547df 100644 --- a/hw/scsi-disk.c +++ b/hw/scsi-disk.c @@ -239,7 +239,7 @@ static int scsi_handle_rw_error(SCSIDiskReq *r, int error, int type) r->status |= SCSI_REQ_STATUS_RETRY | type; bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read); - vm_stop(0); + vm_stop(VMSTOP_DISKFULL); } else { if (type == SCSI_REQ_STATUS_RETRY_READ) { r->req.bus->complete(r->req.bus, SCSI_REASON_DATA, r->req.tag, 0); diff --git a/hw/virtio-blk.c b/hw/virtio-blk.c index ffac5a4d8f..b14fb995e8 100644 --- a/hw/virtio-blk.c +++ b/hw/virtio-blk.c @@ -78,7 +78,7 @@ static int virtio_blk_handle_rw_error(VirtIOBlockReq 
*req, int error, req->next = s->rq; s->rq = req; bdrv_mon_event(s->bs, BDRV_ACTION_STOP, is_read); - vm_stop(0); + vm_stop(VMSTOP_DISKFULL); } else { virtio_blk_req_complete(req, VIRTIO_BLK_S_IOERR); bdrv_mon_event(s->bs, BDRV_ACTION_REPORT, is_read); diff --git a/hw/watchdog.c b/hw/watchdog.c index e9dd56e229..1c900a1189 100644 --- a/hw/watchdog.c +++ b/hw/watchdog.c @@ -132,7 +132,7 @@ void watchdog_perform_action(void) case WDT_PAUSE: /* same as 'stop' command in monitor */ watchdog_mon_event("pause"); - vm_stop(0); + vm_stop(VMSTOP_WATCHDOG); break; case WDT_DEBUG: diff --git a/kvm-all.c b/kvm-all.c index 42dfed8f16..19cf1884cf 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -994,7 +994,7 @@ int kvm_cpu_exec(CPUState *env) if (ret < 0) { cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE); - vm_stop(0); + vm_stop(VMSTOP_PANIC); env->exit_request = 1; } if (env->exit_request) { diff --git a/migration.c b/migration.c index 36125720a4..af3a1f2702 100644 --- a/migration.c +++ b/migration.c @@ -378,7 +378,7 @@ void migrate_fd_put_ready(void *opaque) int old_vm_running = vm_running; DPRINTF("done iterating\n"); - vm_stop(0); + vm_stop(VMSTOP_MIGRATE); if ((qemu_savevm_state_complete(s->mon, s->file)) < 0) { if (old_vm_running) { diff --git a/monitor.c b/monitor.c index 7fc311d720..22ae3bbff0 100644 --- a/monitor.c +++ b/monitor.c @@ -1255,7 +1255,7 @@ static void do_singlestep(Monitor *mon, const QDict *qdict) */ static int do_stop(Monitor *mon, const QDict *qdict, QObject **ret_data) { - vm_stop(EXCP_INTERRUPT); + vm_stop(VMSTOP_USER); return 0; } @@ -2783,7 +2783,7 @@ static void do_loadvm(Monitor *mon, const QDict *qdict) int saved_vm_running = vm_running; const char *name = qdict_get_str(qdict, "name"); - vm_stop(0); + vm_stop(VMSTOP_LOADVM); if (load_vmstate(name) == 0 && saved_vm_running) { vm_start(); diff --git a/savevm.c b/savevm.c index 6d83b0f4dc..a50fd31154 100644 --- a/savevm.c +++ b/savevm.c @@ -1575,7 +1575,7 @@ static int qemu_savevm_state(Monitor *mon, 
QEMUFile *f) int ret; saved_vm_running = vm_running; - vm_stop(0); + vm_stop(VMSTOP_SAVEVM); if (qemu_savevm_state_blocked(mon)) { ret = -EINVAL; @@ -1904,7 +1904,7 @@ void do_savevm(Monitor *mon, const QDict *qdict) } saved_vm_running = vm_running; - vm_stop(0); + vm_stop(VMSTOP_SAVEVM); memset(sn, 0, sizeof(*sn)); diff --git a/sysemu.h b/sysemu.h index 23ae17e2e9..0628d3d821 100644 --- a/sysemu.h +++ b/sysemu.h @@ -37,6 +37,16 @@ VMChangeStateEntry *qemu_add_vm_change_state_handler(VMChangeStateHandler *cb, void *opaque); void qemu_del_vm_change_state_handler(VMChangeStateEntry *e); +#define VMSTOP_USER 0 +#define VMSTOP_DEBUG 1 +#define VMSTOP_SHUTDOWN 2 +#define VMSTOP_DISKFULL 3 +#define VMSTOP_WATCHDOG 4 +#define VMSTOP_PANIC 5 +#define VMSTOP_SAVEVM 6 +#define VMSTOP_LOADVM 7 +#define VMSTOP_MIGRATE 8 + void vm_start(void); void vm_stop(int reason); diff --git a/vl.c b/vl.c index c9fa266157..6d2d1d344f 100644 --- a/vl.c +++ b/vl.c @@ -1433,7 +1433,7 @@ static void main_loop(void) if (qemu_shutdown_requested()) { monitor_protocol_event(QEVENT_SHUTDOWN, NULL); if (no_shutdown) { - vm_stop(0); + vm_stop(VMSTOP_SHUTDOWN); no_shutdown = 0; } else break; From 8cf71710f068f9c50ce420b1dd4ef71c2f9b2a8d Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:16 +0100 Subject: [PATCH 27/37] Refactor debug and vmstop request interface Instead of fiddling with debug_requested and vmstop_requested directly, introduce qemu_system_debug_request and turn qemu_system_vmstop_request into a public interface. This aligns those services with existing ones in vl.c.
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpus.c | 9 +-------- cpus.h | 2 -- sysemu.h | 2 ++ vl.c | 20 ++++++++++++++++---- 4 files changed, 19 insertions(+), 14 deletions(-) diff --git a/cpus.c b/cpus.c index ca1f01d9da..97a6d4f5b9 100644 --- a/cpus.c +++ b/cpus.c @@ -168,8 +168,7 @@ static bool all_cpu_threads_idle(void) static void cpu_debug_handler(CPUState *env) { gdb_set_stop_cpu(env); - debug_requested = VMSTOP_DEBUG; - vm_stop(VMSTOP_DEBUG); + qemu_system_debug_request(); } #ifdef CONFIG_LINUX @@ -990,12 +989,6 @@ void qemu_notify_event(void) qemu_event_increment(); } -static void qemu_system_vmstop_request(int reason) -{ - vmstop_requested = reason; - qemu_notify_event(); -} - void cpu_stop_current(void) { if (cpu_single_env) { diff --git a/cpus.h b/cpus.h index 4cadb641c0..e0211260c3 100644 --- a/cpus.h +++ b/cpus.h @@ -11,8 +11,6 @@ void cpu_stop_current(void); /* vl.c */ extern int smp_cores; extern int smp_threads; -extern int debug_requested; -extern int vmstop_requested; void vm_state_notify(int running, int reason); bool cpu_exec_all(void); void set_numa_modes(void); diff --git a/sysemu.h b/sysemu.h index 0628d3d821..0a83ab9e56 100644 --- a/sysemu.h +++ b/sysemu.h @@ -61,6 +61,8 @@ void cpu_disable_ticks(void); void qemu_system_reset_request(void); void qemu_system_shutdown_request(void); void qemu_system_powerdown_request(void); +void qemu_system_debug_request(void); +void qemu_system_vmstop_request(int reason); int qemu_shutdown_requested(void); int qemu_reset_requested(void); int qemu_powerdown_requested(void); diff --git a/vl.c b/vl.c index 6d2d1d344f..eebe6843f7 100644 --- a/vl.c +++ b/vl.c @@ -1217,8 +1217,8 @@ static QTAILQ_HEAD(reset_handlers, QEMUResetEntry) reset_handlers = static int reset_requested; static int shutdown_requested; static int powerdown_requested; -int debug_requested; -int vmstop_requested; +static int debug_requested; +static int vmstop_requested; int qemu_shutdown_requested(void) { @@ -1312,6 +1312,18 
@@ void qemu_system_powerdown_request(void) qemu_notify_event(); } +void qemu_system_debug_request(void) +{ + debug_requested = 1; + vm_stop(VMSTOP_DEBUG); +} + +void qemu_system_vmstop_request(int reason) +{ + vmstop_requested = reason; + qemu_notify_event(); +} + void main_loop_wait(int nonblocking) { IOHandlerRecord *ioh; @@ -1427,8 +1439,8 @@ static void main_loop(void) dev_time += profile_getclock() - ti; #endif - if ((r = qemu_debug_requested())) { - vm_stop(r); + if (qemu_debug_requested()) { + vm_stop(VMSTOP_DEBUG); } if (qemu_shutdown_requested()) { monitor_protocol_event(QEVENT_SHUTDOWN, NULL); From 83f338f73ecb88cc6f85d6e7b81ebef112ce07be Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:17 +0100 Subject: [PATCH 28/37] Move debug exception handling out of cpu_exec To prepare splitting up KVM and TCG CPU entry/exit, move the debug exception into cpus.c and invoke cpu_handle_debug_exception on return from qemu_cpu_exec. This also allows to clean up the debug request signaling: We can assign the job of informing main-loop to qemu_system_debug_request and stop the calling cpu directly in cpu_handle_debug_exception. That means a debug stop will now only be signaled via debug_requested and not additionally via vmstop_requested. 
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpu-exec.c | 24 ------------------------ cpus.c | 35 ++++++++++++++++++++++++++++++----- vl.c | 2 +- 3 files changed, 31 insertions(+), 30 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index 8c9fb8b1a2..9c0b10d1db 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -196,28 +196,6 @@ static inline TranslationBlock *tb_find_fast(void) return tb; } -static CPUDebugExcpHandler *debug_excp_handler; - -CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler) -{ - CPUDebugExcpHandler *old_handler = debug_excp_handler; - - debug_excp_handler = handler; - return old_handler; -} - -static void cpu_handle_debug_exception(CPUState *env) -{ - CPUWatchpoint *wp; - - if (!env->watchpoint_hit) - QTAILQ_FOREACH(wp, &env->watchpoints, entry) - wp->flags &= ~BP_WATCHPOINT_HIT; - - if (debug_excp_handler) - debug_excp_handler(env); -} - /* main execution loop */ volatile sig_atomic_t exit_request; @@ -287,8 +265,6 @@ int cpu_exec(CPUState *env1) if (env->exception_index >= EXCP_INTERRUPT) { /* exit request from the cpu execution loop */ ret = env->exception_index; - if (ret == EXCP_DEBUG) - cpu_handle_debug_exception(env); break; } else { #if defined(CONFIG_USER_ONLY) diff --git a/cpus.c b/cpus.c index 97a6d4f5b9..c7e86c29f4 100644 --- a/cpus.c +++ b/cpus.c @@ -165,10 +165,34 @@ static bool all_cpu_threads_idle(void) return true; } -static void cpu_debug_handler(CPUState *env) +static CPUDebugExcpHandler *debug_excp_handler; + +CPUDebugExcpHandler *cpu_set_debug_excp_handler(CPUDebugExcpHandler *handler) { + CPUDebugExcpHandler *old_handler = debug_excp_handler; + + debug_excp_handler = handler; + return old_handler; +} + +static void cpu_handle_debug_exception(CPUState *env) +{ + CPUWatchpoint *wp; + + if (!env->watchpoint_hit) { + QTAILQ_FOREACH(wp, &env->watchpoints, entry) { + wp->flags &= ~BP_WATCHPOINT_HIT; + } + } + if (debug_excp_handler) { + debug_excp_handler(env); + } + gdb_set_stop_cpu(env); 
qemu_system_debug_request(); +#ifdef CONFIG_IOTHREAD + env->stopped = 1; +#endif } #ifdef CONFIG_LINUX @@ -479,7 +503,6 @@ int qemu_init_main_loop(void) return ret; } #endif - cpu_set_debug_excp_handler(cpu_debug_handler); qemu_init_sigbus(); @@ -653,8 +676,6 @@ int qemu_init_main_loop(void) int ret; sigset_t blocked_signals; - cpu_set_debug_excp_handler(cpu_debug_handler); - qemu_init_sigbus(); blocked_signals = block_io_signals(); @@ -808,7 +829,10 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) while (1) { if (cpu_can_run(env)) { - qemu_cpu_exec(env); + r = qemu_cpu_exec(env); + if (r == EXCP_DEBUG) { + cpu_handle_debug_exception(env); + } } qemu_kvm_wait_io_event(env); } @@ -1076,6 +1100,7 @@ bool cpu_exec_all(void) qemu_kvm_eat_signals(env); } if (r == EXCP_DEBUG) { + cpu_handle_debug_exception(env); break; } } else if (env->stop) { diff --git a/vl.c b/vl.c index eebe6843f7..b436952d6c 100644 --- a/vl.c +++ b/vl.c @@ -1315,7 +1315,7 @@ void qemu_system_powerdown_request(void) void qemu_system_debug_request(void) { debug_requested = 1; - vm_stop(VMSTOP_DEBUG); + qemu_notify_event(); } void qemu_system_vmstop_request(int reason) From 6792a57bf19ab37f61f5acf0f8e3003cf08814af Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:18 +0100 Subject: [PATCH 29/37] kvm: Separate TCG from KVM cpu execution Mixing up TCG bits with KVM already led to problems around eflags emulation on x86. Moreover, quite some code that TCG requires on cpu entry/exit is useless for KVM. So dispatch between tcg_cpu_exec and kvm_cpu_exec as early as possible. The core logic of cpu_halted from cpu_exec is added to kvm_arch_process_irqchip_events. Moving away from cpu_exec makes exception_index meaningless for KVM, we can simply pass the exit reason directly (only "EXCP_DEBUG vs. rest" is relevant).
Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpu-exec.c | 19 ++++++------------- cpus.c | 10 +++++----- kvm-all.c | 19 +++++++++---------- target-i386/kvm.c | 6 +++--- 4 files changed, 23 insertions(+), 31 deletions(-) diff --git a/cpu-exec.c b/cpu-exec.c index 9c0b10d1db..b03b3a749a 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -226,13 +226,11 @@ int cpu_exec(CPUState *env1) } #if defined(TARGET_I386) - if (!kvm_enabled()) { - /* put eflags in CPU temporary format */ - CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); - DF = 1 - (2 * ((env->eflags >> 10) & 1)); - CC_OP = CC_OP_EFLAGS; - env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); - } + /* put eflags in CPU temporary format */ + CC_SRC = env->eflags & (CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); + DF = 1 - (2 * ((env->eflags >> 10) & 1)); + CC_OP = CC_OP_EFLAGS; + env->eflags &= ~(DF_MASK | CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C); #elif defined(TARGET_SPARC) #elif defined(TARGET_M68K) env->cc_op = CC_OP_FLAGS; @@ -257,7 +255,7 @@ int cpu_exec(CPUState *env1) if (setjmp(env->jmp_env) == 0) { #if defined(__sparc__) && !defined(CONFIG_SOLARIS) #undef env - env = cpu_single_env; + env = cpu_single_env; #define env cpu_single_env #endif /* if an exception is pending, we execute it here */ @@ -316,11 +314,6 @@ int cpu_exec(CPUState *env1) } } - if (kvm_enabled()) { - kvm_cpu_exec(env); - longjmp(env->jmp_env, 1); - } - next_tb = 0; /* force lookup of first TB */ for(;;) { interrupt_request = env->interrupt_request; diff --git a/cpus.c b/cpus.c index c7e86c29f4..468544cc3a 100644 --- a/cpus.c +++ b/cpus.c @@ -800,8 +800,6 @@ static void qemu_kvm_wait_io_event(CPUState *env) qemu_wait_io_event_common(env); } -static int qemu_cpu_exec(CPUState *env); - static void *qemu_kvm_cpu_thread_fn(void *arg) { CPUState *env = arg; @@ -829,7 +827,7 @@ static void *qemu_kvm_cpu_thread_fn(void *arg) while (1) { if (cpu_can_run(env)) { - r = qemu_cpu_exec(env); + r = 
kvm_cpu_exec(env); if (r == EXCP_DEBUG) { cpu_handle_debug_exception(env); } @@ -1040,7 +1038,7 @@ void vm_stop(int reason) #endif -static int qemu_cpu_exec(CPUState *env) +static int tcg_cpu_exec(CPUState *env) { int ret; #ifdef CONFIG_PROFILER @@ -1095,9 +1093,11 @@ bool cpu_exec_all(void) break; } if (cpu_can_run(env)) { - r = qemu_cpu_exec(env); if (kvm_enabled()) { + r = kvm_cpu_exec(env); qemu_kvm_eat_signals(env); + } else { + r = tcg_cpu_exec(env); } if (r == EXCP_DEBUG) { cpu_handle_debug_exception(env); diff --git a/kvm-all.c b/kvm-all.c index 19cf1884cf..802c6b8369 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -895,10 +895,11 @@ int kvm_cpu_exec(CPUState *env) if (kvm_arch_process_irqchip_events(env)) { env->exit_request = 0; - env->exception_index = EXCP_HLT; - return 0; + return EXCP_HLT; } + cpu_single_env = env; + do { if (env->kvm_vcpu_dirty) { kvm_arch_put_registers(env, KVM_PUT_RUNTIME_STATE); @@ -927,7 +928,6 @@ int kvm_cpu_exec(CPUState *env) kvm_flush_coalesced_mmio_buffer(); if (ret == -EINTR || ret == -EAGAIN) { - cpu_exit(env); DPRINTF("io window exit\n"); ret = 0; break; @@ -978,8 +978,8 @@ int kvm_cpu_exec(CPUState *env) DPRINTF("kvm_exit_debug\n"); #ifdef KVM_CAP_SET_GUEST_DEBUG if (kvm_arch_debug(&run->debug.arch)) { - env->exception_index = EXCP_DEBUG; - return 0; + ret = EXCP_DEBUG; + goto out; } /* re-enter, this exception was guest-internal */ ret = 1; @@ -995,13 +995,12 @@ int kvm_cpu_exec(CPUState *env) if (ret < 0) { cpu_dump_state(env, stderr, fprintf, CPU_DUMP_CODE); vm_stop(VMSTOP_PANIC); - env->exit_request = 1; - } - if (env->exit_request) { - env->exit_request = 0; - env->exception_index = EXCP_INTERRUPT; } + ret = EXCP_INTERRUPT; +out: + env->exit_request = 0; + cpu_single_env = NULL; return ret; } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index ba183c4159..377a0a352b 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1502,12 +1502,13 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run *run) int 
kvm_arch_process_irqchip_events(CPUState *env) { + if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI)) { + env->halted = 0; + } if (env->interrupt_request & CPU_INTERRUPT_INIT) { kvm_cpu_synchronize_state(env); do_cpu_init(env); - env->exception_index = EXCP_HALTED; } - if (env->interrupt_request & CPU_INTERRUPT_SIPI) { kvm_cpu_synchronize_state(env); do_cpu_sipi(env); @@ -1522,7 +1523,6 @@ static int kvm_handle_halt(CPUState *env) (env->eflags & IF_MASK)) && !(env->interrupt_request & CPU_INTERRUPT_NMI)) { env->halted = 1; - env->exception_index = EXCP_HLT; return 0; } From db1669bcca7cc67d7301394d7d59dffb4c0fc9b0 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:19 +0100 Subject: [PATCH 30/37] kvm: x86: Prepare VCPU loop for in-kernel irqchip Effectively no functional change yet as kvm_irqchip_in_kernel still only returns 0, but this patch will allow qemu-kvm to adopt the VCPU loop of upstream KVM. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 69 ++++++++++++++++++++++++++--------------------- 1 file changed, 38 insertions(+), 31 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 377a0a352b..f02f478e30 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1442,11 +1442,6 @@ int kvm_arch_get_registers(CPUState *env) int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) { - /* Force the VCPU out of its inner loop to process the INIT request */ - if (env->interrupt_request & CPU_INTERRUPT_INIT) { - env->exit_request = 1; - } - /* Inject NMI */ if (env->interrupt_request & CPU_INTERRUPT_NMI) { env->interrupt_request &= ~CPU_INTERRUPT_NMI; @@ -1454,35 +1449,43 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) kvm_vcpu_ioctl(env, KVM_NMI); } - /* Try to inject an interrupt if the guest can accept it */ - if (run->ready_for_interrupt_injection && - (env->interrupt_request & CPU_INTERRUPT_HARD) && - (env->eflags & IF_MASK)) { - int irq; - - env->interrupt_request
&= ~CPU_INTERRUPT_HARD; - irq = cpu_get_pic_interrupt(env); - if (irq >= 0) { - struct kvm_interrupt intr; - intr.irq = irq; - /* FIXME: errors */ - DPRINTF("injected interrupt %d\n", irq); - kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr); + if (!kvm_irqchip_in_kernel()) { + /* Force the VCPU out of its inner loop to process the INIT request */ + if (env->interrupt_request & CPU_INTERRUPT_INIT) { + env->exit_request = 1; } - } - /* If we have an interrupt but the guest is not ready to receive an - * interrupt, request an interrupt window exit. This will - * cause a return to userspace as soon as the guest is ready to - * receive interrupts. */ - if ((env->interrupt_request & CPU_INTERRUPT_HARD)) { - run->request_interrupt_window = 1; - } else { - run->request_interrupt_window = 0; - } + /* Try to inject an interrupt if the guest can accept it */ + if (run->ready_for_interrupt_injection && + (env->interrupt_request & CPU_INTERRUPT_HARD) && + (env->eflags & IF_MASK)) { + int irq; - DPRINTF("setting tpr\n"); - run->cr8 = cpu_get_apic_tpr(env->apic_state); + env->interrupt_request &= ~CPU_INTERRUPT_HARD; + irq = cpu_get_pic_interrupt(env); + if (irq >= 0) { + struct kvm_interrupt intr; + + intr.irq = irq; + /* FIXME: errors */ + DPRINTF("injected interrupt %d\n", irq); + kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr); + } + } + + /* If we have an interrupt but the guest is not ready to receive an + * interrupt, request an interrupt window exit. This will + * cause a return to userspace as soon as the guest is ready to + * receive interrupts. 
*/ + if ((env->interrupt_request & CPU_INTERRUPT_HARD)) { + run->request_interrupt_window = 1; + } else { + run->request_interrupt_window = 0; + } + + DPRINTF("setting tpr\n"); + run->cr8 = cpu_get_apic_tpr(env->apic_state); + } return 0; } @@ -1502,6 +1505,10 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run *run) int kvm_arch_process_irqchip_events(CPUState *env) { + if (kvm_irqchip_in_kernel()) { + return 0; + } + if (env->interrupt_request & (CPU_INTERRUPT_HARD | CPU_INTERRUPT_NMI)) { env->halted = 0; } From 7a39fe588251ba042c91bf23d53b0ba820bf964c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:20 +0100 Subject: [PATCH 31/37] kvm: Drop return values from kvm_arch_pre/post_run We do not check them, and the only arch with non-empty implementations always returns 0 (this is also true for qemu-kvm). Signed-off-by: Jan Kiszka CC: Alexander Graf Signed-off-by: Marcelo Tosatti --- kvm.h | 5 ++--- target-i386/kvm.c | 8 ++------ target-ppc/kvm.c | 6 ++---- target-s390x/kvm.c | 6 ++---- 4 files changed, 8 insertions(+), 17 deletions(-) diff --git a/kvm.h b/kvm.h index b2fb5c61e7..7a4d550898 100644 --- a/kvm.h +++ b/kvm.h @@ -99,12 +99,11 @@ int kvm_vcpu_ioctl(CPUState *env, int type, ...); extern const KVMCapabilityInfo kvm_arch_required_capabilities[]; -int kvm_arch_post_run(CPUState *env, struct kvm_run *run); +void kvm_arch_pre_run(CPUState *env, struct kvm_run *run); +void kvm_arch_post_run(CPUState *env, struct kvm_run *run); int kvm_arch_handle_exit(CPUState *env, struct kvm_run *run); -int kvm_arch_pre_run(CPUState *env, struct kvm_run *run); - int kvm_arch_process_irqchip_events(CPUState *env); int kvm_arch_get_registers(CPUState *env); diff --git a/target-i386/kvm.c b/target-i386/kvm.c index f02f478e30..8668cb3a97 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1440,7 +1440,7 @@ int kvm_arch_get_registers(CPUState *env) return 0; } -int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) +void kvm_arch_pre_run(CPUState 
*env, struct kvm_run *run) { /* Inject NMI */ if (env->interrupt_request & CPU_INTERRUPT_NMI) { @@ -1486,11 +1486,9 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) DPRINTF("setting tpr\n"); run->cr8 = cpu_get_apic_tpr(env->apic_state); } - - return 0; } -int kvm_arch_post_run(CPUState *env, struct kvm_run *run) +void kvm_arch_post_run(CPUState *env, struct kvm_run *run) { if (run->if_flag) { env->eflags |= IF_MASK; @@ -1499,8 +1497,6 @@ int kvm_arch_post_run(CPUState *env, struct kvm_run *run) } cpu_set_apic_tpr(env->apic_state, run->cr8); cpu_set_apic_base(env->apic_state, run->apic_base); - - return 0; } int kvm_arch_process_irqchip_events(CPUState *env) diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 93ecc57470..bd4012a4ec 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -256,14 +256,12 @@ int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) return 0; } -int kvm_arch_post_run(CPUState *env, struct kvm_run *run) +void kvm_arch_post_run(CPUState *env, struct kvm_run *run) { - return 0; } -int kvm_arch_process_irqchip_events(CPUState *env) +void kvm_arch_process_irqchip_events(CPUState *env) { - return 0; } static int kvmppc_handle_halt(CPUState *env) diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index 1702c4601d..b349812dba 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -169,14 +169,12 @@ int kvm_arch_remove_sw_breakpoint(CPUState *env, struct kvm_sw_breakpoint *bp) return 0; } -int kvm_arch_pre_run(CPUState *env, struct kvm_run *run) +void kvm_arch_pre_run(CPUState *env, struct kvm_run *run) { - return 0; } -int kvm_arch_post_run(CPUState *env, struct kvm_run *run) +void kvm_arch_post_run(CPUState *env, struct kvm_run *run) { - return 0; } int kvm_arch_process_irqchip_events(CPUState *env) From ce377af399563195d066d5fee0c7b717967932ee Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:21 +0100 Subject: [PATCH 32/37] kvm: x86: Catch and report failing IRQ and NMI injections We do not need to 
abort, but the user should be notified that weird things go on. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 8668cb3a97..0aa0a410da 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1442,11 +1442,17 @@ int kvm_arch_get_registers(CPUState *env) void kvm_arch_pre_run(CPUState *env, struct kvm_run *run) { + int ret; + /* Inject NMI */ if (env->interrupt_request & CPU_INTERRUPT_NMI) { env->interrupt_request &= ~CPU_INTERRUPT_NMI; DPRINTF("injected NMI\n"); - kvm_vcpu_ioctl(env, KVM_NMI); + ret = kvm_vcpu_ioctl(env, KVM_NMI); + if (ret < 0) { + fprintf(stderr, "KVM: injection failed, NMI lost (%s)\n", + strerror(-ret)); + } } if (!kvm_irqchip_in_kernel()) { @@ -1467,9 +1473,13 @@ void kvm_arch_pre_run(CPUState *env, struct kvm_run *run) struct kvm_interrupt intr; intr.irq = irq; - /* FIXME: errors */ DPRINTF("injected interrupt %d\n", irq); - kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr); + ret = kvm_vcpu_ioctl(env, KVM_INTERRUPT, &intr); + if (ret < 0) { + fprintf(stderr, + "KVM: injection failed, interrupt lost (%s)\n", + strerror(-ret)); + } } } From 8e045ac407029c004e31eb46b0586309d618fbbd Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:22 +0100 Subject: [PATCH 33/37] kvm: Remove unneeded memory slot reservation The number of slots and the location of private ones changed several times in KVM's early days. However, it's stable since 2.6.29 (our required baseline), and slots 8..11 are no longer reserved since then. So remove this unneeded restriction. 
Signed-off-by: Jan Kiszka CC: Alex Williamson Signed-off-by: Marcelo Tosatti --- kvm-all.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 802c6b8369..14b6c1e736 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -91,10 +91,6 @@ static KVMSlot *kvm_alloc_slot(KVMState *s) int i; for (i = 0; i < ARRAY_SIZE(s->slots); i++) { - /* KVM private memory slots */ - if (i >= 8 && i < 12) { - continue; - } if (s->slots[i].memory_size == 0) { return &s->slots[i]; } From e5896b12e20b86ba9d16582888d60cf5cb286517 Mon Sep 17 00:00:00 2001 From: Anthony PERARD Date: Mon, 7 Feb 2011 12:19:23 +0100 Subject: [PATCH 34/37] Introduce log_start/log_stop in CPUPhysMemoryClient In order to use log_start/log_stop with Xen as well in the vga code, these two operations have been put in CPUPhysMemoryClient. The two new functions cpu_physical_log_start and cpu_physical_log_stop are used in hw/vga.c and replace the kvm_log_start/stop. With this, vga no longer depends on the kvm header. [ Jan: rebasing and style fixlets ] Signed-off-by: Anthony PERARD Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- cpu-all.h | 6 ++++++ cpu-common.h | 4 ++++ exec.c | 30 ++++++++++++++++++++++++++++++ hw/vga.c | 31 ++++++++++++++++--------------- hw/vhost.c | 2 ++ kvm-all.c | 8 ++++++-- kvm-stub.c | 10 ---------- kvm.h | 3 --- 8 files changed, 64 insertions(+), 30 deletions(-) diff --git a/cpu-all.h b/cpu-all.h index ffbd6a4df8..87b0f86667 100644 --- a/cpu-all.h +++ b/cpu-all.h @@ -959,6 +959,12 @@ int cpu_physical_memory_get_dirty_tracking(void); int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, target_phys_addr_t end_addr); +int cpu_physical_log_start(target_phys_addr_t start_addr, + ram_addr_t size); + +int cpu_physical_log_stop(target_phys_addr_t start_addr, + ram_addr_t size); + void dump_exec_info(FILE *f, fprintf_function cpu_fprintf); #endif /* !CONFIG_USER_ONLY */ diff --git a/cpu-common.h b/cpu-common.h index 6d4a898ad1..54d21d4717 100644 ---
a/cpu-common.h +++ b/cpu-common.h @@ -96,6 +96,10 @@ struct CPUPhysMemoryClient { target_phys_addr_t end_addr); int (*migration_log)(struct CPUPhysMemoryClient *client, int enable); + int (*log_start)(struct CPUPhysMemoryClient *client, + target_phys_addr_t phys_addr, ram_addr_t size); + int (*log_stop)(struct CPUPhysMemoryClient *client, + target_phys_addr_t phys_addr, ram_addr_t size); QLIST_ENTRY(CPUPhysMemoryClient) list; }; diff --git a/exec.c b/exec.c index 1b70dc1b5e..d611100dc3 100644 --- a/exec.c +++ b/exec.c @@ -2078,6 +2078,36 @@ int cpu_physical_sync_dirty_bitmap(target_phys_addr_t start_addr, return ret; } +int cpu_physical_log_start(target_phys_addr_t start_addr, + ram_addr_t size) +{ + CPUPhysMemoryClient *client; + QLIST_FOREACH(client, &memory_client_list, list) { + if (client->log_start) { + int r = client->log_start(client, start_addr, size); + if (r < 0) { + return r; + } + } + } + return 0; +} + +int cpu_physical_log_stop(target_phys_addr_t start_addr, + ram_addr_t size) +{ + CPUPhysMemoryClient *client; + QLIST_FOREACH(client, &memory_client_list, list) { + if (client->log_stop) { + int r = client->log_stop(client, start_addr, size); + if (r < 0) { + return r; + } + } + } + return 0; +} + static inline void tlb_update_dirty(CPUTLBEntry *tlb_entry) { ram_addr_t ram_addr; diff --git a/hw/vga.c b/hw/vga.c index e2151a2458..c22b8af833 100644 --- a/hw/vga.c +++ b/hw/vga.c @@ -28,7 +28,6 @@ #include "vga_int.h" #include "pixel_ops.h" #include "qemu-timer.h" -#include "kvm.h" //#define DEBUG_VGA //#define DEBUG_VGA_MEM @@ -1573,34 +1572,36 @@ static void vga_sync_dirty_bitmap(VGACommonState *s) void vga_dirty_log_start(VGACommonState *s) { - if (kvm_enabled() && s->map_addr) - kvm_log_start(s->map_addr, s->map_end - s->map_addr); + if (s->map_addr) { + cpu_physical_log_start(s->map_addr, s->map_end - s->map_addr); + } - if (kvm_enabled() && s->lfb_vram_mapped) { - kvm_log_start(isa_mem_base + 0xa0000, 0x8000); - kvm_log_start(isa_mem_base + 0xa8000, 
0x8000); + if (s->lfb_vram_mapped) { + cpu_physical_log_start(isa_mem_base + 0xa0000, 0x8000); + cpu_physical_log_start(isa_mem_base + 0xa8000, 0x8000); } #ifdef CONFIG_BOCHS_VBE - if (kvm_enabled() && s->vbe_mapped) { - kvm_log_start(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size); + if (s->vbe_mapped) { + cpu_physical_log_start(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size); } #endif } void vga_dirty_log_stop(VGACommonState *s) { - if (kvm_enabled() && s->map_addr) - kvm_log_stop(s->map_addr, s->map_end - s->map_addr); + if (s->map_addr) { + cpu_physical_log_stop(s->map_addr, s->map_end - s->map_addr); + } - if (kvm_enabled() && s->lfb_vram_mapped) { - kvm_log_stop(isa_mem_base + 0xa0000, 0x8000); - kvm_log_stop(isa_mem_base + 0xa8000, 0x8000); + if (s->lfb_vram_mapped) { + cpu_physical_log_stop(isa_mem_base + 0xa0000, 0x8000); + cpu_physical_log_stop(isa_mem_base + 0xa8000, 0x8000); } #ifdef CONFIG_BOCHS_VBE - if (kvm_enabled() && s->vbe_mapped) { - kvm_log_stop(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size); + if (s->vbe_mapped) { + cpu_physical_log_stop(VBE_DISPI_LFB_PHYSICAL_ADDRESS, s->vram_size); } #endif } diff --git a/hw/vhost.c b/hw/vhost.c index 38cc3b365b..0ca3507f44 100644 --- a/hw/vhost.c +++ b/hw/vhost.c @@ -607,6 +607,8 @@ int vhost_dev_init(struct vhost_dev *hdev, int devfd, bool force) hdev->client.set_memory = vhost_client_set_memory; hdev->client.sync_dirty_bitmap = vhost_client_sync_dirty_bitmap; hdev->client.migration_log = vhost_client_migration_log; + hdev->client.log_start = NULL; + hdev->client.log_stop = NULL; hdev->mem = qemu_mallocz(offsetof(struct vhost_memory, regions)); hdev->log = NULL; hdev->log_size = 0; diff --git a/kvm-all.c b/kvm-all.c index 14b6c1e736..ecac0b3552 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -274,13 +274,15 @@ static int kvm_dirty_pages_log_change(target_phys_addr_t phys_addr, return kvm_set_user_memory_region(s, mem); } -int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size) +static int 
kvm_log_start(CPUPhysMemoryClient *client, + target_phys_addr_t phys_addr, ram_addr_t size) { return kvm_dirty_pages_log_change(phys_addr, size, KVM_MEM_LOG_DIRTY_PAGES, KVM_MEM_LOG_DIRTY_PAGES); } -int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size) +static int kvm_log_stop(CPUPhysMemoryClient *client, + target_phys_addr_t phys_addr, ram_addr_t size) { return kvm_dirty_pages_log_change(phys_addr, size, 0, KVM_MEM_LOG_DIRTY_PAGES); @@ -644,6 +646,8 @@ static CPUPhysMemoryClient kvm_cpu_phys_memory_client = { .set_memory = kvm_client_set_memory, .sync_dirty_bitmap = kvm_client_sync_dirty_bitmap, .migration_log = kvm_client_migration_log, + .log_start = kvm_log_start, + .log_stop = kvm_log_stop, }; int kvm_init(void) diff --git a/kvm-stub.c b/kvm-stub.c index d6b6c8e794..30f6ec3956 100644 --- a/kvm-stub.c +++ b/kvm-stub.c @@ -33,16 +33,6 @@ int kvm_init_vcpu(CPUState *env) return -ENOSYS; } -int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size) -{ - return -ENOSYS; -} - -int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size) -{ - return -ENOSYS; -} - int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size) { return -ENOSYS; diff --git a/kvm.h b/kvm.h index 7a4d550898..4caa6ec19e 100644 --- a/kvm.h +++ b/kvm.h @@ -58,9 +58,6 @@ int kvm_init_vcpu(CPUState *env); int kvm_cpu_exec(CPUState *env); #if !defined(CONFIG_USER_ONLY) -int kvm_log_start(target_phys_addr_t phys_addr, ram_addr_t size); -int kvm_log_stop(target_phys_addr_t phys_addr, ram_addr_t size); - void kvm_setup_guest_memory(void *start, size_t size); int kvm_coalesce_mmio_region(target_phys_addr_t start, ram_addr_t size); From 638a84af9fced4b410243e45f755552303a17a3c Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:24 +0100 Subject: [PATCH 35/37] cirrus: Remove obsolete kvm.h include Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- hw/cirrus_vga.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/cirrus_vga.c b/hw/cirrus_vga.c 
index 5f45b5dee7..2724f7b480 100644 --- a/hw/cirrus_vga.c +++ b/hw/cirrus_vga.c @@ -31,7 +31,6 @@ #include "pci.h" #include "console.h" #include "vga_int.h" -#include "kvm.h" #include "loader.h" /* From 6a7af8cb04c345eb1ed9d95250ef3ad4400e65c5 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:25 +0100 Subject: [PATCH 36/37] kvm: Make kvm_state globally available KVM-assisted devices need access to it but we have no clean channel to distribute a reference. As a workaround until there is a better solution, export kvm_state for global use, though use should remain restricted to the mentioned scenario. Signed-off-by: Jan Kiszka Signed-off-by: Marcelo Tosatti --- kvm-all.c | 2 +- kvm.h | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/kvm-all.c b/kvm-all.c index ecac0b3552..e6a7de4722 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -78,7 +78,7 @@ struct KVMState int many_ioeventfds; }; -static KVMState *kvm_state; +KVMState *kvm_state; static const KVMCapabilityInfo kvm_required_capabilites[] = { KVM_CAP_INFO(USER_MEMORY), diff --git a/kvm.h b/kvm.h index 4caa6ec19e..59b2c29fd9 100644 --- a/kvm.h +++ b/kvm.h @@ -85,6 +85,7 @@ int kvm_on_sigbus(int code, void *addr); struct KVMState; typedef struct KVMState KVMState; +extern KVMState *kvm_state; int kvm_ioctl(KVMState *s, int type, ...); From 0ec329dab938e2d97d12a91f8ed15fec27b325e0 Mon Sep 17 00:00:00 2001 From: Jan Kiszka Date: Mon, 7 Feb 2011 12:19:26 +0100 Subject: [PATCH 37/37] kvm: x86: Introduce kvmclock device to save/restore its state If kvmclock is used, which implies the kernel supports it, register a kvmclock device with the sysbus. Its main purpose is to save and restore the kernel state on migration, but this will also allow to visualize it one day. 
Signed-off-by: Jan Kiszka CC: Glauber Costa Signed-off-by: Marcelo Tosatti --- Makefile.target | 4 +- hw/kvmclock.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++ hw/kvmclock.h | 14 ++++++ hw/pc_piix.c | 32 ++++++++++--- 4 files changed, 166 insertions(+), 9 deletions(-) create mode 100644 hw/kvmclock.c create mode 100644 hw/kvmclock.h diff --git a/Makefile.target b/Makefile.target index a6c30ddc23..5a0fd40696 100644 --- a/Makefile.target +++ b/Makefile.target @@ -37,7 +37,7 @@ ifndef CONFIG_HAIKU LIBS+=-lm endif -kvm.o kvm-all.o vhost.o vhost_net.o: QEMU_CFLAGS+=$(KVM_CFLAGS) +kvm.o kvm-all.o vhost.o vhost_net.o kvmclock.o: QEMU_CFLAGS+=$(KVM_CFLAGS) config-target.h: config-target.h-timestamp config-target.h-timestamp: config-target.mak @@ -218,7 +218,7 @@ obj-i386-y += cirrus_vga.o apic.o ioapic.o piix_pci.o obj-i386-y += vmport.o applesmc.o obj-i386-y += device-hotplug.o pci-hotplug.o smbios.o wdt_ib700.o obj-i386-y += debugcon.o multiboot.o -obj-i386-y += pc_piix.o +obj-i386-y += pc_piix.o kvmclock.o obj-i386-$(CONFIG_SPICE) += qxl.o qxl-logger.o qxl-render.o # shared objects diff --git a/hw/kvmclock.c b/hw/kvmclock.c new file mode 100644 index 0000000000..b6ceddfba6 --- /dev/null +++ b/hw/kvmclock.c @@ -0,0 +1,125 @@ +/* + * QEMU KVM support, paravirtual clock device + * + * Copyright (C) 2011 Siemens AG + * + * Authors: + * Jan Kiszka + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. 
+ * + */ + +#include "qemu-common.h" +#include "sysemu.h" +#include "sysbus.h" +#include "kvm.h" +#include "kvmclock.h" + +#if defined(CONFIG_KVM_PARA) && defined(KVM_CAP_ADJUST_CLOCK) + +#include +#include + +typedef struct KVMClockState { + SysBusDevice busdev; + uint64_t clock; + bool clock_valid; +} KVMClockState; + +static void kvmclock_pre_save(void *opaque) +{ + KVMClockState *s = opaque; + struct kvm_clock_data data; + int ret; + + if (s->clock_valid) { + return; + } + ret = kvm_vm_ioctl(kvm_state, KVM_GET_CLOCK, &data); + if (ret < 0) { + fprintf(stderr, "KVM_GET_CLOCK failed: %s\n", strerror(ret)); + data.clock = 0; + } + s->clock = data.clock; + /* + * If the VM is stopped, declare the clock state valid to avoid re-reading + * it on next vmsave (which would return a different value). Will be reset + * when the VM is continued. + */ + s->clock_valid = !vm_running; +} + +static int kvmclock_post_load(void *opaque, int version_id) +{ + KVMClockState *s = opaque; + struct kvm_clock_data data; + + data.clock = s->clock; + data.flags = 0; + return kvm_vm_ioctl(kvm_state, KVM_SET_CLOCK, &data); +} + +static void kvmclock_vm_state_change(void *opaque, int running, int reason) +{ + KVMClockState *s = opaque; + + if (running) { + s->clock_valid = false; + } +} + +static int kvmclock_init(SysBusDevice *dev) +{ + KVMClockState *s = FROM_SYSBUS(KVMClockState, dev); + + qemu_add_vm_change_state_handler(kvmclock_vm_state_change, s); + return 0; +} + +static const VMStateDescription kvmclock_vmsd = { + .name = "kvmclock", + .version_id = 1, + .minimum_version_id = 1, + .minimum_version_id_old = 1, + .pre_save = kvmclock_pre_save, + .post_load = kvmclock_post_load, + .fields = (VMStateField[]) { + VMSTATE_UINT64(clock, KVMClockState), + VMSTATE_END_OF_LIST() + } +}; + +static SysBusDeviceInfo kvmclock_info = { + .qdev.name = "kvmclock", + .qdev.size = sizeof(KVMClockState), + .qdev.vmsd = &kvmclock_vmsd, + .qdev.no_user = 1, + .init = kvmclock_init, +}; + +/* Note: Must 
be called after VCPU initialization. */ +void kvmclock_create(void) +{ + if (kvm_enabled() && + first_cpu->cpuid_kvm_features & (1ULL << KVM_FEATURE_CLOCKSOURCE)) { + sysbus_create_simple("kvmclock", -1, NULL); + } +} + +static void kvmclock_register_device(void) +{ + if (kvm_enabled()) { + sysbus_register_withprop(&kvmclock_info); + } +} + +device_init(kvmclock_register_device); + +#else /* !(CONFIG_KVM_PARA && KVM_CAP_ADJUST_CLOCK) */ + +void kvmclock_create(void) +{ +} +#endif /* !(CONFIG_KVM_PARA && KVM_CAP_ADJUST_CLOCK) */ diff --git a/hw/kvmclock.h b/hw/kvmclock.h new file mode 100644 index 0000000000..7a83cbe8f6 --- /dev/null +++ b/hw/kvmclock.h @@ -0,0 +1,14 @@ +/* + * QEMU KVM support, paravirtual clock device + * + * Copyright (C) 2011 Siemens AG + * + * Authors: + * Jan Kiszka + * + * This work is licensed under the terms of the GNU GPL version 2. + * See the COPYING file in the top-level directory. + * + */ + +void kvmclock_create(void); diff --git a/hw/pc_piix.c b/hw/pc_piix.c index d0bd0cd1fa..291845478d 100644 --- a/hw/pc_piix.c +++ b/hw/pc_piix.c @@ -32,6 +32,7 @@ #include "boards.h" #include "ide.h" #include "kvm.h" +#include "kvmclock.h" #include "sysemu.h" #include "sysbus.h" #include "arch_init.h" @@ -66,7 +67,8 @@ static void pc_init1(ram_addr_t ram_size, const char *kernel_cmdline, const char *initrd_filename, const char *cpu_model, - int pci_enabled) + int pci_enabled, + int kvmclock_enabled) { int i; ram_addr_t below_4g_mem_size, above_4g_mem_size; @@ -86,6 +88,10 @@ static void pc_init1(ram_addr_t ram_size, pc_cpus_init(cpu_model); + if (kvmclock_enabled) { + kvmclock_create(); + } + /* allocate ram and load rom/bios */ pc_memory_init(ram_size, kernel_filename, kernel_cmdline, initrd_filename, &below_4g_mem_size, &above_4g_mem_size); @@ -193,7 +199,19 @@ static void pc_init_pci(ram_addr_t ram_size, { pc_init1(ram_size, boot_device, kernel_filename, kernel_cmdline, - initrd_filename, cpu_model, 1); + initrd_filename, cpu_model, 1, 1); +} + 
+static void pc_init_pci_no_kvmclock(ram_addr_t ram_size, + const char *boot_device, + const char *kernel_filename, + const char *kernel_cmdline, + const char *initrd_filename, + const char *cpu_model) +{ + pc_init1(ram_size, boot_device, + kernel_filename, kernel_cmdline, + initrd_filename, cpu_model, 1, 0); } static void pc_init_isa(ram_addr_t ram_size, @@ -207,7 +225,7 @@ static void pc_init_isa(ram_addr_t ram_size, cpu_model = "486"; pc_init1(ram_size, boot_device, kernel_filename, kernel_cmdline, - initrd_filename, cpu_model, 0); + initrd_filename, cpu_model, 0, 1); } static QEMUMachine pc_machine = { @@ -222,7 +240,7 @@ static QEMUMachine pc_machine = { static QEMUMachine pc_machine_v0_13 = { .name = "pc-0.13", .desc = "Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_no_kvmclock, .max_cpus = 255, .compat_props = (GlobalProperty[]) { { @@ -249,7 +267,7 @@ static QEMUMachine pc_machine_v0_13 = { static QEMUMachine pc_machine_v0_12 = { .name = "pc-0.12", .desc = "Standard PC", - .init = pc_init_pci, + .init = pc_init_pci_no_kvmclock, .max_cpus = 255, .compat_props = (GlobalProperty[]) { { @@ -280,7 +298,7 @@ static QEMUMachine pc_machine_v0_12 = { static QEMUMachine pc_machine_v0_11 = { .name = "pc-0.11", .desc = "Standard PC, qemu 0.11", - .init = pc_init_pci, + .init = pc_init_pci_no_kvmclock, .max_cpus = 255, .compat_props = (GlobalProperty[]) { { @@ -319,7 +337,7 @@ static QEMUMachine pc_machine_v0_11 = { static QEMUMachine pc_machine_v0_10 = { .name = "pc-0.10", .desc = "Standard PC, qemu 0.10", - .init = pc_init_pci, + .init = pc_init_pci_no_kvmclock, .max_cpus = 255, .compat_props = (GlobalProperty[]) { {