From f8502cfbbf0bdd30603159a23d86cd19ad202a25 Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 21 Oct 2010 17:46:49 +0900 Subject: [PATCH 1/7] x86, mce: ignore SRAO only when MCG_SER_P is available And restructure this block to call kvm_mce_in_exception() only when it is required. Signed-off-by: Hidetoshi Seto Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 512d533970..b813953028 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -239,12 +239,16 @@ static void kvm_do_inject_x86_mce(void *_data) struct kvm_x86_mce_data *data = _data; int r; - /* If there is an MCE excpetion being processed, ignore this SRAO MCE */ - r = kvm_mce_in_exception(data->env); - if (r == -1) - fprintf(stderr, "Failed to get MCE status\n"); - else if (r && !(data->mce->status & MCI_STATUS_AR)) - return; + /* If there is an MCE exception being processed, ignore this SRAO MCE */ + if ((data->env->mcg_cap & MCG_SER_P) && + !(data->mce->status & MCI_STATUS_AR)) { + r = kvm_mce_in_exception(data->env); + if (r == -1) { + fprintf(stderr, "Failed to get MCE status\n"); + } else if (r) { + return; + } + } r = kvm_set_mce(data->env, data->mce); if (r < 0) { From f71ac88fe97620f9cc80facc5e00826b8256fe5d Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 21 Oct 2010 17:47:06 +0900 Subject: [PATCH 2/7] x86, mce: broadcast mce depending on the cpu version There is no reason why the SRAO event received by the main thread is the only one that is being broadcast. According to the x86 ASDM vol.3A 15.10.4.1, MCE signal is broadcast on processor version 06H_EH or later. This change is required to handle SRAR in smp guests. 
Signed-off-by: Hidetoshi Seto Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index b813953028..9144f74e2d 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -1636,6 +1636,28 @@ static void hardware_memory_error(void) exit(1); } +#ifdef KVM_CAP_MCE +static void kvm_mce_broadcast_rest(CPUState *env) +{ + CPUState *cenv; + int family, model, cpuver = env->cpuid_version; + + family = (cpuver >> 8) & 0xf; + model = ((cpuver >> 12) & 0xf0) + ((cpuver >> 4) & 0xf); + + /* Broadcast MCA signal for processor version 06H_EH and above */ + if ((family == 6 && model >= 14) || family > 6) { + for (cenv = first_cpu; cenv != NULL; cenv = cenv->next_cpu) { + if (cenv == env) { + continue; + } + kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC, + MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1); + } + } +} +#endif + int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) { #if defined(KVM_CAP_MCE) @@ -1693,6 +1715,7 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr) fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno)); abort(); } + kvm_mce_broadcast_rest(env); } else #endif { @@ -1715,7 +1738,6 @@ int kvm_on_sigbus(int code, void *addr) void *vaddr; ram_addr_t ram_addr; target_phys_addr_t paddr; - CPUState *cenv; /* Hope we are lucky for AO MCE */ vaddr = addr; @@ -1731,10 +1753,7 @@ int kvm_on_sigbus(int code, void *addr) kvm_inject_x86_mce(first_cpu, 9, status, MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr, (MCM_ADDR_PHYS << 6) | 0xc, 1); - for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu) { - kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC, - MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1); - } + kvm_mce_broadcast_rest(first_cpu); } else #endif { From d8da8574b167144d1868f343514dfb88716e2edb Mon Sep 17 00:00:00 2001 From: Hidetoshi Seto Date: Thu, 21 Oct 2010 17:23:14 +0900 Subject: [PATCH 
3/7] Fix build on !KVM_CAP_MCE This patch removes the following warnings: target-i386/kvm.c: In function 'kvm_put_msrs': target-i386/kvm.c:782: error: unused variable 'i' target-i386/kvm.c: In function 'kvm_get_msrs': target-i386/kvm.c:1083: error: label at end of compound statement Signed-off-by: Hidetoshi Seto Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 9144f74e2d..587ee195da 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -783,7 +783,7 @@ static int kvm_put_msrs(CPUState *env, int level) struct kvm_msr_entry entries[100]; } msr_data; struct kvm_msr_entry *msrs = msr_data.entries; - int i, n = 0; + int n = 0; kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs); kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp); @@ -805,6 +805,7 @@ static int kvm_put_msrs(CPUState *env, int level) } #ifdef KVM_CAP_MCE if (env->mcg_cap) { + int i; if (level == KVM_PUT_RESET_STATE) kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status); else if (level == KVM_PUT_FULL_STATE) { @@ -1089,9 +1090,9 @@ static int kvm_get_msrs(CPUState *env) if (msrs[i].index >= MSR_MC0_CTL && msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) { env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data; - break; } #endif + break; } } From aa851e365b3f62ad86cf599860217b60e02dc893 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 21 Oct 2010 13:35:01 -0200 Subject: [PATCH 4/7] kvm: add save/restore of MSR_VM_HSAVE_PA commit 2bba4446746add456ceeb0e8359a43032a2ea333 Author: Alexander Graf Date: Thu Dec 18 15:38:32 2008 +0100 Enable nested SVM support in userspace Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 587ee195da..e6c9a1d8f1 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -790,6 +790,7 @@ static int 
kvm_put_msrs(CPUState *env, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); if (kvm_has_msr_star(env)) kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); + kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); #ifdef TARGET_X86_64 /* FIXME if lm capable */ kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -1015,6 +1016,7 @@ static int kvm_get_msrs(CPUState *env) msrs[n++].index = MSR_IA32_SYSENTER_EIP; if (kvm_has_msr_star(env)) msrs[n++].index = MSR_STAR; + msrs[n++].index = MSR_VM_HSAVE_PA; msrs[n++].index = MSR_IA32_TSC; #ifdef TARGET_X86_64 /* FIXME lm_capable_kernel */ @@ -1071,6 +1073,9 @@ static int kvm_get_msrs(CPUState *env) case MSR_IA32_TSC: env->tsc = msrs[i].data; break; + case MSR_VM_HSAVE_PA: + env->vm_hsave = msrs[i].data; + break; case MSR_KVM_SYSTEM_TIME: env->system_time_msr = msrs[i].data; break; From 75b10c43365e2a9cab5398f31b96a463b0d57eff Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 21 Oct 2010 13:35:02 -0200 Subject: [PATCH 5/7] kvm: factor out kvm_has_msr_star And add kvm_has_msr_hsave_pa(), to avoid warnings on older kernels without support. Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 41 ++++++++++++++++++++++++++++++----------- 1 file changed, 30 insertions(+), 11 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index e6c9a1d8f1..06474d6161 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -438,23 +438,26 @@ void kvm_arch_reset_vcpu(CPUState *env) } } -static int kvm_has_msr_star(CPUState *env) +int has_msr_star; +int has_msr_hsave_pa; + +static void kvm_supported_msrs(CPUState *env) { - static int has_msr_star; + static int kvm_supported_msrs; int ret; /* first time */ - if (has_msr_star == 0) { + if (kvm_supported_msrs == 0) { struct kvm_msr_list msr_list, *kvm_msr_list; - has_msr_star = -1; + kvm_supported_msrs = -1; /* Obtain MSR list from KVM. 
These are the MSRs that we must * save/restore */ msr_list.nmsrs = 0; ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list); if (ret < 0 && ret != -E2BIG) { - return 0; + return; } /* Old kernel modules had a bug and could write beyond the provided memory. Allocate at least a safe amount of 1K. */ @@ -470,7 +473,11 @@ static int kvm_has_msr_star(CPUState *env) for (i = 0; i < kvm_msr_list->nmsrs; i++) { if (kvm_msr_list->indices[i] == MSR_STAR) { has_msr_star = 1; - break; + continue; + } + if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) { + has_msr_hsave_pa = 1; + continue; } } } @@ -478,9 +485,19 @@ static int kvm_has_msr_star(CPUState *env) free(kvm_msr_list); } - if (has_msr_star == 1) - return 1; - return 0; + return; +} + +static int kvm_has_msr_hsave_pa(CPUState *env) +{ + kvm_supported_msrs(env); + return has_msr_hsave_pa; +} + +static int kvm_has_msr_star(CPUState *env) +{ + kvm_supported_msrs(env); + return has_msr_star; } static int kvm_init_identity_map_page(KVMState *s) @@ -790,7 +807,8 @@ static int kvm_put_msrs(CPUState *env, int level) kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip); if (kvm_has_msr_star(env)) kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star); - kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); + if (kvm_has_msr_hsave_pa(env)) + kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); #ifdef TARGET_X86_64 /* FIXME if lm capable */ kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); @@ -1016,7 +1034,8 @@ static int kvm_get_msrs(CPUState *env) msrs[n++].index = MSR_IA32_SYSENTER_EIP; if (kvm_has_msr_star(env)) msrs[n++].index = MSR_STAR; - msrs[n++].index = MSR_VM_HSAVE_PA; + if (kvm_has_msr_hsave_pa(env)) + msrs[n++].index = MSR_VM_HSAVE_PA; msrs[n++].index = MSR_IA32_TSC; #ifdef TARGET_X86_64 /* FIXME lm_capable_kernel */ From 384331a61a8ca42f32516ced20cb52470c5f6f57 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 21 Oct 2010 13:35:03 -0200 Subject: [PATCH 6/7] 
kvm: writeback SMP TSCs on migration only commit 6389c45441269baa2873e6feafebd17105ddeaf6 Author: Jan Kiszka Date: Mon Mar 1 18:17:26 2010 +0100 qemu-kvm: Cleanup/fix TSC and PV clock writeback Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 06474d6161..e2f7e2ef4a 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -817,7 +817,15 @@ static int kvm_put_msrs(CPUState *env, int level) kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); #endif if (level == KVM_PUT_FULL_STATE) { - kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); + /* + * KVM is yet unable to synchronize TSC values of multiple VCPUs on + * writeback. Until this is fixed, we only write the offset to SMP + * guests after migration, desynchronizing the VCPUs, but avoiding + * huge jump-backs that would occur without any writeback at all. + */ + if (smp_cpus == 1 || env->tsc != 0) { + kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc); + } kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME, env->system_time_msr); kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr); From 25d2e3613d159782b66f497184ebdcf3ccb686ab Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Thu, 21 Oct 2010 13:35:04 -0200 Subject: [PATCH 7/7] kvm: save/restore x86-64 MSRs on x86-64 kernels Signed-off-by: Marcelo Tosatti --- target-i386/kvm.c | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) diff --git a/target-i386/kvm.c b/target-i386/kvm.c index e2f7e2ef4a..ae0a034ab0 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include @@ -53,6 +54,8 @@ #define BUS_MCEERR_AO 5 #endif +static int lm_capable_kernel; + #ifdef KVM_CAP_EXT_CPUID static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max) @@ -523,6 +526,11 @@ int kvm_arch_init(KVMState *s, int smp_cpus) { int ret; + struct utsname 
utsname; + + uname(&utsname); + lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0; + /* create vm86 tss. KVM uses vm86 mode to emulate 16-bit code * directly. In order to use vm86 mode, a TSS is needed. Since this * must be part of guest physical memory, we need to allocate it. Older @@ -810,11 +818,12 @@ static int kvm_put_msrs(CPUState *env, int level) if (kvm_has_msr_hsave_pa(env)) kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave); #ifdef TARGET_X86_64 - /* FIXME if lm capable */ - kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); - kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase); - kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask); - kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); + if (lm_capable_kernel) { + kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar); + kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase); + kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask); + kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar); + } #endif if (level == KVM_PUT_FULL_STATE) { /* @@ -1046,11 +1055,12 @@ static int kvm_get_msrs(CPUState *env) msrs[n++].index = MSR_VM_HSAVE_PA; msrs[n++].index = MSR_IA32_TSC; #ifdef TARGET_X86_64 - /* FIXME lm_capable_kernel */ - msrs[n++].index = MSR_CSTAR; - msrs[n++].index = MSR_KERNELGSBASE; - msrs[n++].index = MSR_FMASK; - msrs[n++].index = MSR_LSTAR; + if (lm_capable_kernel) { + msrs[n++].index = MSR_CSTAR; + msrs[n++].index = MSR_KERNELGSBASE; + msrs[n++].index = MSR_FMASK; + msrs[n++].index = MSR_LSTAR; + } #endif msrs[n++].index = MSR_KVM_SYSTEM_TIME; msrs[n++].index = MSR_KVM_WALL_CLOCK;