Merge remote branch 'qemu-kvm/uq/master' into staging

This commit is contained in:
Anthony Liguori 2010-10-22 08:02:14 -05:00
commit dbb1413589
1 changed files with 99 additions and 33 deletions

View File

@ -15,6 +15,7 @@
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/mman.h>
#include <sys/utsname.h>
#include <linux/kvm.h>
@ -53,6 +54,8 @@
#define BUS_MCEERR_AO 5
#endif
static int lm_capable_kernel;
#ifdef KVM_CAP_EXT_CPUID
static struct kvm_cpuid2 *try_get_cpuid(KVMState *s, int max)
@ -239,12 +242,16 @@ static void kvm_do_inject_x86_mce(void *_data)
struct kvm_x86_mce_data *data = _data;
int r;
/* If there is an MCE excpetion being processed, ignore this SRAO MCE */
r = kvm_mce_in_exception(data->env);
if (r == -1)
fprintf(stderr, "Failed to get MCE status\n");
else if (r && !(data->mce->status & MCI_STATUS_AR))
return;
/* If there is an MCE exception being processed, ignore this SRAO MCE */
if ((data->env->mcg_cap & MCG_SER_P) &&
!(data->mce->status & MCI_STATUS_AR)) {
r = kvm_mce_in_exception(data->env);
if (r == -1) {
fprintf(stderr, "Failed to get MCE status\n");
} else if (r) {
return;
}
}
r = kvm_set_mce(data->env, data->mce);
if (r < 0) {
@ -434,23 +441,26 @@ void kvm_arch_reset_vcpu(CPUState *env)
}
}
static int kvm_has_msr_star(CPUState *env)
int has_msr_star;
int has_msr_hsave_pa;
static void kvm_supported_msrs(CPUState *env)
{
static int has_msr_star;
static int kvm_supported_msrs;
int ret;
/* first time */
if (has_msr_star == 0) {
if (kvm_supported_msrs == 0) {
struct kvm_msr_list msr_list, *kvm_msr_list;
has_msr_star = -1;
kvm_supported_msrs = -1;
/* Obtain MSR list from KVM. These are the MSRs that we must
* save/restore */
msr_list.nmsrs = 0;
ret = kvm_ioctl(env->kvm_state, KVM_GET_MSR_INDEX_LIST, &msr_list);
if (ret < 0 && ret != -E2BIG) {
return 0;
return;
}
/* Old kernel modules had a bug and could write beyond the provided
memory. Allocate at least a safe amount of 1K. */
@ -466,7 +476,11 @@ static int kvm_has_msr_star(CPUState *env)
for (i = 0; i < kvm_msr_list->nmsrs; i++) {
if (kvm_msr_list->indices[i] == MSR_STAR) {
has_msr_star = 1;
break;
continue;
}
if (kvm_msr_list->indices[i] == MSR_VM_HSAVE_PA) {
has_msr_hsave_pa = 1;
continue;
}
}
}
@ -474,9 +488,19 @@ static int kvm_has_msr_star(CPUState *env)
free(kvm_msr_list);
}
if (has_msr_star == 1)
return 1;
return 0;
return;
}
/*
 * Tell the caller whether MSR_VM_HSAVE_PA is flagged as available.
 *
 * kvm_supported_msrs() is invoked first so that the file-level
 * has_msr_hsave_pa flag has been filled in before it is read.
 *
 * Returns the current value of has_msr_hsave_pa.
 */
static int kvm_has_msr_hsave_pa(CPUState *env)
{
    int available;

    kvm_supported_msrs(env);
    available = has_msr_hsave_pa;

    return available;
}
/*
 * Tell the caller whether MSR_STAR is flagged as available.
 *
 * kvm_supported_msrs() is invoked first so that the file-level
 * has_msr_star flag has been filled in before it is read.
 *
 * Returns the current value of has_msr_star.
 */
static int kvm_has_msr_star(CPUState *env)
{
    int available;

    kvm_supported_msrs(env);
    available = has_msr_star;

    return available;
}
static int kvm_init_identity_map_page(KVMState *s)
@ -502,6 +526,11 @@ int kvm_arch_init(KVMState *s, int smp_cpus)
{
int ret;
struct utsname utsname;
uname(&utsname);
lm_capable_kernel = strcmp(utsname.machine, "x86_64") == 0;
/* create vm86 tss. KVM uses vm86 mode to emulate 16-bit code
* directly. In order to use vm86 mode, a TSS is needed. Since this
* must be part of guest physical memory, we need to allocate it. Older
@ -779,28 +808,40 @@ static int kvm_put_msrs(CPUState *env, int level)
struct kvm_msr_entry entries[100];
} msr_data;
struct kvm_msr_entry *msrs = msr_data.entries;
int i, n = 0;
int n = 0;
kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_CS, env->sysenter_cs);
kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_ESP, env->sysenter_esp);
kvm_msr_entry_set(&msrs[n++], MSR_IA32_SYSENTER_EIP, env->sysenter_eip);
if (kvm_has_msr_star(env))
kvm_msr_entry_set(&msrs[n++], MSR_STAR, env->star);
if (kvm_has_msr_hsave_pa(env))
kvm_msr_entry_set(&msrs[n++], MSR_VM_HSAVE_PA, env->vm_hsave);
#ifdef TARGET_X86_64
/* FIXME if lm capable */
kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
if (lm_capable_kernel) {
kvm_msr_entry_set(&msrs[n++], MSR_CSTAR, env->cstar);
kvm_msr_entry_set(&msrs[n++], MSR_KERNELGSBASE, env->kernelgsbase);
kvm_msr_entry_set(&msrs[n++], MSR_FMASK, env->fmask);
kvm_msr_entry_set(&msrs[n++], MSR_LSTAR, env->lstar);
}
#endif
if (level == KVM_PUT_FULL_STATE) {
kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
/*
* KVM is yet unable to synchronize TSC values of multiple VCPUs on
* writeback. Until this is fixed, we only write the offset to SMP
* guests after migration, desynchronizing the VCPUs, but avoiding
* huge jump-backs that would occur without any writeback at all.
*/
if (smp_cpus == 1 || env->tsc != 0) {
kvm_msr_entry_set(&msrs[n++], MSR_IA32_TSC, env->tsc);
}
kvm_msr_entry_set(&msrs[n++], MSR_KVM_SYSTEM_TIME,
env->system_time_msr);
kvm_msr_entry_set(&msrs[n++], MSR_KVM_WALL_CLOCK, env->wall_clock_msr);
}
#ifdef KVM_CAP_MCE
if (env->mcg_cap) {
int i;
if (level == KVM_PUT_RESET_STATE)
kvm_msr_entry_set(&msrs[n++], MSR_MCG_STATUS, env->mcg_status);
else if (level == KVM_PUT_FULL_STATE) {
@ -1010,13 +1051,16 @@ static int kvm_get_msrs(CPUState *env)
msrs[n++].index = MSR_IA32_SYSENTER_EIP;
if (kvm_has_msr_star(env))
msrs[n++].index = MSR_STAR;
if (kvm_has_msr_hsave_pa(env))
msrs[n++].index = MSR_VM_HSAVE_PA;
msrs[n++].index = MSR_IA32_TSC;
#ifdef TARGET_X86_64
/* FIXME lm_capable_kernel */
msrs[n++].index = MSR_CSTAR;
msrs[n++].index = MSR_KERNELGSBASE;
msrs[n++].index = MSR_FMASK;
msrs[n++].index = MSR_LSTAR;
if (lm_capable_kernel) {
msrs[n++].index = MSR_CSTAR;
msrs[n++].index = MSR_KERNELGSBASE;
msrs[n++].index = MSR_FMASK;
msrs[n++].index = MSR_LSTAR;
}
#endif
msrs[n++].index = MSR_KVM_SYSTEM_TIME;
msrs[n++].index = MSR_KVM_WALL_CLOCK;
@ -1066,6 +1110,9 @@ static int kvm_get_msrs(CPUState *env)
case MSR_IA32_TSC:
env->tsc = msrs[i].data;
break;
case MSR_VM_HSAVE_PA:
env->vm_hsave = msrs[i].data;
break;
case MSR_KVM_SYSTEM_TIME:
env->system_time_msr = msrs[i].data;
break;
@ -1085,9 +1132,9 @@ static int kvm_get_msrs(CPUState *env)
if (msrs[i].index >= MSR_MC0_CTL &&
msrs[i].index < MSR_MC0_CTL + (env->mcg_cap & 0xff) * 4) {
env->mce_banks[msrs[i].index - MSR_MC0_CTL] = msrs[i].data;
break;
}
#endif
break;
}
}
@ -1632,6 +1679,28 @@ static void hardware_memory_error(void)
exit(1);
}
#ifdef KVM_CAP_MCE
/*
 * Broadcast the MCA signal to every CPU except @env.
 *
 * The family and model are decoded from env->cpuid_version; the
 * broadcast only happens for processor version 06H_EH and above
 * (family 6 with model >= 14, or any family above 6).  For older
 * versions the function returns without injecting anything.
 */
static void kvm_mce_broadcast_rest(CPUState *env)
{
    int version = env->cpuid_version;
    /* Family lives in bits 8-11 of the version word. */
    int family = (version >> 8) & 0xf;
    /* Model combines bits 16-19 (as the high nibble) with bits 4-7. */
    int model = ((version >> 12) & 0xf0) + ((version >> 4) & 0xf);
    CPUState *other;

    /* Processor versions below 06H_EH do not get the broadcast. */
    if (family < 6 || (family == 6 && model < 14)) {
        return;
    }

    for (other = first_cpu; other != NULL; other = other->next_cpu) {
        /* The CPU passed in by the caller is skipped. */
        if (other != env) {
            kvm_inject_x86_mce(other, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
                               MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
        }
    }
}
#endif
int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
{
#if defined(KVM_CAP_MCE)
@ -1689,6 +1758,7 @@ int kvm_on_sigbus_vcpu(CPUState *env, int code, void *addr)
fprintf(stderr, "kvm_set_mce: %s\n", strerror(errno));
abort();
}
kvm_mce_broadcast_rest(env);
} else
#endif
{
@ -1711,7 +1781,6 @@ int kvm_on_sigbus(int code, void *addr)
void *vaddr;
ram_addr_t ram_addr;
target_phys_addr_t paddr;
CPUState *cenv;
/* Hope we are lucky for AO MCE */
vaddr = addr;
@ -1727,10 +1796,7 @@ int kvm_on_sigbus(int code, void *addr)
kvm_inject_x86_mce(first_cpu, 9, status,
MCG_STATUS_MCIP | MCG_STATUS_RIPV, paddr,
(MCM_ADDR_PHYS << 6) | 0xc, 1);
for (cenv = first_cpu->next_cpu; cenv != NULL; cenv = cenv->next_cpu) {
kvm_inject_x86_mce(cenv, 1, MCI_STATUS_VAL | MCI_STATUS_UC,
MCG_STATUS_MCIP | MCG_STATUS_RIPV, 0, 0, 1);
}
kvm_mce_broadcast_rest(first_cpu);
} else
#endif
{