diff --git a/bsd-user/main.c b/bsd-user/main.c index 0f9169d6dd..4ba61da896 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -1003,8 +1003,6 @@ int main(int argc, char **argv) cpu->opaque = ts; #if defined(TARGET_I386) - cpu_x86_set_cpl(env, 3); - env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; env->hflags |= HF_PE_MASK; if (env->features[FEAT_1_EDX] & CPUID_SSE) { diff --git a/cpu-exec.c b/cpu-exec.c index 2f54054d8c..38e5f02a30 100644 --- a/cpu-exec.c +++ b/cpu-exec.c @@ -335,6 +335,18 @@ int cpu_exec(CPUArchState *env) cpu_loop_exit(cpu); } #endif +#if defined(TARGET_I386) + if (interrupt_request & CPU_INTERRUPT_INIT) { + cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0); + do_cpu_init(x86_cpu); + cpu->exception_index = EXCP_HALTED; + cpu_loop_exit(cpu); + } +#else + if (interrupt_request & CPU_INTERRUPT_RESET) { + cpu_reset(cpu); + } +#endif #if defined(TARGET_I386) #if !defined(CONFIG_USER_ONLY) if (interrupt_request & CPU_INTERRUPT_POLL) { @@ -342,13 +354,7 @@ int cpu_exec(CPUArchState *env) apic_poll_irq(x86_cpu->apic_state); } #endif - if (interrupt_request & CPU_INTERRUPT_INIT) { - cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, - 0); - do_cpu_init(x86_cpu); - cpu->exception_index = EXCP_HALTED; - cpu_loop_exit(cpu); - } else if (interrupt_request & CPU_INTERRUPT_SIPI) { + if (interrupt_request & CPU_INTERRUPT_SIPI) { do_cpu_sipi(x86_cpu); } else if (env->hflags2 & HF2_GIF_MASK) { if ((interrupt_request & CPU_INTERRUPT_SMI) && @@ -405,9 +411,6 @@ int cpu_exec(CPUArchState *env) } } #elif defined(TARGET_PPC) - if ((interrupt_request & CPU_INTERRUPT_RESET)) { - cpu_reset(cpu); - } if (interrupt_request & CPU_INTERRUPT_HARD) { ppc_hw_interrupt(env); if (env->pending_interrupts == 0) { diff --git a/hw/i386/kvm/pci-assign.c b/hw/i386/kvm/pci-assign.c index e55421adcd..de33657563 100644 --- a/hw/i386/kvm/pci-assign.c +++ b/hw/i386/kvm/pci-assign.c @@ -1300,6 +1300,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp) if (pos != 0 && kvm_device_msix_supported(kvm_state)) { int bar_nr; uint32_t msix_table_entry; + uint16_t msix_max; verify_irqchip_in_kernel(&local_err); if (local_err) { @@ -1315,9 +1316,10 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp) } pci_dev->msix_cap = pos; - pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS, - pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) & - PCI_MSIX_FLAGS_QSIZE); + msix_max = (pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS) & + PCI_MSIX_FLAGS_QSIZE) + 1; + msix_max = MIN(msix_max, KVM_MAX_MSIX_PER_DEV); + pci_set_word(pci_dev->config + pos + PCI_MSIX_FLAGS, msix_max - 1); /* Only enable and function mask bits are writable */ pci_set_word(pci_dev->wmask + pos + PCI_MSIX_FLAGS, @@ -1327,9 +1329,7 @@ static int assigned_device_pci_cap_init(PCIDevice *pci_dev, Error **errp) bar_nr = msix_table_entry & PCI_MSIX_FLAGS_BIRMASK; msix_table_entry &= ~PCI_MSIX_FLAGS_BIRMASK; dev->msix_table_addr = pci_region[bar_nr].base_addr + msix_table_entry; - dev->msix_max = pci_get_word(pci_dev->config + pos + PCI_MSIX_FLAGS); - dev->msix_max &= PCI_MSIX_FLAGS_QSIZE; - dev->msix_max += 1; + dev->msix_max = msix_max; } /* Minimal PM support, nothing writable, device appears to NAK changes */ @@ -1664,6 +1664,7 @@ static void assigned_dev_register_msix_mmio(AssignedDevice *dev, Error **errp) MAP_ANONYMOUS|MAP_PRIVATE, 0, 0); if (dev->msix_table == MAP_FAILED) { error_setg_errno(errp, errno, "failed to allocate msix_table"); + dev->msix_table = NULL; return; } diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 07de2384ad..e6369d5be6 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -471,11 +471,12 @@ static void port92_write(void *opaque, hwaddr addr, uint64_t val, unsigned size) { Port92State *s = opaque; + int oldval = s->outport; DPRINTF("port92: write 0x%02x\n", val); s->outport = val; qemu_set_irq(*s->a20_out, (val >> 1) & 1); - if (val & 1) { + if ((val & 1) && !(oldval & 1)) { qemu_system_reset_request(); } } diff --git a/hw/intc/apic_common.c b/hw/intc/apic_common.c index 71376533ca..ce3d903b13 100644 --- a/hw/intc/apic_common.c +++ b/hw/intc/apic_common.c @@ -200,7 +200,7 @@ void apic_init_reset(DeviceState *dev) s->initial_count = 0; s->initial_count_load_time = 0; s->next_time = 0; - s->wait_for_sipi = 1; + s->wait_for_sipi = !cpu_is_bsp(s->cpu); if (s->timer) { timer_del(s->timer); diff --git a/include/exec/cpu-all.h b/include/exec/cpu-all.h index fb649a4029..9cab592dc5 100644 --- a/include/exec/cpu-all.h +++ b/include/exec/cpu-all.h @@ -381,6 +381,9 @@ CPUArchState *cpu_copy(CPUArchState *env); /* Debug event pending. */ #define CPU_INTERRUPT_DEBUG 0x0080 +/* Reset signal. */ +#define CPU_INTERRUPT_RESET 0x0400 + /* Several target-specific external hardware interrupts. Each target/cpu.h should define proper names based on these defines. */ #define CPU_INTERRUPT_TGT_EXT_0 0x0008 @@ -395,9 +398,8 @@ CPUArchState *cpu_copy(CPUArchState *env); instruction being executed. These, therefore, are not masked while single-stepping within the debugger. */ #define CPU_INTERRUPT_TGT_INT_0 0x0100 -#define CPU_INTERRUPT_TGT_INT_1 0x0400 -#define CPU_INTERRUPT_TGT_INT_2 0x0800 -#define CPU_INTERRUPT_TGT_INT_3 0x2000 +#define CPU_INTERRUPT_TGT_INT_1 0x0800 +#define CPU_INTERRUPT_TGT_INT_2 0x2000 /* First unused bit: 0x4000. */ diff --git a/include/sysemu/kvm.h b/include/sysemu/kvm.h index 5ad4e0e1e2..e7ad9d159a 100644 --- a/include/sysemu/kvm.h +++ b/include/sysemu/kvm.h @@ -245,8 +245,6 @@ int kvm_arch_init_vcpu(CPUState *cpu); /* Returns VCPU ID to be used on KVM_CREATE_VCPU ioctl() */ unsigned long kvm_arch_vcpu_id(CPUState *cpu); -void kvm_arch_reset_vcpu(CPUState *cpu); - int kvm_arch_on_sigbus_vcpu(CPUState *cpu, int code, void *addr); int kvm_arch_on_sigbus(int code, void *addr); @@ -383,4 +381,24 @@ void kvm_init_irq_routing(KVMState *s); * > 0: irq chip was created */ int kvm_arch_irqchip_create(KVMState *s); + +/** + * kvm_set_one_reg - set a register value in KVM via KVM_SET_ONE_REG ioctl + * @id: The register ID + * @source: The pointer to the value to be set. It must point to a variable + * of the correct type/size for the register being accessed. + * + * Returns: 0 on success, or a negative errno on failure. + */ +int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source); + +/** + * kvm_get_one_reg - get a register value from KVM via KVM_GET_ONE_REG ioctl + * @id: The register ID + * @target: The pointer where the value is to be stored. It must point to a + * variable of the correct type/size for the register being accessed. + * + * Returns: 0 on success, or a negative errno on failure. + */ +int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target); #endif diff --git a/kvm-all.c b/kvm-all.c index 5cb7f26f4f..a343ede4d4 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -223,13 +223,6 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot) return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &mem); } -static void kvm_reset_vcpu(void *opaque) -{ - CPUState *cpu = opaque; - - kvm_arch_reset_vcpu(cpu); -} - int kvm_init_vcpu(CPUState *cpu) { KVMState *s = kvm_state; @@ -269,10 +262,6 @@ int kvm_init_vcpu(CPUState *cpu) } ret = kvm_arch_init_vcpu(cpu); - if (ret == 0) { - qemu_register_reset(kvm_reset_vcpu, cpu); - kvm_arch_reset_vcpu(cpu); - } err: return ret; } @@ -2114,3 +2103,31 @@ int kvm_create_device(KVMState *s, uint64_t type, bool test) return test ? 0 : create_dev.fd; } + +int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source) +{ + struct kvm_one_reg reg; + int r; + + reg.id = id; + reg.addr = (uintptr_t) source; + r = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); + if (r) { + trace_kvm_failed_reg_set(id, strerror(r)); + } + return r; +} + +int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target) +{ + struct kvm_one_reg reg; + int r; + + reg.id = id; + reg.addr = (uintptr_t) target; + r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); + if (r) { + trace_kvm_failed_reg_get(id, strerror(r)); + } + return r; +} diff --git a/linux-user/main.c b/linux-user/main.c index c38fecfdd9..882186e1a0 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -4051,8 +4051,6 @@ int main(int argc, char **argv, char **envp) #endif #if defined(TARGET_I386) - cpu_x86_set_cpl(env, 3); - env->cr[0] = CR0_PG_MASK | CR0_WP_MASK | CR0_PE_MASK; env->hflags |= HF_PE_MASK; if (env->features[FEAT_1_EDX] & CPUID_SSE) { diff --git a/target-arm/cpu.c b/target-arm/cpu.c index c0ddc3e3df..6c6f2b3d46 100644 --- a/target-arm/cpu.c +++ b/target-arm/cpu.c @@ -29,6 +29,7 @@ #include "hw/arm/arm.h" #include "sysemu/sysemu.h" #include "sysemu/kvm.h" +#include "kvm_arm.h" static void arm_cpu_set_pc(CPUState *cs, vaddr value) { @@ -165,6 +166,12 @@ static void arm_cpu_reset(CPUState *s) * tb_flush(). */ tb_flush(env); + +#ifndef CONFIG_USER_ONLY + if (kvm_enabled()) { + kvm_arm_reset_vcpu(cpu); + } +#endif } #ifndef CONFIG_USER_ONLY diff --git a/target-arm/kvm32.c b/target-arm/kvm32.c index a690d9935f..b79750c57e 100644 --- a/target-arm/kvm32.c +++ b/target-arm/kvm32.c @@ -510,11 +510,9 @@ int kvm_arch_get_registers(CPUState *cs) return 0; } -void kvm_arch_reset_vcpu(CPUState *cs) +void kvm_arm_reset_vcpu(ARMCPU *cpu) { /* Feed the kernel back its initial register state */ - ARMCPU *cpu = ARM_CPU(cs); - memmove(cpu->cpreg_values, cpu->cpreg_reset_values, cpu->cpreg_array_len * sizeof(cpu->cpreg_values[0])); diff --git a/target-arm/kvm64.c b/target-arm/kvm64.c index e115879d9a..c729b9ec9f 100644 --- a/target-arm/kvm64.c +++ b/target-arm/kvm64.c @@ -260,6 +260,6 @@ int kvm_arch_get_registers(CPUState *cs) return ret; } -void kvm_arch_reset_vcpu(CPUState *cs) +void kvm_arm_reset_vcpu(ARMCPU *cpu) { } diff --git a/target-arm/kvm_arm.h b/target-arm/kvm_arm.h index 137c5671e9..dc4e2336fa 100644 --- a/target-arm/kvm_arm.h +++ b/target-arm/kvm_arm.h @@ -67,6 +67,14 @@ bool write_list_to_kvmstate(ARMCPU *cpu); */ bool write_kvmstate_to_list(ARMCPU *cpu); +/** + * kvm_arm_reset_vcpu: + * @cpu: ARMCPU + * + * Called at reset time to kernel registers to their initial values. + */ +void kvm_arm_reset_vcpu(ARMCPU *cpu); + #ifdef CONFIG_KVM /** * kvm_arm_create_scratch_host_vcpu: diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 8f193a9330..042a48d703 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -24,6 +24,7 @@ #include "cpu.h" #include "sysemu/kvm.h" #include "sysemu/cpus.h" +#include "kvm_i386.h" #include "topology.h" #include "qemu/option.h" @@ -2417,8 +2418,7 @@ static void x86_cpu_reset(CPUState *s) xcc->parent_reset(s); - - memset(env, 0, offsetof(CPUX86State, pat)); + memset(env, 0, offsetof(CPUX86State, cpuid_level)); tlb_flush(s, 1); @@ -2484,8 +2484,7 @@ static void x86_cpu_reset(CPUState *s) cpu_breakpoint_remove_all(s, BP_CPU); cpu_watchpoint_remove_all(s, BP_CPU); - env->tsc_adjust = 0; - env->tsc = 0; + env->xcr0 = 1; #if !defined(CONFIG_USER_ONLY) /* We hard-wire the BSP to the first CPU. */ @@ -2494,6 +2493,10 @@ static void x86_cpu_reset(CPUState *s) } s->halted = !cpu_is_bsp(cpu); + + if (kvm_enabled()) { + kvm_arch_reset_vcpu(cpu); + } #endif } diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 2a22a7d64e..e9cbdabc03 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -124,9 +124,9 @@ #define ID_MASK 0x00200000 /* hidden flags - used internally by qemu to represent additional cpu - states. Only the CPL, INHIBIT_IRQ, SMM and SVMI are not - redundant. We avoid using the IOPL_MASK, TF_MASK, VM_MASK and AC_MASK - bit positions to ease oring with eflags. */ + states. Only the INHIBIT_IRQ, SMM and SVMI are not redundant. We + avoid using the IOPL_MASK, TF_MASK, VM_MASK and AC_MASK bit + positions to ease oring with eflags. */ /* current cpl */ #define HF_CPL_SHIFT 0 /* true if soft mmu is being used */ @@ -606,10 +606,11 @@ typedef uint32_t FeatureWordArray[FEATURE_WORDS]; #define CPU_INTERRUPT_NMI CPU_INTERRUPT_TGT_EXT_3 #define CPU_INTERRUPT_MCE CPU_INTERRUPT_TGT_EXT_4 #define CPU_INTERRUPT_VIRQ CPU_INTERRUPT_TGT_INT_0 -#define CPU_INTERRUPT_INIT CPU_INTERRUPT_TGT_INT_1 -#define CPU_INTERRUPT_SIPI CPU_INTERRUPT_TGT_INT_2 -#define CPU_INTERRUPT_TPR CPU_INTERRUPT_TGT_INT_3 +#define CPU_INTERRUPT_SIPI CPU_INTERRUPT_TGT_INT_1 +#define CPU_INTERRUPT_TPR CPU_INTERRUPT_TGT_INT_2 +/* Use a clearer name for this. */ +#define CPU_INTERRUPT_INIT CPU_INTERRUPT_RESET typedef enum { CC_OP_DYNAMIC, /* must use dynamic code to get cc_op */ @@ -797,6 +798,13 @@ typedef struct CPUX86State { target_ulong cr[5]; /* NOTE: cr1 is unused */ int32_t a20_mask; + BNDReg bnd_regs[4]; + BNDCSReg bndcs_regs; + uint64_t msr_bndcfgs; + + /* Beginning of state preserved by INIT (dummy marker). */ + struct {} start_init_save; + /* FPU state */ unsigned int fpstt; /* top of stack index */ uint16_t fpus; @@ -819,6 +827,8 @@ typedef struct CPUX86State { XMMReg xmm_t0; MMXReg mmx_t0; + XMMReg ymmh_regs[CPU_NB_REGS]; + /* sysenter registers */ uint32_t sysenter_cs; target_ulong sysenter_esp; @@ -827,15 +837,6 @@ typedef struct CPUX86State { uint64_t star; uint64_t vm_hsave; - uint64_t vm_vmcb; - uint64_t tsc_offset; - uint64_t intercept; - uint16_t intercept_cr_read; - uint16_t intercept_cr_write; - uint16_t intercept_dr_read; - uint16_t intercept_dr_write; - uint32_t intercept_exceptions; - uint8_t v_tpr; #ifdef TARGET_X86_64 target_ulong lstar; @@ -843,11 +844,6 @@ typedef struct CPUX86State { target_ulong fmask; target_ulong kernelgsbase; #endif - uint64_t system_time_msr; - uint64_t wall_clock_msr; - uint64_t steal_time_msr; - uint64_t async_pf_en_msr; - uint64_t pv_eoi_en_msr; uint64_t tsc; uint64_t tsc_adjust; @@ -864,6 +860,19 @@ typedef struct CPUX86State { uint64_t msr_fixed_counters[MAX_FIXED_COUNTERS]; uint64_t msr_gp_counters[MAX_GP_COUNTERS]; uint64_t msr_gp_evtsel[MAX_GP_COUNTERS]; + + uint64_t pat; + uint32_t smbase; + + /* End of state preserved by INIT (dummy marker). */ + struct {} end_init_save; + + uint64_t system_time_msr; + uint64_t wall_clock_msr; + uint64_t steal_time_msr; + uint64_t async_pf_en_msr; + uint64_t pv_eoi_en_msr; + uint64_t msr_hv_hypercall; uint64_t msr_hv_guest_os_id; uint64_t msr_hv_vapic; @@ -878,9 +887,18 @@ typedef struct CPUX86State { struct CPUBreakpoint *cpu_breakpoint[4]; struct CPUWatchpoint *cpu_watchpoint[4]; }; /* break/watchpoints for dr[0..3] */ - uint32_t smbase; int old_exception; /* exception in flight */ + uint64_t vm_vmcb; + uint64_t tsc_offset; + uint64_t intercept; + uint16_t intercept_cr_read; + uint16_t intercept_cr_write; + uint16_t intercept_dr_read; + uint16_t intercept_dr_write; + uint32_t intercept_exceptions; + uint8_t v_tpr; + /* KVM states, automatically cleared on reset */ uint8_t nmi_injected; uint8_t nmi_pending; @@ -888,7 +906,6 @@ typedef struct CPUX86State { CPU_COMMON /* Fields from here on are preserved across CPU reset. */ - uint64_t pat; /* processor features (e.g. for CPUID insn) */ uint32_t cpuid_level; @@ -928,12 +945,7 @@ typedef struct CPUX86State { uint16_t fpus_vmstate; uint16_t fptag_vmstate; uint16_t fpregs_format_vmstate; - uint64_t xstate_bv; - XMMReg ymmh_regs[CPU_NB_REGS]; - BNDReg bnd_regs[4]; - BNDCSReg bndcs_regs; - uint64_t msr_bndcfgs; uint64_t xcr0; @@ -974,6 +986,7 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, /* update the hidden flags */ { if (seg_reg == R_CS) { + int cpl = selector & 3; #ifdef TARGET_X86_64 if ((env->hflags & HF_LMA_MASK) && (flags & DESC_L_MASK)) { /* long mode */ @@ -983,11 +996,19 @@ static inline void cpu_x86_load_seg_cache(CPUX86State *env, #endif { /* legacy / compatibility case */ + if (!(env->cr[0] & CR0_PE_MASK)) + cpl = 0; + else if (env->eflags & VM_MASK) + cpl = 3; new_hflags = (env->segs[R_CS].flags & DESC_B_MASK) >> (DESC_B_SHIFT - HF_CS32_SHIFT); env->hflags = (env->hflags & ~(HF_CS32_MASK | HF_CS64_MASK)) | new_hflags; } +#if HF_CPL_MASK != 3 +#error HF_CPL_MASK is hardcoded +#endif + env->hflags = (env->hflags & ~HF_CPL_MASK) | cpl; } new_hflags = (env->segs[R_SS].flags & DESC_B_MASK) >> (DESC_B_SHIFT - HF_SS32_SHIFT); @@ -1031,16 +1052,6 @@ int cpu_x86_get_descr_debug(CPUX86State *env, unsigned int selector, target_ulong *base, unsigned int *limit, unsigned int *flags); -/* wrapper, just in case memory mappings must be changed */ -static inline void cpu_x86_set_cpl(CPUX86State *s, int cpl) -{ -#if HF_CPL_MASK == 3 - s->hflags = (s->hflags & ~HF_CPL_MASK) | cpl; -#else -#error HF_CPL_MASK is hardcoded -#endif -} - /* op_helper.c */ /* used for debug or cpu save/restore */ void cpu_get_fp80(uint64_t *pmant, uint16_t *pexp, floatx80 f); diff --git a/target-i386/helper.c b/target-i386/helper.c index 372f0e3ecb..46d20e4b89 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -19,6 +19,7 @@ #include "cpu.h" #include "sysemu/kvm.h" +#include "kvm_i386.h" #ifndef CONFIG_USER_ONLY #include "sysemu/sysemu.h" #include "monitor/monitor.h" @@ -1329,12 +1330,21 @@ void do_cpu_init(X86CPU *cpu) { CPUState *cs = CPU(cpu); CPUX86State *env = &cpu->env; + CPUX86State *save = g_new(CPUX86State, 1); int sipi = cs->interrupt_request & CPU_INTERRUPT_SIPI; - uint64_t pat = env->pat; + + *save = *env; cpu_reset(cs); cs->interrupt_request = sipi; - env->pat = pat; + memcpy(&env->start_init_save, &save->start_init_save, + offsetof(CPUX86State, end_init_save) - + offsetof(CPUX86State, start_init_save)); + g_free(save); + + if (kvm_enabled()) { + kvm_arch_do_init_vcpu(cpu); + } apic_init_reset(cpu->apic_state); } diff --git a/target-i386/kvm.c b/target-i386/kvm.c index 4389959f61..0d894ef4aa 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -30,6 +30,8 @@ #include "qemu/config-file.h" #include "hw/i386/pc.h" #include "hw/i386/apic.h" +#include "hw/i386/apic_internal.h" +#include "hw/i386/apic-msidef.h" #include "exec/ioport.h" #include #include "hw/pci/pci.h" @@ -130,14 +132,13 @@ static const struct kvm_para_features { { KVM_CAP_NOP_IO_DELAY, KVM_FEATURE_NOP_IO_DELAY }, { KVM_CAP_PV_MMU, KVM_FEATURE_MMU_OP }, { KVM_CAP_ASYNC_PF, KVM_FEATURE_ASYNC_PF }, - { -1, -1 } }; static int get_para_features(KVMState *s) { int i, features = 0; - for (i = 0; i < ARRAY_SIZE(para_features) - 1; i++) { + for (i = 0; i < ARRAY_SIZE(para_features); i++) { if (kvm_check_extension(s, para_features[i].cap)) { features |= (1 << para_features[i].feature); } @@ -724,9 +725,8 @@ int kvm_arch_init_vcpu(CPUState *cs) return 0; } -void kvm_arch_reset_vcpu(CPUState *cs) +void kvm_arch_reset_vcpu(X86CPU *cpu) { - X86CPU *cpu = X86_CPU(cs); CPUX86State *env = &cpu->env; env->exception_injected = -1; @@ -740,6 +740,16 @@ void kvm_arch_reset_vcpu(CPUState *cs) } } +void kvm_arch_do_init_vcpu(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + + /* APs get directly into wait-for-SIPI state. */ + if (env->mp_state == KVM_MP_STATE_UNINITIALIZED) { + env->mp_state = KVM_MP_STATE_INIT_RECEIVED; + } +} + static int kvm_get_supported_msrs(KVMState *s) { static int kvm_supported_msrs; @@ -2005,14 +2015,15 @@ void kvm_arch_pre_run(CPUState *cpu, struct kvm_run *run) } } - if (!kvm_irqchip_in_kernel()) { - /* Force the VCPU out of its inner loop to process any INIT requests - * or pending TPR access reports. */ - if (cpu->interrupt_request & - (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { - cpu->exit_request = 1; - } + /* Force the VCPU out of its inner loop to process any INIT requests + * or (for userspace APIC, but it is cheap to combine the checks here) + * pending TPR access reports. + */ + if (cpu->interrupt_request & (CPU_INTERRUPT_INIT | CPU_INTERRUPT_TPR)) { + cpu->exit_request = 1; + } + if (!kvm_irqchip_in_kernel()) { /* Try to inject an interrupt if the guest can accept it */ if (run->ready_for_interrupt_injection && (cpu->interrupt_request & CPU_INTERRUPT_HARD) && @@ -2092,6 +2103,11 @@ int kvm_arch_process_async_events(CPUState *cs) } } + if (cs->interrupt_request & CPU_INTERRUPT_INIT) { + kvm_cpu_synchronize_state(cs); + do_cpu_init(cpu); + } + if (kvm_irqchip_in_kernel()) { return 0; } @@ -2105,10 +2121,6 @@ int kvm_arch_process_async_events(CPUState *cs) (cs->interrupt_request & CPU_INTERRUPT_NMI)) { cs->halted = 0; } - if (cs->interrupt_request & CPU_INTERRUPT_INIT) { - kvm_cpu_synchronize_state(cs); - do_cpu_init(cpu); - } if (cs->interrupt_request & CPU_INTERRUPT_SIPI) { kvm_cpu_synchronize_state(cs); do_cpu_sipi(cpu); diff --git a/target-i386/kvm_i386.h b/target-i386/kvm_i386.h index 4392ab4359..cac30fd381 100644 --- a/target-i386/kvm_i386.h +++ b/target-i386/kvm_i386.h @@ -14,6 +14,8 @@ #include "sysemu/kvm.h" bool kvm_allows_irq0_override(void); +void kvm_arch_reset_vcpu(X86CPU *cs); +void kvm_arch_do_init_vcpu(X86CPU *cs); int kvm_device_pci_assign(KVMState *s, PCIHostDeviceAddress *dev_addr, uint32_t flags, uint32_t *dev_id); diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index 8c3f92c22b..3cf862ee60 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -409,11 +409,7 @@ static void switch_tss(CPUX86State *env, int tss_selector, for (i = 0; i < 6; i++) { load_seg_vm(env, i, new_segs[i]); } - /* in vm86, CPL is always 3 */ - cpu_x86_set_cpl(env, 3); } else { - /* CPL is set the RPL of CS */ - cpu_x86_set_cpl(env, new_segs[R_CS] & 3); /* first just selectors as the rest may trigger exceptions */ for (i = 0; i < 6; i++) { cpu_x86_load_seg_cache(env, i, new_segs[i], 0, 0, 0); @@ -739,6 +735,12 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, } } + /* interrupt gate clear IF mask */ + if ((type & 1) == 0) { + env->eflags &= ~IF_MASK; + } + env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK); + if (new_stack) { if (env->eflags & VM_MASK) { cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0, 0); @@ -757,14 +759,7 @@ static void do_interrupt_protected(CPUX86State *env, int intno, int is_int, get_seg_base(e1, e2), get_seg_limit(e1, e2), e2); - cpu_x86_set_cpl(env, dpl); env->eip = offset; - - /* interrupt gate clear IF mask */ - if ((type & 1) == 0) { - env->eflags &= ~IF_MASK; - } - env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK); } #ifdef TARGET_X86_64 @@ -911,6 +906,12 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, PUSHQ(esp, error_code); } + /* interrupt gate clear IF mask */ + if ((type & 1) == 0) { + env->eflags &= ~IF_MASK; + } + env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK); + if (new_stack) { ss = 0 | dpl; cpu_x86_load_seg_cache(env, R_SS, ss, 0, 0, 0); @@ -922,14 +923,7 @@ static void do_interrupt64(CPUX86State *env, int intno, int is_int, get_seg_base(e1, e2), get_seg_limit(e1, e2), e2); - cpu_x86_set_cpl(env, dpl); env->eip = offset; - - /* interrupt gate clear IF mask */ - if ((type & 1) == 0) { - env->eflags &= ~IF_MASK; - } - env->eflags &= ~(TF_MASK | VM_MASK | RF_MASK | NT_MASK); } #endif @@ -960,7 +954,8 @@ void helper_syscall(CPUX86State *env, int next_eip_addend) code64 = env->hflags & HF_CS64_MASK; - cpu_x86_set_cpl(env, 0); + env->eflags &= ~env->fmask; + cpu_load_eflags(env, env->eflags, 0); cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, 0, 0xffffffff, DESC_G_MASK | DESC_P_MASK | @@ -972,8 +967,6 @@ void helper_syscall(CPUX86State *env, int next_eip_addend) DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | DESC_A_MASK); - env->eflags &= ~env->fmask; - cpu_load_eflags(env, env->eflags, 0); if (code64) { env->eip = env->lstar; } else { @@ -982,7 +975,7 @@ void helper_syscall(CPUX86State *env, int next_eip_addend) } else { env->regs[R_ECX] = (uint32_t)(env->eip + next_eip_addend); - cpu_x86_set_cpl(env, 0); + env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK); cpu_x86_load_seg_cache(env, R_CS, selector & 0xfffc, 0, 0xffffffff, DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | @@ -993,7 +986,6 @@ void helper_syscall(CPUX86State *env, int next_eip_addend) DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | DESC_A_MASK); - env->eflags &= ~(IF_MASK | RF_MASK | VM_MASK); env->eip = (uint32_t)env->star; } } @@ -1014,6 +1006,9 @@ void helper_sysret(CPUX86State *env, int dflag) } selector = (env->star >> 48) & 0xffff; if (env->hflags & HF_LMA_MASK) { + cpu_load_eflags(env, (uint32_t)(env->regs[11]), TF_MASK | AC_MASK + | ID_MASK | IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | + NT_MASK); if (dflag == 2) { cpu_x86_load_seg_cache(env, R_CS, (selector + 16) | 3, 0, 0xffffffff, @@ -1035,11 +1030,8 @@ void helper_sysret(CPUX86State *env, int dflag) DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | (3 << DESC_DPL_SHIFT) | DESC_W_MASK | DESC_A_MASK); - cpu_load_eflags(env, (uint32_t)(env->regs[11]), TF_MASK | AC_MASK - | ID_MASK | IF_MASK | IOPL_MASK | VM_MASK | RF_MASK | - NT_MASK); - cpu_x86_set_cpl(env, 3); } else { + env->eflags |= IF_MASK; cpu_x86_load_seg_cache(env, R_CS, selector | 3, 0, 0xffffffff, DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | @@ -1051,8 +1043,6 @@ void helper_sysret(CPUX86State *env, int dflag) DESC_G_MASK | DESC_B_MASK | DESC_P_MASK | DESC_S_MASK | (3 << DESC_DPL_SHIFT) | DESC_W_MASK | DESC_A_MASK); - env->eflags |= IF_MASK; - cpu_x86_set_cpl(env, 3); } } #endif @@ -1905,7 +1895,6 @@ void helper_lcall_protected(CPUX86State *env, int new_cs, target_ulong new_eip, get_seg_base(e1, e2), get_seg_limit(e1, e2), e2); - cpu_x86_set_cpl(env, dpl); SET_ESP(sp, sp_mask); env->eip = offset; } @@ -2134,7 +2123,6 @@ static inline void helper_ret_protected(CPUX86State *env, int shift, get_seg_base(e1, e2), get_seg_limit(e1, e2), e2); - cpu_x86_set_cpl(env, rpl); sp = new_esp; #ifdef TARGET_X86_64 if (env->hflags & HF_CS64_MASK) { @@ -2185,7 +2173,6 @@ static inline void helper_ret_protected(CPUX86State *env, int shift, IF_MASK | IOPL_MASK | VM_MASK | NT_MASK | VIF_MASK | VIP_MASK); load_seg_vm(env, R_CS, new_cs & 0xffff); - cpu_x86_set_cpl(env, 3); load_seg_vm(env, R_SS, new_ss & 0xffff); load_seg_vm(env, R_ES, new_es & 0xffff); load_seg_vm(env, R_DS, new_ds & 0xffff); @@ -2238,7 +2225,6 @@ void helper_sysenter(CPUX86State *env) raise_exception_err(env, EXCP0D_GPF, 0); } env->eflags &= ~(VM_MASK | IF_MASK | RF_MASK); - cpu_x86_set_cpl(env, 0); #ifdef TARGET_X86_64 if (env->hflags & HF_LMA_MASK) { @@ -2274,7 +2260,6 @@ void helper_sysexit(CPUX86State *env, int dflag) if (env->sysenter_cs == 0 || cpl != 0) { raise_exception_err(env, EXCP0D_GPF, 0); } - cpu_x86_set_cpl(env, 3); #ifdef TARGET_X86_64 if (dflag == 2) { cpu_x86_load_seg_cache(env, R_CS, ((env->sysenter_cs + 32) & 0xfffc) | diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c index 35901c9e89..4841d53b24 100644 --- a/target-i386/smm_helper.c +++ b/target-i386/smm_helper.c @@ -163,6 +163,13 @@ void do_smm_enter(X86CPU *cpu) cpu_load_eflags(env, 0, ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); env->eip = 0x00008000; + cpu_x86_update_cr0(env, + env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | + CR0_PG_MASK)); + cpu_x86_update_cr4(env, 0); + env->dr[7] = 0x00000400; + CC_OP = CC_OP_EFLAGS; + cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase, 0xffffffff, 0); cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, 0); @@ -170,13 +177,6 @@ void do_smm_enter(X86CPU *cpu) cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, 0); cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, 0); cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, 0); - - cpu_x86_update_cr0(env, - env->cr[0] & ~(CR0_PE_MASK | CR0_EM_MASK | CR0_TS_MASK | - CR0_PG_MASK)); - cpu_x86_update_cr4(env, 0); - env->dr[7] = 0x00000400; - CC_OP = CC_OP_EFLAGS; } void helper_rsm(CPUX86State *env) @@ -191,16 +191,6 @@ void helper_rsm(CPUX86State *env) #ifdef TARGET_X86_64 cpu_load_efer(env, ldq_phys(cs->as, sm_state + 0x7ed0)); - for (i = 0; i < 6; i++) { - offset = 0x7e00 + i * 16; - cpu_x86_load_seg_cache(env, i, - lduw_phys(cs->as, sm_state + offset), - ldq_phys(cs->as, sm_state + offset + 8), - ldl_phys(cs->as, sm_state + offset + 4), - (lduw_phys(cs->as, sm_state + offset + 2) & - 0xf0ff) << 8); - } - env->gdt.base = ldq_phys(cs->as, sm_state + 0x7e68); env->gdt.limit = ldl_phys(cs->as, sm_state + 0x7e64); @@ -238,6 +228,16 @@ void helper_rsm(CPUX86State *env) cpu_x86_update_cr3(env, ldl_phys(cs->as, sm_state + 0x7f50)); cpu_x86_update_cr0(env, ldl_phys(cs->as, sm_state + 0x7f58)); + for (i = 0; i < 6; i++) { + offset = 0x7e00 + i * 16; + cpu_x86_load_seg_cache(env, i, + lduw_phys(cs->as, sm_state + offset), + ldq_phys(cs->as, sm_state + offset + 8), + ldl_phys(cs->as, sm_state + offset + 4), + (lduw_phys(cs->as, sm_state + offset + 2) & + 0xf0ff) << 8); + } + val = ldl_phys(cs->as, sm_state + 0x7efc); /* revision ID */ if (val & 0x20000) { env->smbase = ldl_phys(cs->as, sm_state + 0x7f00) & ~0x7fff; diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c index aa17ecdece..846eaa5918 100644 --- a/target-i386/svm_helper.c +++ b/target-i386/svm_helper.c @@ -282,9 +282,6 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->vm_vmcb + offsetof(struct vmcb, save.dr7)); env->dr[6] = ldq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, save.dr6)); - cpu_x86_set_cpl(env, ldub_phys(cs->as, - env->vm_vmcb + offsetof(struct vmcb, - save.cpl))); /* FIXME: guest state consistency checks */ @@ -703,7 +700,8 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) cpu_load_eflags(env, ldq_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, save.rflags)), - ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); + ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK | + VM_MASK)); CC_OP = CC_OP_EFLAGS; svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.es), @@ -728,7 +726,6 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) env->vm_hsave + offsetof(struct vmcb, save.dr7)); /* other setups */ - cpu_x86_set_cpl(env, 0); stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code); stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), @@ -756,10 +753,6 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) from the page table indicated the host's CR3. If the PDPEs contain illegal state, the processor causes a shutdown. */ - /* Forces CR0.PE = 1, RFLAGS.VM = 0. */ - env->cr[0] |= CR0_PE_MASK; - env->eflags &= ~VM_MASK; - /* Disables all breakpoints in the host DR7 register. */ /* Checks the reloaded host state for consistency. */ diff --git a/target-ppc/cpu.h b/target-ppc/cpu.h index d4983405a2..75ed5fa636 100644 --- a/target-ppc/cpu.h +++ b/target-ppc/cpu.h @@ -2042,9 +2042,6 @@ enum { PPC_INTERRUPT_PERFM, /* Performance monitor interrupt */ }; -/* CPU should be reset next, restart from scratch afterwards */ -#define CPU_INTERRUPT_RESET CPU_INTERRUPT_TGT_INT_0 - /*****************************************************************************/ static inline target_ulong cpu_read_xer(CPUPPCState *env) diff --git a/target-ppc/kvm.c b/target-ppc/kvm.c index 4b81e5f253..8ff1777dcb 100644 --- a/target-ppc/kvm.c +++ b/target-ppc/kvm.c @@ -430,10 +430,6 @@ int kvm_arch_init_vcpu(CPUState *cs) return ret; } -void kvm_arch_reset_vcpu(CPUState *cpu) -{ -} - static void kvm_sw_tlb_put(PowerPCCPU *cpu) { CPUPPCState *env = &cpu->env; diff --git a/target-s390x/cpu.c b/target-s390x/cpu.c index dfd83e8aef..c3082b73c5 100644 --- a/target-s390x/cpu.c +++ b/target-s390x/cpu.c @@ -152,6 +152,10 @@ static void s390_cpu_full_reset(CPUState *s) * after incrementing the cpu counter */ #if !defined(CONFIG_USER_ONLY) s->halted = 1; + + if (kvm_enabled()) { + kvm_s390_reset_vcpu(cpu); + } #endif tlb_flush(s, 1); } diff --git a/target-s390x/cpu.h b/target-s390x/cpu.h index aad277af49..06454d6da5 100644 --- a/target-s390x/cpu.h +++ b/target-s390x/cpu.h @@ -359,11 +359,16 @@ void s390x_cpu_timer(void *opaque); int s390_virtio_hypercall(CPUS390XState *env); #ifdef CONFIG_KVM +void kvm_s390_reset_vcpu(S390CPU *cpu); void kvm_s390_interrupt(S390CPU *cpu, int type, uint32_t code); void kvm_s390_virtio_irq(S390CPU *cpu, int config_change, uint64_t token); void kvm_s390_interrupt_internal(S390CPU *cpu, int type, uint32_t parm, uint64_t parm64, int vm); #else +static inline void kvm_s390_reset_vcpu(S390CPU *cpu) +{ +} + static inline void kvm_s390_interrupt(S390CPU *cpu, int type, uint32_t code) { } diff --git a/target-s390x/kvm.c b/target-s390x/kvm.c index b7b0edc4f1..56179afece 100644 --- a/target-s390x/kvm.c +++ b/target-s390x/kvm.c @@ -117,47 +117,20 @@ int kvm_arch_init_vcpu(CPUState *cpu) return 0; } -void kvm_arch_reset_vcpu(CPUState *cpu) +void kvm_s390_reset_vcpu(S390CPU *cpu) { + CPUState *cs = CPU(cpu); + /* The initial reset call is needed here to reset in-kernel * vcpu data that we can't access directly from QEMU * (i.e. with older kernels which don't support sync_regs/ONE_REG). * Before this ioctl cpu_synchronize_state() is called in common kvm * code (kvm-all) */ - if (kvm_vcpu_ioctl(cpu, KVM_S390_INITIAL_RESET, NULL)) { + if (kvm_vcpu_ioctl(cs, KVM_S390_INITIAL_RESET, NULL)) { perror("Can't reset vcpu\n"); } } -static int kvm_set_one_reg(CPUState *cs, uint64_t id, void *source) -{ - struct kvm_one_reg reg; - int r; - - reg.id = id; - reg.addr = (uint64_t) source; - r = kvm_vcpu_ioctl(cs, KVM_SET_ONE_REG, ®); - if (r) { - trace_kvm_failed_reg_set(id, strerror(errno)); - } - return r; -} - -static int kvm_get_one_reg(CPUState *cs, uint64_t id, void *target) -{ - struct kvm_one_reg reg; - int r; - - reg.id = id; - reg.addr = (uint64_t) target; - r = kvm_vcpu_ioctl(cs, KVM_GET_ONE_REG, ®); - if (r) { - trace_kvm_failed_reg_get(id, strerror(errno)); - } - return r; -} - - int kvm_arch_put_registers(CPUState *cs, int level) { S390CPU *cpu = S390_CPU(cs); diff --git a/trace-events b/trace-events index af4449d2ba..2c5b30763e 100644 --- a/trace-events +++ b/trace-events @@ -1230,6 +1230,8 @@ kvm_run_exit(int cpu_index, uint32_t reason) "cpu_index %d, reason %d" kvm_device_ioctl(int fd, int type, void *arg) "dev fd %d, type 0x%x, arg %p" kvm_failed_spr_set(int str, const char *msg) "Warning: Unable to set SPR %d to KVM: %s" kvm_failed_spr_get(int str, const char *msg) "Warning: Unable to retrieve SPR %d from KVM: %s" +kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" +kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s" # memory.c memory_region_ops_read(void *mr, uint64_t addr, uint64_t value, unsigned size) "mr %p addr %#"PRIx64" value %#"PRIx64" size %u" @@ -1246,7 +1248,3 @@ xen_pv_mmio_write(uint64_t addr) "WARNING: write to Xen PV Device MMIO space (ad # hw/pci/pci_host.c pci_cfg_read(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x -> 0x%x" pci_cfg_write(const char *dev, unsigned devid, unsigned fnid, unsigned offs, unsigned val) "%s %02u:%u @0x%x <- 0x%x" - -# target-s390/kvm.c -kvm_failed_reg_get(uint64_t id, const char *msg) "Warning: Unable to retrieve ONEREG %" PRIu64 " from KVM: %s" -kvm_failed_reg_set(uint64_t id, const char *msg) "Warning: Unable to set ONEREG %" PRIu64 " to KVM: %s"