From f85c758dbee54cc3612a6e873ef7cecdb66ebee5 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 17 Jul 2017 11:14:26 +0300 Subject: [PATCH 1/6] KVM: x86: masking out upper bits MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_read_cr3() returns an unsigned long and gfn is a u64. We intended to mask out the bottom 5 bits but because of the type issue we mask the top 32 bits as well. I don't know if this is a real problem, but it causes static checker warnings. Signed-off-by: Dan Carpenter Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 5b8f07889f6a..82a63c59f77b 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -597,8 +597,8 @@ bool pdptrs_changed(struct kvm_vcpu *vcpu) (unsigned long *)&vcpu->arch.regs_avail)) return true; - gfn = (kvm_read_cr3(vcpu) & ~31u) >> PAGE_SHIFT; - offset = (kvm_read_cr3(vcpu) & ~31u) & (PAGE_SIZE - 1); + gfn = (kvm_read_cr3(vcpu) & ~31ul) >> PAGE_SHIFT; + offset = (kvm_read_cr3(vcpu) & ~31ul) & (PAGE_SIZE - 1); r = kvm_read_nested_guest_page(vcpu, gfn, pdpte, offset, sizeof(pdpte), PFERR_USER_MASK | PFERR_WRITE_MASK); if (r < 0) From 4c4a6f790ee862ee9f0dc8b35c71f55bcf792b71 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 14 Jul 2017 13:36:11 +0200 Subject: [PATCH 2/6] KVM: nVMX: track NMI blocking state separately for each VMCS MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vmx_recover_nmi_blocking is using a cached value of the guest interruptibility info, which is stored in vmx->nmi_known_unmasked. vmx_recover_nmi_blocking is run for both normal and nested guests, so the cached value must be per-VMCS. This fixes eventinj.flat in a nested non-EPT environment. With EPT it works, because the EPT violation handler doesn't have the vmx->nmi_known_unmasked optimization (it is unnecessary because, unlike vmx_recover_nmi_blocking, it can just look at the exit qualification). Thanks to Wanpeng Li for debugging the testcase and providing an initial patch. Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 84e62acf2dd8..a0c472bcb2a2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -198,7 +198,8 @@ struct loaded_vmcs { struct vmcs *vmcs; struct vmcs *shadow_vmcs; int cpu; - int launched; + bool launched; + bool nmi_known_unmasked; struct list_head loaded_vmcss_on_cpu_link; }; @@ -5510,10 +5511,8 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); - if (!is_guest_mode(vcpu)) { - ++vcpu->stat.nmi_injections; - vmx->nmi_known_unmasked = false; - } + ++vcpu->stat.nmi_injections; + vmx->loaded_vmcs->nmi_known_unmasked = false; if (vmx->rmode.vm86_active) { if (kvm_inject_realmode_interrupt(vcpu, NMI_VECTOR, 0) != EMULATE_DONE) @@ -5527,16 +5526,21 @@ static void vmx_inject_nmi(struct kvm_vcpu *vcpu) static bool vmx_get_nmi_mask(struct kvm_vcpu *vcpu) { - if (to_vmx(vcpu)->nmi_known_unmasked) + struct vcpu_vmx *vmx = to_vmx(vcpu); + bool masked; + + if (vmx->loaded_vmcs->nmi_known_unmasked) return false; - return vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; + masked = vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI; + vmx->loaded_vmcs->nmi_known_unmasked = !masked; + return masked; } static void vmx_set_nmi_mask(struct kvm_vcpu *vcpu, bool masked) { struct vcpu_vmx *vmx = to_vmx(vcpu); - vmx->nmi_known_unmasked = !masked; + vmx->loaded_vmcs->nmi_known_unmasked = !masked; if (masked) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); @@ -8736,7 +8740,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) idtv_info_valid = vmx->idt_vectoring_info & VECTORING_INFO_VALID_MASK; - if (vmx->nmi_known_unmasked) + if (vmx->loaded_vmcs->nmi_known_unmasked) return; /* * Can't use vmx->exit_intr_info since we're not sure what @@ -8760,7 +8764,7 @@ static void vmx_recover_nmi_blocking(struct vcpu_vmx *vmx) vmcs_set_bits(GUEST_INTERRUPTIBILITY_INFO, GUEST_INTR_STATE_NMI); else - vmx->nmi_known_unmasked = + vmx->loaded_vmcs->nmi_known_unmasked = !(vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_NMI); } From b3f1dfb6e818a2352c38e3e37bb983eae98621b5 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Mon, 17 Jul 2017 12:00:34 -0700 Subject: [PATCH 3/6] KVM: nVMX: Disallow VM-entry in MOV-SS shadow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Immediately following MOV-to-SS/POP-to-SS, VM-entry is disallowed. This check comes after the check for a valid VMCS. When this check fails, the instruction pointer should fall through to the next instruction, the ALU flags should be set to indicate VMfailValid, and the VM-instruction error should be set to 26 ("VM entry with events blocked by MOV SS"). Signed-off-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a0c472bcb2a2..791c018a0034 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10492,6 +10492,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) { struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu); u32 exit_qual; int ret; @@ -10516,6 +10517,12 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * for misconfigurations which will anyway be caught by the processor * when using the merged vmcs02. */ + if (interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS) { + nested_vmx_failValid(vcpu, + VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS); + goto out; + } + if (vmcs12->launch_state == launch) { nested_vmx_failValid(vcpu, launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS From c2ce3f5d89d57301e2756ac325fe2ebc33bfec30 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 19 Jul 2017 14:53:04 +0200 Subject: [PATCH 4/6] x86: add MULTIUSER dependency for KVM MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM tries to select 'TASKSTATS', which had additional dependencies: warning: (KVM) selects TASKSTATS which has unmet direct dependencies (NET && MULTIUSER) Signed-off-by: Arnd Bergmann Signed-off-by: Radim Krčmář --- arch/x86/kvm/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/x86/kvm/Kconfig b/arch/x86/kvm/Kconfig index 760433b2574a..2688c7dc5323 100644 --- a/arch/x86/kvm/Kconfig +++ b/arch/x86/kvm/Kconfig @@ -22,7 +22,7 @@ config KVM depends on HAVE_KVM depends on HIGH_RES_TIMERS # for TASKSTATS/TASK_DELAY_ACCT: - depends on NET + depends on NET && MULTIUSER select PREEMPT_NOTIFIERS select MMU_NOTIFIER select ANON_INODES From f244deed7a254b4a61333e6886575ce0102e8356 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 20 Jul 2017 01:11:54 -0700 Subject: [PATCH 5/6] KVM: VMX: Fix invalid guest state detection after task-switch emulation MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be reproduced by EPT=1, unrestricted_guest=N, emulate_invalid_state=Y or EPT=0, the trace of kvm-unit-tests/taskswitch2.flat is like below, it tries to emulate invalid guest state task-switch: kvm_exit: reason TASK_SWITCH rip 0x0 info 40000058 0 kvm_emulate_insn: 42000:0:0f 0b (0x2) kvm_emulate_insn: 42000:0:0f 0b (0x2) failed kvm_inj_exception: #UD (0x0) kvm_entry: vcpu 0 kvm_exit: reason TASK_SWITCH rip 0x0 info 40000058 0 kvm_emulate_insn: 42000:0:0f 0b (0x2) kvm_emulate_insn: 42000:0:0f 0b (0x2) failed kvm_inj_exception: #UD (0x0) ...................... It appears that the task-switch emulation updates rflags (and vm86 flag) only after the segments are loaded, causing vmx->emulation_required to be set, when in fact invalid guest state emulation is not needed. This patch fixes it by updating vmx->emulation_required after the rflags (and vm86 flag) is updated in task-switch emulation. Thanks Radim for moving the update to vmx__set_flags and adding Paolo's suggestion for the check. Suggested-by: Nadav Amit Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Nadav Amit Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 791c018a0034..29fd8af5c347 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2327,6 +2327,11 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) __vmx_load_host_state(to_vmx(vcpu)); } +static bool emulation_required(struct kvm_vcpu *vcpu) +{ + return emulate_invalid_guest_state && !guest_state_valid(vcpu); +} + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); /* @@ -2364,6 +2369,8 @@ static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) { + unsigned long old_rflags = vmx_get_rflags(vcpu); + __set_bit(VCPU_EXREG_RFLAGS, (ulong *)&vcpu->arch.regs_avail); to_vmx(vcpu)->rflags = rflags; if (to_vmx(vcpu)->rmode.vm86_active) { @@ -2371,6 +2378,9 @@ static void vmx_set_rflags(struct kvm_vcpu *vcpu, unsigned long rflags) rflags |= X86_EFLAGS_IOPL | X86_EFLAGS_VM; } vmcs_writel(GUEST_RFLAGS, rflags); + + if ((old_rflags ^ to_vmx(vcpu)->rflags) & X86_EFLAGS_VM) + to_vmx(vcpu)->emulation_required = emulation_required(vcpu); } static u32 vmx_get_pkru(struct kvm_vcpu *vcpu) @@ -3858,11 +3868,6 @@ static __init int alloc_kvm_area(void) return 0; } -static bool emulation_required(struct kvm_vcpu *vcpu) -{ - return emulate_invalid_guest_state && !guest_state_valid(vcpu); -} - static void fix_pmode_seg(struct kvm_vcpu *vcpu, int seg, struct kvm_segment *save) { From f1ff89ec4447c4e39d275a1ca3de43eed2a92745 Mon Sep 17 00:00:00 2001 From: Roman Kagan Date: Thu, 20 Jul 2017 17:26:40 +0300 Subject: [PATCH 6/6] kvm: x86: hyperv: avoid livelock in oneshot SynIC timers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the SynIC timer message delivery fails due to SINT message slot being busy, there's no point to attempt starting the timer again until we're notified of the slot being released by the guest (via EOM or EOI). Even worse, when a oneshot timer fails to deliver its message, its re-arming with an expiration time in the past leads to immediate retry of the delivery, and so on, without ever letting the guest vcpu to run and release the slot, which results in a livelock. To avoid that, only start the timer when there's no timer message pending delivery. When there is, meaning the slot is busy, the processing will be restarted upon notification from the guest that the slot is released. Signed-off-by: Roman Kagan Signed-off-by: Radim Krčmář --- arch/x86/kvm/hyperv.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 2695a34fa1c5..337b6d2730fa 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -649,9 +649,10 @@ void kvm_hv_process_stimers(struct kvm_vcpu *vcpu) } if ((stimer->config & HV_STIMER_ENABLE) && - stimer->count) - stimer_start(stimer); - else + stimer->count) { + if (!stimer->msg_pending) + stimer_start(stimer); + } else stimer_cleanup(stimer); } }