From 7d4bd1d2819ef1035ba1ed648358df37b51ade6f Mon Sep 17 00:00:00 2001
From: Will Deacon <will.deacon@arm.com>
Date: Fri, 1 Apr 2016 12:12:22 +0100
Subject: [PATCH 1/8] arm64: KVM: Add braces to multi-line if statement in
 virtual PMU code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The kernel is written in C, not python, so we need braces around
multi-line if statements. GCC 6 actually warns about this, thanks to the
fantastic new "-Wmisleading-indentation" flag:

 | virt/kvm/arm/pmu.c: In function ‘kvm_pmu_overflow_status’:
 | virt/kvm/arm/pmu.c:198:3: warning: statement is indented as if it were guarded by... [-Wmisleading-indentation]
 |    reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
 |    ^~~
 | arch/arm64/kvm/../../../virt/kvm/arm/pmu.c:196:2: note: ...this ‘if’ clause, but it is not
 |   if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
 |   ^~

As it turns out, this particular case is harmless (we just do some &=
operations with 0), but worth fixing nonetheless.

Signed-off-by: Will Deacon <will.deacon@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/pmu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/arm/pmu.c b/virt/kvm/arm/pmu.c
index b5754c6c5508..575c7aa30d7e 100644
--- a/virt/kvm/arm/pmu.c
+++ b/virt/kvm/arm/pmu.c
@@ -193,11 +193,12 @@ static u64 kvm_pmu_overflow_status(struct kvm_vcpu *vcpu)
 {
 	u64 reg = 0;
 
-	if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E))
+	if ((vcpu_sys_reg(vcpu, PMCR_EL0) & ARMV8_PMU_PMCR_E)) {
 		reg = vcpu_sys_reg(vcpu, PMOVSSET_EL0);
 		reg &= vcpu_sys_reg(vcpu, PMCNTENSET_EL0);
 		reg &= vcpu_sys_reg(vcpu, PMINTENSET_EL1);
 		reg &= kvm_pmu_valid_counter_mask(vcpu);
+	}
 
 	return reg;
 }

From 1c5631c73fc2261a5df64a72c155cb53dcdc0c45 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Wed, 6 Apr 2016 09:37:22 +0100
Subject: [PATCH 2/8] KVM: arm/arm64: Handle forward time correction gracefully

On a host that runs NTP, corrections can have a direct impact on
the background timer that we program on the behalf of a vcpu.

In particular, NTP performing a forward correction will result in
a timer expiring sooner than expected from a guest point of view.
Not a big deal, we kick the vcpu anyway.

But on wake-up, the vcpu thread is going to perform a check to
find out whether or not it should block. And at that point, the
timer check is going to say "timer has not expired yet, go back
to sleep". This results in the timer event being lost forever.

There are multiple ways to handle this. One would be record that
the timer has expired and let kvm_cpu_has_pending_timer return
true in that case, but that would be fairly invasive. Another is
to check for the "short sleep" condition in the hrtimer callback,
and restart the timer for the remaining time when the condition
is detected.

This patch implements the latter, with a bit of refactoring in
order to avoid too much code duplication.

Cc: <stable@vger.kernel.org>
Reported-by: Alexander Graf <agraf@suse.de>
Reviewed-by: Alexander Graf <agraf@suse.de>
Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 virt/kvm/arm/arch_timer.c | 49 +++++++++++++++++++++++++++++++--------
 1 file changed, 39 insertions(+), 10 deletions(-)

diff --git a/virt/kvm/arm/arch_timer.c b/virt/kvm/arm/arch_timer.c
index a9ad4fe3f68f..9aaa35dd9144 100644
--- a/virt/kvm/arm/arch_timer.c
+++ b/virt/kvm/arm/arch_timer.c
@@ -91,6 +91,8 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
 	vcpu = container_of(work, struct kvm_vcpu, arch.timer_cpu.expired);
 	vcpu->arch.timer_cpu.armed = false;
 
+	WARN_ON(!kvm_timer_should_fire(vcpu));
+
 	/*
 	 * If the vcpu is blocked we want to wake it up so that it will see
 	 * the timer has expired when entering the guest.
@@ -98,10 +100,46 @@ static void kvm_timer_inject_irq_work(struct work_struct *work)
 	kvm_vcpu_kick(vcpu);
 }
 
+static u64 kvm_timer_compute_delta(struct kvm_vcpu *vcpu)
+{
+	cycle_t cval, now;
+
+	cval = vcpu->arch.timer_cpu.cntv_cval;
+	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
+
+	if (now < cval) {
+		u64 ns;
+
+		ns = cyclecounter_cyc2ns(timecounter->cc,
+					 cval - now,
+					 timecounter->mask,
+					 &timecounter->frac);
+		return ns;
+	}
+
+	return 0;
+}
+
 static enum hrtimer_restart kvm_timer_expire(struct hrtimer *hrt)
 {
 	struct arch_timer_cpu *timer;
+	struct kvm_vcpu *vcpu;
+	u64 ns;
+
 	timer = container_of(hrt, struct arch_timer_cpu, timer);
+	vcpu = container_of(timer, struct kvm_vcpu, arch.timer_cpu);
+
+	/*
+	 * Check that the timer has really expired from the guest's
+	 * PoV (NTP on the host may have forced it to expire
+	 * early). If we should have slept longer, restart it.
+	 */
+	ns = kvm_timer_compute_delta(vcpu);
+	if (unlikely(ns)) {
+		hrtimer_forward_now(hrt, ns_to_ktime(ns));
+		return HRTIMER_RESTART;
+	}
+
 	queue_work(wqueue, &timer->expired);
 	return HRTIMER_NORESTART;
 }
@@ -176,8 +214,6 @@ static int kvm_timer_update_state(struct kvm_vcpu *vcpu)
 void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 {
 	struct arch_timer_cpu *timer = &vcpu->arch.timer_cpu;
-	u64 ns;
-	cycle_t cval, now;
 
 	BUG_ON(timer_is_armed(timer));
 
@@ -197,14 +233,7 @@ void kvm_timer_schedule(struct kvm_vcpu *vcpu)
 		return;
 
 	/*  The timer has not yet expired, schedule a background timer */
-	cval = timer->cntv_cval;
-	now = kvm_phys_timer_read() - vcpu->kvm->arch.timer.cntvoff;
-
-	ns = cyclecounter_cyc2ns(timecounter->cc,
-				 cval - now,
-				 timecounter->mask,
-				 &timecounter->frac);
-	timer_arm(timer, ns);
+	timer_arm(timer, kvm_timer_compute_delta(vcpu));
 }
 
 void kvm_timer_unschedule(struct kvm_vcpu *vcpu)

From 6141570c36f0c937d5deff20d9cf08cbd8d8ed48 Mon Sep 17 00:00:00 2001
From: Marc Zyngier <marc.zyngier@arm.com>
Date: Tue, 5 Apr 2016 16:11:47 +0100
Subject: [PATCH 3/8] arm64: KVM: Warn when PARange is less than 40 bits

We always thought that 40bits of PA range would be the minimum people
would actually build. Anything less is terrifyingly small.

Turns out that we were both right and wrong. Nobody has ever built
such a system, but the ARM Foundation Model has a PARange set to 36bits.
Just because we can. Oh well. Now, the KVM API explicitely says that
we offer a 40bit PA space to the VM, so we shouldn't run KVM on
the Foundation Model at all.

That being said, this patch offers a less agressive alternative, and
loudly warns about the configuration being unsupported. You'll still
be able to run VMs (at your own risks, though).

This is just a workaround until we have a proper userspace API where
we report the PARange to userspace.

Signed-off-by: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm64/include/asm/kvm_arm.h  |  6 ++---
 arch/arm64/include/asm/kvm_asm.h  |  2 +-
 arch/arm64/include/asm/kvm_host.h |  7 +++---
 arch/arm64/kvm/hyp/s2-setup.c     | 39 +++++++++++++++++++++++++++++--
 4 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/arch/arm64/include/asm/kvm_arm.h b/arch/arm64/include/asm/kvm_arm.h
index 4150fd8bae01..3f29887995bc 100644
--- a/arch/arm64/include/asm/kvm_arm.h
+++ b/arch/arm64/include/asm/kvm_arm.h
@@ -151,8 +151,7 @@
  */
 #define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_64K | VTCR_EL2_SH0_INNER | \
 				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \
-				 VTCR_EL2_RES1)
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
 #define VTTBR_X		(38 - VTCR_EL2_T0SZ_40B)
 #else
 /*
@@ -163,8 +162,7 @@
  */
 #define VTCR_EL2_FLAGS		(VTCR_EL2_TG0_4K | VTCR_EL2_SH0_INNER | \
 				 VTCR_EL2_ORGN0_WBWA | VTCR_EL2_IRGN0_WBWA | \
-				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_T0SZ_40B | \
-				 VTCR_EL2_RES1)
+				 VTCR_EL2_SL0_LVL1 | VTCR_EL2_RES1)
 #define VTTBR_X		(37 - VTCR_EL2_T0SZ_40B)
 #endif
 
diff --git a/arch/arm64/include/asm/kvm_asm.h b/arch/arm64/include/asm/kvm_asm.h
index eb7490d232a0..40a0a24e6c98 100644
--- a/arch/arm64/include/asm/kvm_asm.h
+++ b/arch/arm64/include/asm/kvm_asm.h
@@ -54,7 +54,7 @@ extern void __vgic_v3_init_lrs(void);
 
 extern u32 __kvm_get_mdcr_el2(void);
 
-extern void __init_stage2_translation(void);
+extern u32 __init_stage2_translation(void);
 
 #endif
 
diff --git a/arch/arm64/include/asm/kvm_host.h b/arch/arm64/include/asm/kvm_host.h
index 227ed475dbd3..4cd4196e94a9 100644
--- a/arch/arm64/include/asm/kvm_host.h
+++ b/arch/arm64/include/asm/kvm_host.h
@@ -370,11 +370,12 @@ int kvm_arm_vcpu_arch_get_attr(struct kvm_vcpu *vcpu,
 int kvm_arm_vcpu_arch_has_attr(struct kvm_vcpu *vcpu,
 			       struct kvm_device_attr *attr);
 
-/* #define kvm_call_hyp(f, ...) __kvm_call_hyp(kvm_ksym_ref(f), ##__VA_ARGS__) */
-
 static inline void __cpu_init_stage2(void)
 {
-	kvm_call_hyp(__init_stage2_translation);
+	u32 parange = kvm_call_hyp(__init_stage2_translation);
+
+	WARN_ONCE(parange < 40,
+		  "PARange is %d bits, unsupported configuration!", parange);
 }
 
 #endif /* __ARM64_KVM_HOST_H__ */
diff --git a/arch/arm64/kvm/hyp/s2-setup.c b/arch/arm64/kvm/hyp/s2-setup.c
index 5a9f3bf542b0..bcbe761a5a3d 100644
--- a/arch/arm64/kvm/hyp/s2-setup.c
+++ b/arch/arm64/kvm/hyp/s2-setup.c
@@ -20,9 +20,10 @@
 #include <asm/kvm_asm.h>
 #include <asm/kvm_hyp.h>
 
-void __hyp_text __init_stage2_translation(void)
+u32 __hyp_text __init_stage2_translation(void)
 {
 	u64 val = VTCR_EL2_FLAGS;
+	u64 parange;
 	u64 tmp;
 
 	/*
@@ -30,7 +31,39 @@ void __hyp_text __init_stage2_translation(void)
 	 * bits in VTCR_EL2. Amusingly, the PARange is 4 bits, while
 	 * PS is only 3. Fortunately, bit 19 is RES0 in VTCR_EL2...
 	 */
-	val |= (read_sysreg(id_aa64mmfr0_el1) & 7) << 16;
+	parange = read_sysreg(id_aa64mmfr0_el1) & 7;
+	val |= parange << 16;
+
+	/* Compute the actual PARange... */
+	switch (parange) {
+	case 0:
+		parange = 32;
+		break;
+	case 1:
+		parange = 36;
+		break;
+	case 2:
+		parange = 40;
+		break;
+	case 3:
+		parange = 42;
+		break;
+	case 4:
+		parange = 44;
+		break;
+	case 5:
+	default:
+		parange = 48;
+		break;
+	}
+
+	/*
+	 * ... and clamp it to 40 bits, unless we have some braindead
+	 * HW that implements less than that. In all cases, we'll
+	 * return that value for the rest of the kernel to decide what
+	 * to do.
+	 */
+	val |= 64 - (parange > 40 ? 40 : parange);
 
 	/*
 	 * Read the VMIDBits bits from ID_AA64MMFR1_EL1 and set the VS
@@ -42,4 +75,6 @@ void __hyp_text __init_stage2_translation(void)
 			VTCR_EL2_VS_8BIT;
 
 	write_sysreg(val, vtcr_el2);
+
+	return parange;
 }

From 06a71a24bae57a07afee9cda6b00495347d8a448 Mon Sep 17 00:00:00 2001
From: Sudeep Holla <sudeep.holla@arm.com>
Date: Mon, 4 Apr 2016 14:46:51 +0100
Subject: [PATCH 4/8] arm64: KVM: unregister notifiers in hyp mode teardown
 path

Commit 1e947bad0b63 ("arm64: KVM: Skip HYP setup when already running
in HYP") re-organized the hyp init code and ended up leaving the CPU
hotplug and PM notifier even if hyp mode initialization fails.

Since KVM is not yet supported with ACPI, the above mentioned commit
breaks CPU hotplug in ACPI boot.

This patch fixes teardown_hyp_mode to properly unregister both CPU
hotplug and PM notifiers in the teardown path.

Fixes: 1e947bad0b63 ("arm64: KVM: Skip HYP setup when already running in HYP")
Cc: Christoffer Dall <christoffer.dall@linaro.org>
Cc: Marc Zyngier <marc.zyngier@arm.com>
Signed-off-by: Sudeep Holla <sudeep.holla@arm.com>
Signed-off-by: Christoffer Dall <christoffer.dall@linaro.org>
---
 arch/arm/kvm/arm.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c
index b5384311dec4..dded1b763c16 100644
--- a/arch/arm/kvm/arm.c
+++ b/arch/arm/kvm/arm.c
@@ -1112,10 +1112,17 @@ static void __init hyp_cpu_pm_init(void)
 {
 	cpu_pm_register_notifier(&hyp_init_cpu_pm_nb);
 }
+static void __init hyp_cpu_pm_exit(void)
+{
+	cpu_pm_unregister_notifier(&hyp_init_cpu_pm_nb);
+}
 #else
 static inline void hyp_cpu_pm_init(void)
 {
 }
+static inline void hyp_cpu_pm_exit(void)
+{
+}
 #endif
 
 static void teardown_common_resources(void)
@@ -1141,9 +1148,7 @@ static int init_subsystems(void)
 	/*
 	 * Register CPU Hotplug notifier
 	 */
-	cpu_notifier_register_begin();
-	err = __register_cpu_notifier(&hyp_init_cpu_nb);
-	cpu_notifier_register_done();
+	err = register_cpu_notifier(&hyp_init_cpu_nb);
 	if (err) {
 		kvm_err("Cannot register KVM init CPU notifier (%d)\n", err);
 		return err;
@@ -1193,6 +1198,8 @@ static void teardown_hyp_mode(void)
 	free_hyp_pgds();
 	for_each_possible_cpu(cpu)
 		free_page(per_cpu(kvm_arm_hyp_stack_page, cpu));
+	unregister_cpu_notifier(&hyp_init_cpu_nb);
+	hyp_cpu_pm_exit();
 }
 
 static int init_vhe_mode(void)

From 3d8e15dd6de644736916c8ba012c1cc6b356d4ba Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Thu, 7 Apr 2016 17:21:43 +0200
Subject: [PATCH 5/8] KVM: new maintainer on the block
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Avi has kept Gleb busy enough, and Radim has been helping me
for a while, so let's "reward" him with an entry in
MAINTAINERS.

Acked-by: Gleb Natapov <gleb@kernel.org>
Cc: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
Signed-off-by: Radim Krčmář <rkrcmar@redhat.com>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 40eb1dbe2ae5..7f9f4bda9d15 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -6252,8 +6252,8 @@ S:	Maintained
 F:	tools/testing/selftests
 
 KERNEL VIRTUAL MACHINE (KVM)
-M:	Gleb Natapov <gleb@kernel.org>
 M:	Paolo Bonzini <pbonzini@redhat.com>
+M:	Radim Krčmář <rkrcmar@redhat.com>
 L:	kvm@vger.kernel.org
 W:	http://www.linux-kvm.org
 T:	git git://git.kernel.org/pub/scm/virt/kvm/kvm.git

From 7a98205deebfff9fc96f90d9e7b1a334b0bd3e2b Mon Sep 17 00:00:00 2001
From: Xiao Guangrong <guangrong.xiao@linux.intel.com>
Date: Fri, 25 Mar 2016 21:19:35 +0800
Subject: [PATCH 6/8] KVM: MMU: fix permission_fault()

kvm-unit-tests complained about the PFEC is not set properly, e.g,:
test pte.rw pte.d pte.nx pde.p pde.rw pde.pse user fetch: FAIL: error code 15
expected 5
Dump mapping: address: 0x123400000000
------L4: 3e95007
------L3: 3e96007
------L2: 2000083

It's caused by the reason that PFEC returned to guest is copied from the
PFEC triggered by shadow page table

This patch fixes it and makes the logic of updating errcode more clean

Signed-off-by: Xiao Guangrong <guangrong.xiao@linux.intel.com>
[Do not assume pfec.p=1. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/mmu.h         | 9 ++++-----
 arch/x86/kvm/paging_tmpl.h | 2 +-
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/mmu.h b/arch/x86/kvm/mmu.h
index b70df72e2b33..66b33b96a31b 100644
--- a/arch/x86/kvm/mmu.h
+++ b/arch/x86/kvm/mmu.h
@@ -173,10 +173,9 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 	int index = (pfec >> 1) +
 		    (smap >> (X86_EFLAGS_AC_BIT - PFERR_RSVD_BIT + 1));
 	bool fault = (mmu->permissions[index] >> pte_access) & 1;
+	u32 errcode = PFERR_PRESENT_MASK;
 
 	WARN_ON(pfec & (PFERR_PK_MASK | PFERR_RSVD_MASK));
-	pfec |= PFERR_PRESENT_MASK;
-
 	if (unlikely(mmu->pkru_mask)) {
 		u32 pkru_bits, offset;
 
@@ -189,15 +188,15 @@ static inline u8 permission_fault(struct kvm_vcpu *vcpu, struct kvm_mmu *mmu,
 		pkru_bits = (kvm_read_pkru(vcpu) >> (pte_pkey * 2)) & 3;
 
 		/* clear present bit, replace PFEC.RSVD with ACC_USER_MASK. */
-		offset = pfec - 1 +
+		offset = (pfec & ~1) +
 			((pte_access & PT_USER_MASK) << (PFERR_RSVD_BIT - PT_USER_SHIFT));
 
 		pkru_bits &= mmu->pkru_mask >> offset;
-		pfec |= -pkru_bits & PFERR_PK_MASK;
+		errcode |= -pkru_bits & PFERR_PK_MASK;
 		fault |= (pkru_bits != 0);
 	}
 
-	return -(uint32_t)fault & pfec;
+	return -(u32)fault & errcode;
 }
 
 void kvm_mmu_invalidate_zap_all_pages(struct kvm *kvm);
diff --git a/arch/x86/kvm/paging_tmpl.h b/arch/x86/kvm/paging_tmpl.h
index 1d971c7553c3..bc019f70e0b6 100644
--- a/arch/x86/kvm/paging_tmpl.h
+++ b/arch/x86/kvm/paging_tmpl.h
@@ -360,7 +360,7 @@ retry_walk:
 			goto error;
 
 		if (unlikely(is_rsvd_bits_set(mmu, pte, walker->level))) {
-			errcode |= PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
+			errcode = PFERR_RSVD_MASK | PFERR_PRESENT_MASK;
 			goto error;
 		}
 

From fc5b7f3bf1e1414bd4e91db6918c85ace0c873a5 Mon Sep 17 00:00:00 2001
From: David Matlack <dmatlack@google.com>
Date: Wed, 30 Mar 2016 12:24:47 -0700
Subject: [PATCH 7/8] kvm: x86: do not leak guest xcr0 into host interrupt
 handlers

An interrupt handler that uses the fpu can kill a KVM VM, if it runs
under the following conditions:
 - the guest's xcr0 register is loaded on the cpu
 - the guest's fpu context is not loaded
 - the host is using eagerfpu

Note that the guest's xcr0 register and fpu context are not loaded as
part of the atomic world switch into "guest mode". They are loaded by
KVM while the cpu is still in "host mode".

Usage of the fpu in interrupt context is gated by irq_fpu_usable(). The
interrupt handler will look something like this:

if (irq_fpu_usable()) {
        kernel_fpu_begin();

        [... code that uses the fpu ...]

        kernel_fpu_end();
}

As long as the guest's fpu is not loaded and the host is using eager
fpu, irq_fpu_usable() returns true (interrupted_kernel_fpu_idle()
returns true). The interrupt handler proceeds to use the fpu with
the guest's xcr0 live.

kernel_fpu_begin() saves the current fpu context. If this uses
XSAVE[OPT], it may leave the xsave area in an undesirable state.
According to the SDM, during XSAVE bit i of XSTATE_BV is not modified
if bit i is 0 in xcr0. So it's possible that XSTATE_BV[i] == 1 and
xcr0[i] == 0 following an XSAVE.

kernel_fpu_end() restores the fpu context. Now if any bit i in
XSTATE_BV == 1 while xcr0[i] == 0, XRSTOR generates a #GP. The
fault is trapped and SIGSEGV is delivered to the current process.

Only pre-4.2 kernels appear to be vulnerable to this sequence of
events. Commit 653f52c ("kvm,x86: load guest FPU context more eagerly")
from 4.2 forces the guest's fpu to always be loaded on eagerfpu hosts.

This patch fixes the bug by keeping the host's xcr0 loaded outside
of the interrupts-disabled region where KVM switches into guest mode.

Cc: stable@vger.kernel.org
Suggested-by: Andy Lutomirski <luto@amacapital.net>
Signed-off-by: David Matlack <dmatlack@google.com>
[Move load after goto cancel_injection. - Paolo]
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/x86.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c
index 0a2c70e43bc8..9b7798c7b210 100644
--- a/arch/x86/kvm/x86.c
+++ b/arch/x86/kvm/x86.c
@@ -700,7 +700,6 @@ static int __kvm_set_xcr(struct kvm_vcpu *vcpu, u32 index, u64 xcr)
 		if ((xcr0 & XFEATURE_MASK_AVX512) != XFEATURE_MASK_AVX512)
 			return 1;
 	}
-	kvm_put_guest_xcr0(vcpu);
 	vcpu->arch.xcr0 = xcr0;
 
 	if ((xcr0 ^ old_xcr0) & XFEATURE_MASK_EXTEND)
@@ -6590,8 +6589,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	kvm_x86_ops->prepare_guest_switch(vcpu);
 	if (vcpu->fpu_active)
 		kvm_load_guest_fpu(vcpu);
-	kvm_load_guest_xcr0(vcpu);
-
 	vcpu->mode = IN_GUEST_MODE;
 
 	srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx);
@@ -6618,6 +6615,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 		goto cancel_injection;
 	}
 
+	kvm_load_guest_xcr0(vcpu);
+
 	if (req_immediate_exit)
 		smp_send_reschedule(vcpu->cpu);
 
@@ -6667,6 +6666,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu)
 	vcpu->mode = OUTSIDE_GUEST_MODE;
 	smp_wmb();
 
+	kvm_put_guest_xcr0(vcpu);
+
 	/* Interrupt is enabled by handle_external_intr() */
 	kvm_x86_ops->handle_external_intr(vcpu);
 
@@ -7314,7 +7315,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 	 * and assume host would use all available bits.
 	 * Guest xcr0 would be loaded later.
 	 */
-	kvm_put_guest_xcr0(vcpu);
 	vcpu->guest_fpu_loaded = 1;
 	__kernel_fpu_begin();
 	__copy_kernel_to_fpregs(&vcpu->arch.guest_fpu.state);
@@ -7323,8 +7323,6 @@ void kvm_load_guest_fpu(struct kvm_vcpu *vcpu)
 
 void kvm_put_guest_fpu(struct kvm_vcpu *vcpu)
 {
-	kvm_put_guest_xcr0(vcpu);
-
 	if (!vcpu->guest_fpu_loaded) {
 		vcpu->fpu_counter = 0;
 		return;

From 316314cae15fb0e3869b76b468f59a0c83ac3d4e Mon Sep 17 00:00:00 2001
From: Paolo Bonzini <pbonzini@redhat.com>
Date: Mon, 21 Mar 2016 12:33:00 +0100
Subject: [PATCH 8/8] KVM: x86: mask CPUID(0xD,0x1).EAX against host value
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This ensures that the guest doesn't see XSAVE extensions
(e.g. xgetbv1 or xsavec) that the host lacks.

Cc: stable@vger.kernel.org
Reviewed-by: Radim Krčmář <rkrcmar@redhat.com>
Signed-off-by: Paolo Bonzini <pbonzini@redhat.com>
---
 arch/x86/kvm/cpuid.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c
index 8efb839948e5..bbbaa802d13e 100644
--- a/arch/x86/kvm/cpuid.c
+++ b/arch/x86/kvm/cpuid.c
@@ -534,6 +534,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function,
 			do_cpuid_1_ent(&entry[i], function, idx);
 			if (idx == 1) {
 				entry[i].eax &= kvm_cpuid_D_1_eax_x86_features;
+				cpuid_mask(&entry[i].eax, CPUID_D_1_EAX);
 				entry[i].ebx = 0;
 				if (entry[i].eax & (F(XSAVES)|F(XSAVEC)))
 					entry[i].ebx =