From a06cdb5676272a12056820aeb49a1416ad2d0c6f Mon Sep 17 00:00:00 2001
From: Jean Delvare
Date: Tue, 18 May 2010 09:34:12 +0200
Subject: [PATCH 1/8] KVM: powerpc: fix init/exit annotation

kvmppc_e500_exit() is a module_exit function, so it should be tagged
with __exit, not __init. The incorrect annotation was added by commit
2986b8c72c272ea58edd37903b042c6da985627d.

Signed-off-by: Jean Delvare
Cc: stable@kernel.org
Signed-off-by: Alexander Graf
Signed-off-by: Avi Kivity
---
 arch/powerpc/kvm/e500.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/powerpc/kvm/e500.c b/arch/powerpc/kvm/e500.c
index bc2b4004eb26..e8a00b0c4449 100644
--- a/arch/powerpc/kvm/e500.c
+++ b/arch/powerpc/kvm/e500.c
@@ -164,7 +164,7 @@ static int __init kvmppc_e500_init(void)
 	return kvm_init(NULL, sizeof(struct kvmppc_vcpu_e500), 0, THIS_MODULE);
 }
 
-static void __init kvmppc_e500_exit(void)
+static void __exit kvmppc_e500_exit(void)
 {
 	kvmppc_booke_exit();
 }

From fe5913e4e1700cbfc337f4b1da9ddb26f6a55586 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Mon, 17 May 2010 14:43:34 +0200
Subject: [PATCH 2/8] KVM: SVM: Handle MCEs early in the vmexit process

This patch moves handling of the MC vmexits to an earlier point in the
vmexit path. The handle_exit function is too late because the vcpu might
already have changed its physical cpu.

Cc: stable@kernel.org
Signed-off-by: Joerg Roedel
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/svm.c | 15 ++++++++++++++-
 1 file changed, 14 insertions(+), 1 deletion(-)

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 96dc232bfc56..5e1ed033cd7e 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -1410,7 +1410,7 @@ static int nm_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
-static int mc_interception(struct vcpu_svm *svm)
+static void svm_handle_mce(struct vcpu_svm *svm)
 {
 	/*
 	 * On an #MC intercept the MCE handler is not called automatically in
@@ -1420,6 +1420,11 @@
 		"int $0x12\n");
 	/* not sure if we ever come back to this point */
 
+	return;
+}
+
+static int mc_interception(struct vcpu_svm *svm)
+{
 	return 1;
 }
 
@@ -3088,6 +3093,14 @@ static void svm_vcpu_run(struct kvm_vcpu *vcpu)
 		vcpu->arch.regs_avail &= ~(1 << VCPU_EXREG_PDPTR);
 		vcpu->arch.regs_dirty &= ~(1 << VCPU_EXREG_PDPTR);
 	}
+
+	/*
+	 * We need to handle MC intercepts here before the vcpu has a chance to
+	 * change the physical cpu
+	 */
+	if (unlikely(svm->vmcb->control.exit_code ==
+		     SVM_EXIT_EXCP_BASE + MC_VECTOR))
+		svm_handle_mce(svm);
 }
 
 #undef R
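A note on the section annotations behind patch 1: __init code is placed
in a section that the kernel frees once module loading finishes, so an
exit handler tagged __init can be called after its code is gone. A
minimal, hypothetical module skeleton (demo_init/demo_exit are made-up
names, not from the series) showing the intended pairing:

	#include <linux/init.h>
	#include <linux/module.h>

	/* __init: this code may be freed once the module has loaded */
	static int __init demo_init(void)
	{
		return 0;
	}

	/* __exit: runs at unload time, so it must never be marked
	 * __init, since that memory was freed long before */
	static void __exit demo_exit(void)
	{
	}

	module_init(demo_init);
	module_exit(demo_exit);
	MODULE_LICENSE("GPL");
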
From 67ec66077799f2fef84b21a643912b179c422281 Mon Sep 17 00:00:00 2001
From: Joerg Roedel
Date: Mon, 17 May 2010 14:43:35 +0200
Subject: [PATCH 3/8] KVM: SVM: Implement workaround for Erratum 383

This patch implements a workaround for AMD erratum 383 in KVM. Without
this fix it is possible for a guest to kill the host machine. The
workaround implemented here is the one suggested for hypervisors, which
will be published in the next revision guide update.

[jan: fix overflow warning on i386]
[xiao: fix unused variable warning]

Cc: stable@kernel.org
Signed-off-by: Joerg Roedel
Signed-off-by: Jan Kiszka
Signed-off-by: Xiao Guangrong
Signed-off-by: Avi Kivity
---
 arch/x86/include/asm/msr-index.h |  1 +
 arch/x86/kvm/svm.c               | 81 ++++++++++++++++++++++++++++++++
 2 files changed, 82 insertions(+)

diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h
index b49d8ca228f6..8c7ae4318629 100644
--- a/arch/x86/include/asm/msr-index.h
+++ b/arch/x86/include/asm/msr-index.h
@@ -110,6 +110,7 @@
 #define MSR_AMD64_PATCH_LOADER 0xc0010020
 #define MSR_AMD64_OSVW_ID_LENGTH 0xc0010140
 #define MSR_AMD64_OSVW_STATUS 0xc0010141
+#define MSR_AMD64_DC_CFG 0xc0011022
 #define MSR_AMD64_IBSFETCHCTL 0xc0011030
 #define MSR_AMD64_IBSFETCHLINAD 0xc0011031
 #define MSR_AMD64_IBSFETCHPHYSAD 0xc0011032

diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c
index 5e1ed033cd7e..ce438e0fdd26 100644
--- a/arch/x86/kvm/svm.c
+++ b/arch/x86/kvm/svm.c
@@ -28,6 +28,7 @@
 #include <linux/ftrace_event.h>
 #include <linux/slab.h>
 
+#include <asm/tlbflush.h>
 #include <asm/desc.h>
 
 #include <asm/virtext.h>
@@ -56,6 +57,8 @@ MODULE_LICENSE("GPL");
 
 #define DEBUGCTL_RESERVED_BITS (~(0x3fULL))
 
+static bool erratum_383_found __read_mostly;
+
 static const u32 host_save_user_msrs[] = {
 #ifdef CONFIG_X86_64
 	MSR_STAR, MSR_LSTAR, MSR_CSTAR, MSR_SYSCALL_MASK, MSR_KERNEL_GS_BASE,
@@ -374,6 +377,31 @@ static void svm_queue_exception(struct kvm_vcpu *vcpu, unsigned nr,
 	svm->vmcb->control.event_inj_err = error_code;
 }
 
+static void svm_init_erratum_383(void)
+{
+	u32 low, high;
+	int err;
+	u64 val;
+
+	/* Only Fam10h is affected */
+	if (boot_cpu_data.x86 != 0x10)
+		return;
+
+	/* Use _safe variants to not break nested virtualization */
+	val = native_read_msr_safe(MSR_AMD64_DC_CFG, &err);
+	if (err)
+		return;
+
+	val |= (1ULL << 47);
+
+	low = lower_32_bits(val);
+	high = upper_32_bits(val);
+
+	native_write_msr_safe(MSR_AMD64_DC_CFG, low, high);
+
+	erratum_383_found = true;
+}
+
 static int has_svm(void)
 {
 	const char *msg;
@@ -429,6 +457,8 @@ static int svm_hardware_enable(void *garbage)
 
 	wrmsrl(MSR_VM_HSAVE_PA, page_to_pfn(sd->save_area) << PAGE_SHIFT);
 
+	svm_init_erratum_383();
+
 	return 0;
 }
 
@@ -1410,8 +1440,59 @@ static int nm_interception(struct vcpu_svm *svm)
 	return 1;
 }
 
+static bool is_erratum_383(void)
+{
+	int err, i;
+	u64 value;
+
+	if (!erratum_383_found)
+		return false;
+
+	value = native_read_msr_safe(MSR_IA32_MC0_STATUS, &err);
+	if (err)
+		return false;
+
+	/* Bit 62 may or may not be set for this mce */
+	value &= ~(1ULL << 62);
+
+	if (value != 0xb600000000010015ULL)
+		return false;
+
+	/* Clear MCi_STATUS registers */
+	for (i = 0; i < 6; ++i)
+		native_write_msr_safe(MSR_IA32_MCx_STATUS(i), 0, 0);
+
+	value = native_read_msr_safe(MSR_IA32_MCG_STATUS, &err);
+	if (!err) {
+		u32 low, high;
+
+		value &= ~(1ULL << 2);
+		low = lower_32_bits(value);
+		high = upper_32_bits(value);
+
+		native_write_msr_safe(MSR_IA32_MCG_STATUS, low, high);
+	}
+
+	/* Flush tlb to evict multi-match entries */
+	__flush_tlb_all();
+
+	return true;
+}
+
 static void svm_handle_mce(struct vcpu_svm *svm)
 {
+	if (is_erratum_383()) {
+		/*
+		 * Erratum 383 triggered. Guest state is corrupt so kill the
+		 * guest.
+		 */
+		pr_err("KVM: Guest triggered AMD Erratum 383\n");
+
+		set_bit(KVM_REQ_TRIPLE_FAULT, &svm->vcpu.requests);
+
+		return;
+	}
+
 	/*
 	 * On an #MC intercept the MCE handler is not called automatically in
 	 * the host. So do it by hand here.
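Background on the signature check in is_erratum_383() above, with a
stand-alone sketch (the helper name is made up; only the constants come
from the patch): bit 62 of MCi_STATUS is the overflow flag and may or
may not be latched for this machine check, so it is masked out before
comparing against the known erratum 383 signature.

	#include <stdbool.h>
	#include <stdint.h>

	/* Hypothetical stand-alone version of the compare in
	 * is_erratum_383(). Clearing bit 62 (the MCi_STATUS overflow
	 * flag) lets the signature match whether or not a second error
	 * was logged on top of the first. */
	static bool mc0_status_matches_erratum_383(uint64_t mc0_status)
	{
		mc0_status &= ~(1ULL << 62);
		return mc0_status == 0xb600000000010015ULL;
	}
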
From 3be2264be3c00865116f997dc53ebcc90fe7fc4b Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Fri, 28 May 2010 09:44:59 -0300
Subject: [PATCH 4/8] KVM: MMU: invalidate and flush on spte small->large page
 size change

Always invalidate spte and flush TLBs when changing page size, to make
sure different sized translations for the same address are never cached
in a CPU's TLB. Currently the only case where this occurs is when a
non-leaf spte pointer is overwritten by a leaf, large spte entry. This
can happen after dirty logging is disabled on a memslot, for example.

Noticed by Andrea.

KVM-Stable-Tag
Signed-off-by: Marcelo Tosatti
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/mmu.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 81563e76e28f..6fbcb48d5a9b 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1870,6 +1870,8 @@ static void mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 			child = page_header(pte & PT64_BASE_ADDR_MASK);
 			mmu_page_remove_parent_pte(child, sptep);
+			__set_spte(sptep, shadow_trap_nonpresent_pte);
+			kvm_flush_remote_tlbs(vcpu->kvm);
 		} else if (pfn != spte_to_pfn(*sptep)) {
 			pgprintk("hfn old %lx new %lx\n",
 				 spte_to_pfn(*sptep), pfn);

From 69325a122580d3a7b26589e8efdd6663001c3297 Mon Sep 17 00:00:00 2001
From: Avi Kivity
Date: Thu, 27 May 2010 14:35:58 +0300
Subject: [PATCH 5/8] KVM: MMU: Remove user access when allowing kernel access
 to gpte.w=0 page

If cr0.wp=0, we have to allow the guest kernel access to a page with
pte.w=0. We do that by setting spte.w=1, since the host cr0.wp must
remain set so the host can write protect pages. Once we allow write
access, we must remove user access, otherwise we mistakenly allow the
user to write the page.

Reviewed-by: Xiao Guangrong
Signed-off-by: Avi Kivity
---
 arch/x86/kvm/mmu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c
index 6fbcb48d5a9b..a6f695d76928 100644
--- a/arch/x86/kvm/mmu.c
+++ b/arch/x86/kvm/mmu.c
@@ -1815,6 +1815,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep,
 
 		spte |= PT_WRITABLE_MASK;
 
+		if (!tdp_enabled && !(pte_access & ACC_WRITE_MASK))
+			spte &= ~PT_USER_MASK;
+
 		/*
 		 * Optimization: for pte sync, if spte was writable the hash
 		 * lookup is unnecessary (and expensive). Write protection

From 05b782ab951a896d7da41775999821f692dc9e01 Mon Sep 17 00:00:00 2001
From: Jan Kiszka
Date: Wed, 26 May 2010 21:36:33 +0200
Subject: [PATCH 6/8] KVM: Fix order passed to iommu_unmap

This is obviously a left-over from the old interface taking the size.
Apparently a mostly harmless issue with the current iommu_unmap
implementation.

Signed-off-by: Jan Kiszka
Acked-by: Joerg Roedel
Signed-off-by: Avi Kivity
---
 virt/kvm/iommu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/virt/kvm/iommu.c b/virt/kvm/iommu.c
index d2f06be63354..96048ee9e39e 100644
--- a/virt/kvm/iommu.c
+++ b/virt/kvm/iommu.c
@@ -271,7 +271,7 @@ static void kvm_iommu_put_pages(struct kvm *kvm,
 		pfn = phys >> PAGE_SHIFT;
 
 		/* Unmap address from IO address space */
-		order = iommu_unmap(domain, gfn_to_gpa(gfn), PAGE_SIZE);
+		order = iommu_unmap(domain, gfn_to_gpa(gfn), 0);
 		unmap_pages = 1ULL << order;
 
 		/* Unpin all pages we just unmapped to not leak any memory */
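A quick illustration of why patch 6 passes 0: iommu_unmap() now takes a
page order (log2 of the number of pages), not a byte size, so order 0
means exactly one page, while passing PAGE_SIZE (4096) would request
order 4096. A stand-alone sketch of the relationship, assuming the
usual 4 KiB page size:

	#include <stdio.h>

	#define PAGE_SHIFT 12
	#define PAGE_SIZE  (1UL << PAGE_SHIFT)

	/* An order n covers 2^n contiguous pages, so order 0 is a
	 * single 4 KiB page. */
	int main(void)
	{
		for (unsigned int order = 0; order <= 2; order++)
			printf("order %u -> %lu pages, %lu bytes\n",
			       order, 1UL << order, PAGE_SIZE << order);
		return 0;
	}
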
The semantic match that finds this problem is as follows:
(http://coccinelle.lip6.fr/)

//
@@
expression E1;
@@

* spin_lock(E1,...);
  <+... when != E1
  if (...) {
    ... when != E1
*   return ...;
  }
  ...+>
* spin_unlock(E1,...);
//

Signed-off-by: Julia Lawall
Signed-off-by: Avi Kivity
---
 arch/ia64/kvm/kvm-ia64.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/ia64/kvm/kvm-ia64.c b/arch/ia64/kvm/kvm-ia64.c
index d5f4e9161201..21b701374f72 100644
--- a/arch/ia64/kvm/kvm-ia64.c
+++ b/arch/ia64/kvm/kvm-ia64.c
@@ -144,6 +144,7 @@ int kvm_arch_hardware_enable(void *garbage)
 			VP_INIT_ENV : VP_INIT_ENV_INITALIZE,
 			__pa(kvm_vm_buffer), KVM_VM_BUFFER_BASE, &tmp_base);
 	if (status != 0) {
+		spin_unlock(&vp_lock);
 		printk(KERN_WARNING"kvm: Failed to Enable VT Support!!!!\n");
 		return -EINVAL;
 	}

From 07dc7263b99e4ddad2b4c69765a428ccb7d48938 Mon Sep 17 00:00:00 2001
From: Marcelo Tosatti
Date: Wed, 2 Jun 2010 11:26:26 -0300
Subject: [PATCH 8/8] KVM: read apic->irr with ioapic lock held

Read ioapic->irr inside ioapic->lock protected section.

KVM-Stable-Tag
Signed-off-by: Marcelo Tosatti
---
 virt/kvm/ioapic.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/virt/kvm/ioapic.c b/virt/kvm/ioapic.c
index 7c79c1d76d0c..3500dee9cf2b 100644
--- a/virt/kvm/ioapic.c
+++ b/virt/kvm/ioapic.c
@@ -192,12 +192,13 @@ static int ioapic_deliver(struct kvm_ioapic *ioapic, int irq)
 
 int kvm_ioapic_set_irq(struct kvm_ioapic *ioapic, int irq, int level)
 {
-	u32 old_irr = ioapic->irr;
+	u32 old_irr;
 	u32 mask = 1 << irq;
 	union kvm_ioapic_redirect_entry entry;
 	int ret = 1;
 
 	spin_lock(&ioapic->lock);
+	old_irr = ioapic->irr;
 	if (irq >= 0 && irq < IOAPIC_NUM_PINS) {
 		entry = ioapic->redirtbl[irq];
 		level ^= entry.fields.polarity;
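A closing illustration of the pattern patch 8 enforces: a field guarded
by a lock must also be read under that lock, or a concurrent writer can
slip in between the snapshot and the critical section. A hypothetical
user-space analogue (the struct and function names are invented for
illustration and do not appear in the series):

	#include <pthread.h>
	#include <stdint.h>

	struct ioapic_like {
		pthread_mutex_t lock;
		uint32_t irr;
	};

	/* Reading irr before taking the lock, as the old code did,
	 * races with concurrent writers; moving the read inside the
	 * critical section yields a snapshot consistent with the rest
	 * of the work done under the lock. */
	static uint32_t set_irq_like(struct ioapic_like *io, uint32_t mask)
	{
		uint32_t old_irr;

		pthread_mutex_lock(&io->lock);
		old_irr = io->irr;	/* read under the lock */
		io->irr |= mask;
		pthread_mutex_unlock(&io->lock);

		return old_irr;
	}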