From 511c66818d87db2a8931e7f7f92c7904bdd84f72 Mon Sep 17 00:00:00 2001 From: Mihai Caraman Date: Wed, 18 Jun 2014 18:45:05 +0300 Subject: [PATCH 1/7] KVM: PPC: Book3E: Unlock mmu_lock when setting caching atttribute The patch 08c9a188d0d0fc0f0c5e17d89a06bb59c493110f kvm: powerpc: use caching attributes as per linux pte do not handle properly the error case, letting mmu_lock locked. The lock will further generate a RCU stall from kvmppc_e500_emul_tlbwe() caller. In case of an error go to out label. Signed-off-by: Mihai Caraman Signed-off-by: Alexander Graf --- arch/powerpc/kvm/e500_mmu_host.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index dd2cc03f406f..86903d3f5a03 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -473,7 +473,8 @@ static inline int kvmppc_e500_shadow_map(struct kvmppc_vcpu_e500 *vcpu_e500, if (printk_ratelimit()) pr_err("%s: pte not present: gfn %lx, pfn %lx\n", __func__, (long)gfn, pfn); - return -EINVAL; + ret = -EINVAL; + goto out; } kvmppc_e500_ref_setup(ref, gtlbe, pfn, wimg); From 341acbb3aabbcfbf069d7de4ad35f51b58176faf Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Mon, 16 Jun 2014 00:17:07 +0530 Subject: [PATCH 2/7] KVM: PPC: BOOK3S: HV: Use base page size when comparing against slb value With guests supporting Multiple page size per segment (MPSS), hpte_page_size returns the actual page size used. Add a new function to return base page size and use that to compare against the the page size calculated from SLB. Without this patch a hpte lookup can fail since we are comparing wrong page size in kvmppc_hv_find_lock_hpte. Signed-off-by: Aneesh Kumar K.V Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/kvm_book3s_64.h | 19 +++++++++++++++++-- arch/powerpc/kvm/book3s_64_mmu_hv.c | 2 +- arch/powerpc/kvm/book3s_hv_rm_mmu.c | 7 ++----- 3 files changed, 20 insertions(+), 8 deletions(-) diff --git a/arch/powerpc/include/asm/kvm_book3s_64.h b/arch/powerpc/include/asm/kvm_book3s_64.h index fddb72b48ce9..d645428a65a4 100644 --- a/arch/powerpc/include/asm/kvm_book3s_64.h +++ b/arch/powerpc/include/asm/kvm_book3s_64.h @@ -198,8 +198,10 @@ static inline unsigned long compute_tlbie_rb(unsigned long v, unsigned long r, return rb; } -static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +static inline unsigned long __hpte_page_size(unsigned long h, unsigned long l, + bool is_base_size) { + int size, a_psize; /* Look at the 8 bit LP value */ unsigned int lp = (l >> LP_SHIFT) & ((1 << LP_BITS) - 1); @@ -214,14 +216,27 @@ static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) continue; a_psize = __hpte_actual_psize(lp, size); - if (a_psize != -1) + if (a_psize != -1) { + if (is_base_size) + return 1ul << mmu_psize_defs[size].shift; return 1ul << mmu_psize_defs[a_psize].shift; + } } } return 0; } +static inline unsigned long hpte_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 0); +} + +static inline unsigned long hpte_base_page_size(unsigned long h, unsigned long l) +{ + return __hpte_page_size(h, l, 1); +} + static inline unsigned long hpte_rpn(unsigned long ptel, unsigned long psize) { return ((ptel & HPTE_R_RPN) & ~(psize - 1)) >> PAGE_SHIFT; diff --git a/arch/powerpc/kvm/book3s_64_mmu_hv.c b/arch/powerpc/kvm/book3s_64_mmu_hv.c index 80561074078d..68468d695f12 100644 --- a/arch/powerpc/kvm/book3s_64_mmu_hv.c +++ b/arch/powerpc/kvm/book3s_64_mmu_hv.c @@ -1562,7 +1562,7 @@ static ssize_t kvm_htab_write(struct file *file, const char __user *buf, goto out; } if (!rma_setup && is_vrma_hpte(v)) { - unsigned long psize = hpte_page_size(v, r); + unsigned long psize = hpte_base_page_size(v, r); unsigned long senc = slb_pgsize_encoding(psize); unsigned long lpcr; diff --git a/arch/powerpc/kvm/book3s_hv_rm_mmu.c b/arch/powerpc/kvm/book3s_hv_rm_mmu.c index 6e6224318c36..5a24d3c2b6b8 100644 --- a/arch/powerpc/kvm/book3s_hv_rm_mmu.c +++ b/arch/powerpc/kvm/book3s_hv_rm_mmu.c @@ -814,13 +814,10 @@ long kvmppc_hv_find_lock_hpte(struct kvm *kvm, gva_t eaddr, unsigned long slb_v, r = hpte[i+1]; /* - * Check the HPTE again, including large page size - * Since we don't currently allow any MPSS (mixed - * page-size segment) page sizes, it is sufficient - * to check against the actual page size. + * Check the HPTE again, including base page size */ if ((v & valid) && (v & mask) == val && - hpte_page_size(v, r) == (1ul << pshift)) + hpte_base_page_size(v, r) == (1ul << pshift)) /* Return with the HPTE still locked */ return (hash << 3) + (i >> 1); From 9715a2e8515217206ebf53040c979fdbeb805a21 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 26 Jun 2014 13:19:40 +0200 Subject: [PATCH 3/7] PPC: Add _GLOBAL_TOC for 32bit Commit ac5a8ee8 started using _GLOBAL_TOC on ppc32 code. Unfortunately it's only defined for 64bit targets though. Define it for ppc32 as well, fixing the build breakage that commit introduced. Signed-off-by: Alexander Graf --- arch/powerpc/include/asm/ppc_asm.h | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index 9ea266eae33e..7e4612528546 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -277,6 +277,8 @@ n: .globl n; \ n: +#define _GLOBAL_TOC(name) _GLOBAL(name) + #define _KPROBE(n) \ .section ".kprobes.text","a"; \ .globl n; \ From 6ed179b67ca1a05034728ab160905900416b1835 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 12 Jun 2014 18:16:53 +1000 Subject: [PATCH 4/7] KVM: PPC: Assembly functions exported to modules need _GLOBAL_TOC() Both kvmppc_hv_entry_trampoline and kvmppc_entry_trampoline are assembly functions that are exported to modules and also require a valid r2. As such we need to use _GLOBAL_TOC so we provide a global entry point that establishes the TOC (r2). Signed-off-by: Anton Blanchard Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 2 +- arch/powerpc/kvm/book3s_rmhandlers.S | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index 77356fd25ccc..8d9c5d21179d 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -48,7 +48,7 @@ * * LR = return address to continue at after eventually re-enabling MMU */ -_GLOBAL(kvmppc_hv_entry_trampoline) +_GLOBAL_TOC(kvmppc_hv_entry_trampoline) mflr r0 std r0, PPC_LR_STKOFF(r1) stdu r1, -112(r1) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 9eec675220e6..4850a224e5bf 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -146,7 +146,7 @@ kvmppc_handler_skip_ins: * On entry, r4 contains the guest shadow MSR * MSR.EE has to be 0 when calling this function */ -_GLOBAL(kvmppc_entry_trampoline) +_GLOBAL_TOC(kvmppc_entry_trampoline) mfmsr r5 LOAD_REG_ADDR(r7, kvmppc_handler_trampoline_enter) toreal(r7) From 55ab169b7b9276e6e1e01a88531bcf34803dcde2 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 16 Jun 2014 14:37:53 +0200 Subject: [PATCH 5/7] KVM: PPC: Book3S PR: Fix ABIv2 on LE We switched to ABIv2 on Little Endian systems now which gets rid of the dotted function names. Branch to the actual functions when we see such a system. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_interrupts.S | 4 ++++ arch/powerpc/kvm/book3s_rmhandlers.S | 4 ++++ 2 files changed, 8 insertions(+) diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index e2c29e381dc7..d044b8b7c69d 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -25,7 +25,11 @@ #include #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #define GET_SHADOW_VCPU(reg) addi reg, r13, PACA_SVCPU #elif defined(CONFIG_PPC_BOOK3S_32) diff --git a/arch/powerpc/kvm/book3s_rmhandlers.S b/arch/powerpc/kvm/book3s_rmhandlers.S index 4850a224e5bf..16c4d88ba27d 100644 --- a/arch/powerpc/kvm/book3s_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_rmhandlers.S @@ -36,7 +36,11 @@ #if defined(CONFIG_PPC_BOOK3S_64) +#if defined(_CALL_ELF) && _CALL_ELF == 2 +#define FUNC(name) name +#else #define FUNC(name) GLUE(.,name) +#endif #elif defined(CONFIG_PPC_BOOK3S_32) From 19a44ecff52fd67d77d49fb4d43b289c53cdc392 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Mon, 7 Jul 2014 21:05:33 +0200 Subject: [PATCH 6/7] KVM: PPC: RTAS: Do byte swaps explicitly In commit b59d9d26b we introduced implicit byte swaps for RTAS calls. Unfortunately we messed up and didn't swizzle return values properly. Also the old approach wasn't "sparse" compatible - we were randomly reading __be32 values on an LE system. Let's just do all of the swizzling explicitly with byte swaps right where values get used. That way we can at least catch bugs using sparse. This patch fixes XICS RTAS emulation on little endian hosts for me. Signed-off-by: Alexander Graf --- arch/powerpc/kvm/book3s_rtas.c | 65 ++++++++++------------------------ 1 file changed, 18 insertions(+), 47 deletions(-) diff --git a/arch/powerpc/kvm/book3s_rtas.c b/arch/powerpc/kvm/book3s_rtas.c index edb14ba992b3..ef27fbd5d9c5 100644 --- a/arch/powerpc/kvm/book3s_rtas.c +++ b/arch/powerpc/kvm/book3s_rtas.c @@ -23,20 +23,20 @@ static void kvm_rtas_set_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 3 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 3 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; - server = args->args[1]; - priority = args->args[2]; + irq = be32_to_cpu(args->args[0]); + server = be32_to_cpu(args->args[1]); + priority = be32_to_cpu(args->args[2]); rc = kvmppc_xics_set_xive(vcpu->kvm, irq, server, priority); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -44,12 +44,12 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq, server, priority; int rc; - if (args->nargs != 1 || args->nret != 3) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 3) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); server = priority = 0; rc = kvmppc_xics_get_xive(vcpu->kvm, irq, &server, &priority); @@ -58,10 +58,10 @@ static void kvm_rtas_get_xive(struct kvm_vcpu *vcpu, struct rtas_args *args) goto out; } - args->rets[1] = server; - args->rets[2] = priority; + args->rets[1] = cpu_to_be32(server); + args->rets[2] = cpu_to_be32(priority); out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -69,18 +69,18 @@ static void kvm_rtas_int_off(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_off(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) @@ -88,18 +88,18 @@ static void kvm_rtas_int_on(struct kvm_vcpu *vcpu, struct rtas_args *args) u32 irq; int rc; - if (args->nargs != 1 || args->nret != 1) { + if (be32_to_cpu(args->nargs) != 1 || be32_to_cpu(args->nret) != 1) { rc = -3; goto out; } - irq = args->args[0]; + irq = be32_to_cpu(args->args[0]); rc = kvmppc_xics_int_on(vcpu->kvm, irq); if (rc) rc = -3; out: - args->rets[0] = rc; + args->rets[0] = cpu_to_be32(rc); } #endif /* CONFIG_KVM_XICS */ @@ -205,32 +205,6 @@ int kvm_vm_ioctl_rtas_define_token(struct kvm *kvm, void __user *argp) return rc; } -static void kvmppc_rtas_swap_endian_in(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - args->token = be32_to_cpu(args->token); - args->nargs = be32_to_cpu(args->nargs); - args->nret = be32_to_cpu(args->nret); - for (i = 0; i < args->nargs; i++) - args->args[i] = be32_to_cpu(args->args[i]); -#endif -} - -static void kvmppc_rtas_swap_endian_out(struct rtas_args *args) -{ -#ifdef __LITTLE_ENDIAN__ - int i; - - for (i = 0; i < args->nret; i++) - args->args[i] = cpu_to_be32(args->args[i]); - args->token = cpu_to_be32(args->token); - args->nargs = cpu_to_be32(args->nargs); - args->nret = cpu_to_be32(args->nret); -#endif -} - int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) { struct rtas_token_definition *d; @@ -249,8 +223,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc) goto fail; - kvmppc_rtas_swap_endian_in(&args); - /* * args->rets is a pointer into args->args. Now that we've * copied args we need to fix it up to point into our copy, @@ -258,13 +230,13 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) * value so we can restore it on the way out. */ orig_rets = args.rets; - args.rets = &args.args[args.nargs]; + args.rets = &args.args[be32_to_cpu(args.nargs)]; mutex_lock(&vcpu->kvm->lock); rc = -ENOENT; list_for_each_entry(d, &vcpu->kvm->arch.rtas_tokens, list) { - if (d->token == args.token) { + if (d->token == be32_to_cpu(args.token)) { d->handler->handler(vcpu, &args); rc = 0; break; @@ -275,7 +247,6 @@ int kvmppc_rtas_hcall(struct kvm_vcpu *vcpu) if (rc == 0) { args.rets = orig_rets; - kvmppc_rtas_swap_endian_out(&args); rc = kvm_write_guest(vcpu->kvm, args_phys, &args, sizeof(args)); if (rc) goto fail; From 9242b5b60df8b13b469bc6b7be08ff6ebb551ad3 Mon Sep 17 00:00:00 2001 From: Bandan Das Date: Tue, 8 Jul 2014 00:30:23 -0400 Subject: [PATCH 7/7] KVM: x86: Check for nested events if there is an injectable interrupt With commit b6b8a1451fc40412c57d1 that introduced vmx_check_nested_events, checks for injectable interrupts happen at different points in time for L1 and L2 that could potentially cause a race. The regression occurs because KVM_REQ_EVENT is always set when nested_run_pending is set even if there's no pending interrupt. Consequently, there could be a small window when check_nested_events returns without exiting to L1, but an interrupt comes through soon after and it incorrectly, gets injected to L2 by inject_pending_event Fix this by adding a call to check for nested events too when a check for injectable interrupt returns true Signed-off-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index f6449334ec45..ef432f891d30 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5887,6 +5887,18 @@ static int inject_pending_event(struct kvm_vcpu *vcpu, bool req_int_win) kvm_x86_ops->set_nmi(vcpu); } } else if (kvm_cpu_has_injectable_intr(vcpu)) { + /* + * Because interrupts can be injected asynchronously, we are + * calling check_nested_events again here to avoid a race condition. + * See https://lkml.org/lkml/2014/7/2/60 for discussion about this + * proposal and current concerns. Perhaps we should be setting + * KVM_REQ_EVENT only on certain events and not unconditionally? + */ + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) { + r = kvm_x86_ops->check_nested_events(vcpu, req_int_win); + if (r != 0) + return r; + } if (kvm_x86_ops->interrupt_allowed(vcpu)) { kvm_queue_interrupt(vcpu, kvm_cpu_get_interrupt(vcpu), false);