diff --git a/Documentation/powerpc/transactional_memory.txt b/Documentation/powerpc/transactional_memory.txt index c907be41d60f..dc23e58ae264 100644 --- a/Documentation/powerpc/transactional_memory.txt +++ b/Documentation/powerpc/transactional_memory.txt @@ -147,6 +147,25 @@ Example signal handler: fix_the_problem(ucp->dar); } +When in an active transaction that takes a signal, we need to be careful with +the stack. It's possible that the stack has moved back up after the tbegin. +The obvious case here is when the tbegin is called inside a function that +returns before a tend. In this case, the stack is part of the checkpointed +transactional memory state. If we write over this non transactionally or in +suspend, we are in trouble because if we get a tm abort, the program counter and +stack pointer will be back at the tbegin but our in memory stack won't be valid +anymore. + +To avoid this, when taking a signal in an active transaction, we need to use +the stack pointer from the checkpointed state, rather than the speculated +state. This ensures that the signal context (written tm suspended) will be +written below the stack required for the rollback. The transaction is aborted +becuase of the treclaim, so any memory written between the tbegin and the +signal will be rolled back anyway. + +For signals taken in non-TM or suspended mode, we use the +normal/non-checkpointed stack pointer. + Failure cause codes used by kernel ================================== @@ -155,14 +174,18 @@ These are defined in , and distinguish different reasons why the kernel aborted a transaction: TM_CAUSE_RESCHED Thread was rescheduled. + TM_CAUSE_TLBI Software TLB invalide. TM_CAUSE_FAC_UNAV FP/VEC/VSX unavailable trap. TM_CAUSE_SYSCALL Currently unused; future syscalls that must abort transactions for consistency will use this. TM_CAUSE_SIGNAL Signal delivered. TM_CAUSE_MISC Currently unused. + TM_CAUSE_ALIGNMENT Alignment fault. + TM_CAUSE_EMULATE Emulation that touched memory. -These can be checked by the user program's abort handler as TEXASR[0:7]. - +These can be checked by the user program's abort handler as TEXASR[0:7]. If +bit 7 is set, it indicates that the error is consider persistent. For example +a TM_CAUSE_ALIGNMENT will be persistent while a TM_CAUSE_RESCHED will not.q GDB === diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index cf4df8e2139a..0c7f2bfcf134 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -264,6 +264,7 @@ #define H_GET_MPP 0x2D4 #define H_HOME_NODE_ASSOCIATIVITY 0x2EC #define H_BEST_ENERGY 0x2F4 +#define H_XIRR_X 0x2FC #define H_RANDOM 0x300 #define H_COP 0x304 #define H_GET_MPP_X 0x314 diff --git a/arch/powerpc/include/asm/ppc_asm.h b/arch/powerpc/include/asm/ppc_asm.h index cea8496091ff..2f1b6c5f8174 100644 --- a/arch/powerpc/include/asm/ppc_asm.h +++ b/arch/powerpc/include/asm/ppc_asm.h @@ -523,6 +523,17 @@ END_FTR_SECTION_NESTED(CPU_FTR_HAS_PPR,CPU_FTR_HAS_PPR,946) #define PPC440EP_ERR42 #endif +/* The following stops all load and store data streams associated with stream + * ID (ie. streams created explicitly). The embedded and server mnemonics for + * dcbt are different so we use machine "power4" here explicitly. + */ +#define DCBT_STOP_ALL_STREAM_IDS(scratch) \ +.machine push ; \ +.machine "power4" ; \ + lis scratch,0x60000000@h; \ + dcbt r0,scratch,0b01010; \ +.machine pop + /* * toreal/fromreal/tophys/tovirt macros. 32-bit BookE makes them * keep the address intact to be compatible with code shared with diff --git a/arch/powerpc/include/asm/processor.h b/arch/powerpc/include/asm/processor.h index 594db6bc093c..14a658363698 100644 --- a/arch/powerpc/include/asm/processor.h +++ b/arch/powerpc/include/asm/processor.h @@ -409,21 +409,16 @@ static inline void prefetchw(const void *x) #endif #ifdef CONFIG_PPC64 -static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32) +static inline unsigned long get_clean_sp(unsigned long sp, int is_32) { - unsigned long sp; - if (is_32) - sp = regs->gpr[1] & 0x0ffffffffUL; - else - sp = regs->gpr[1]; - + return sp & 0x0ffffffffUL; return sp; } #else -static inline unsigned long get_clean_sp(struct pt_regs *regs, int is_32) +static inline unsigned long get_clean_sp(unsigned long sp, int is_32) { - return regs->gpr[1]; + return sp; } #endif diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index a6136515c7f2..4a9e408644fe 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -111,17 +111,6 @@ #define MSR_TM_TRANSACTIONAL(x) (((x) & MSR_TS_MASK) == MSR_TS_T) #define MSR_TM_SUSPENDED(x) (((x) & MSR_TS_MASK) == MSR_TS_S) -/* Reason codes describing kernel causes for transaction aborts. By - convention, bit0 is copied to TEXASR[56] (IBM bit 7) which is set if - the failure is persistent. -*/ -#define TM_CAUSE_RESCHED 0xfe -#define TM_CAUSE_TLBI 0xfc -#define TM_CAUSE_FAC_UNAV 0xfa -#define TM_CAUSE_SYSCALL 0xf9 /* Persistent */ -#define TM_CAUSE_MISC 0xf6 -#define TM_CAUSE_SIGNAL 0xf4 - #if defined(CONFIG_PPC_BOOK3S_64) #define MSR_64BIT MSR_SF diff --git a/arch/powerpc/include/asm/signal.h b/arch/powerpc/include/asm/signal.h index fbe66c463891..9322c28aebd2 100644 --- a/arch/powerpc/include/asm/signal.h +++ b/arch/powerpc/include/asm/signal.h @@ -3,5 +3,8 @@ #define __ARCH_HAS_SA_RESTORER #include +#include + +extern unsigned long get_tm_stackpointer(struct pt_regs *regs); #endif /* _ASM_POWERPC_SIGNAL_H */ diff --git a/arch/powerpc/include/asm/tm.h b/arch/powerpc/include/asm/tm.h index 4b4449abf3f8..9dfbc34bdbf5 100644 --- a/arch/powerpc/include/asm/tm.h +++ b/arch/powerpc/include/asm/tm.h @@ -5,6 +5,8 @@ * Copyright 2012 Matt Evans & Michael Neuling, IBM Corporation. */ +#include + #ifdef CONFIG_PPC_TRANSACTIONAL_MEM extern void do_load_up_transact_fpu(struct thread_struct *thread); extern void do_load_up_transact_altivec(struct thread_struct *thread); diff --git a/arch/powerpc/include/uapi/asm/Kbuild b/arch/powerpc/include/uapi/asm/Kbuild index f7bca6370745..5182c8622b54 100644 --- a/arch/powerpc/include/uapi/asm/Kbuild +++ b/arch/powerpc/include/uapi/asm/Kbuild @@ -40,6 +40,7 @@ header-y += statfs.h header-y += swab.h header-y += termbits.h header-y += termios.h +header-y += tm.h header-y += types.h header-y += ucontext.h header-y += unistd.h diff --git a/arch/powerpc/include/uapi/asm/tm.h b/arch/powerpc/include/uapi/asm/tm.h new file mode 100644 index 000000000000..85059a00f560 --- /dev/null +++ b/arch/powerpc/include/uapi/asm/tm.h @@ -0,0 +1,18 @@ +#ifndef _ASM_POWERPC_TM_H +#define _ASM_POWERPC_TM_H + +/* Reason codes describing kernel causes for transaction aborts. By + * convention, bit0 is copied to TEXASR[56] (IBM bit 7) which is set if + * the failure is persistent. PAPR saves 0xff-0xe0 for the hypervisor. + */ +#define TM_CAUSE_PERSISTENT 0x01 +#define TM_CAUSE_RESCHED 0xde +#define TM_CAUSE_TLBI 0xdc +#define TM_CAUSE_FAC_UNAV 0xda +#define TM_CAUSE_SYSCALL 0xd8 /* future use */ +#define TM_CAUSE_MISC 0xd6 /* future use */ +#define TM_CAUSE_SIGNAL 0xd4 +#define TM_CAUSE_ALIGNMENT 0xd2 +#define TM_CAUSE_EMULATE 0xd0 + +#endif diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index c60bbec25c1f..1f0937d7d4b5 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -453,7 +453,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .icache_bsize = 128, .dcache_bsize = 128, .oprofile_type = PPC_OPROFILE_POWER4, - .oprofile_cpu_type = "ppc64/ibm-compat-v1", + .oprofile_cpu_type = 0, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, .platform = "power8", @@ -482,7 +482,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .cpu_name = "POWER7+ (raw)", .cpu_features = CPU_FTRS_POWER7, .cpu_user_features = COMMON_USER_POWER7, - .cpu_user_features = COMMON_USER2_POWER7, + .cpu_user_features2 = COMMON_USER2_POWER7, .mmu_features = MMU_FTRS_POWER7, .icache_bsize = 128, .dcache_bsize = 128, @@ -506,7 +506,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .dcache_bsize = 128, .num_pmcs = 6, .pmc_type = PPC_PMC_IBM, - .oprofile_cpu_type = "ppc64/power8", + .oprofile_cpu_type = 0, .oprofile_type = PPC_OPROFILE_POWER4, .cpu_setup = __setup_cpu_power8, .cpu_restore = __restore_cpu_power8, diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S index d22e73e4618b..22b45a4955cd 100644 --- a/arch/powerpc/kernel/entry_32.S +++ b/arch/powerpc/kernel/entry_32.S @@ -849,7 +849,7 @@ resume_kernel: /* check current_thread_info, _TIF_EMULATE_STACK_STORE */ CURRENT_THREAD_INFO(r9, r1) lwz r8,TI_FLAGS(r9) - andis. r8,r8,_TIF_EMULATE_STACK_STORE@h + andis. r0,r8,_TIF_EMULATE_STACK_STORE@h beq+ 1f addi r8,r1,INT_FRAME_SIZE /* Get the kprobed function entry */ diff --git a/arch/powerpc/kernel/entry_64.S b/arch/powerpc/kernel/entry_64.S index 0e9095e47b5b..246b11c4fe7e 100644 --- a/arch/powerpc/kernel/entry_64.S +++ b/arch/powerpc/kernel/entry_64.S @@ -501,6 +501,13 @@ BEGIN_FTR_SECTION ldarx r6,0,r1 END_FTR_SECTION_IFSET(CPU_FTR_STCX_CHECKS_ADDRESS) +#ifdef CONFIG_PPC_BOOK3S +/* Cancel all explict user streams as they will have no use after context + * switch and will stop the HW from creating streams itself + */ + DCBT_STOP_ALL_STREAM_IDS(r6) +#endif + addi r6,r4,-THREAD /* Convert THREAD to 'current' */ std r6,PACACURRENT(r13) /* Set new 'current' */ diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index e9acf50dd5b2..7f2273cc3c7d 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -657,15 +657,6 @@ void pci_resource_to_user(const struct pci_dev *dev, int bar, * ranges. However, some machines (thanks Apple !) tend to split their * space into lots of small contiguous ranges. So we have to coalesce. * - * - We can only cope with all memory ranges having the same offset - * between CPU addresses and PCI addresses. Unfortunately, some bridges - * are setup for a large 1:1 mapping along with a small "window" which - * maps PCI address 0 to some arbitrary high address of the CPU space in - * order to give access to the ISA memory hole. - * The way out of here that I've chosen for now is to always set the - * offset based on the first resource found, then override it if we - * have a different offset and the previous was set by an ISA hole. - * * - Some busses have IO space not starting at 0, which causes trouble with * the way we do our IO resource renumbering. The code somewhat deals with * it for 64 bits but I would expect problems on 32 bits. @@ -680,10 +671,9 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, int rlen; int pna = of_n_addr_cells(dev); int np = pna + 5; - int memno = 0, isa_hole = -1; + int memno = 0; u32 pci_space; unsigned long long pci_addr, cpu_addr, pci_next, cpu_next, size; - unsigned long long isa_mb = 0; struct resource *res; printk(KERN_INFO "PCI host bridge %s %s ranges:\n", @@ -777,8 +767,6 @@ void pci_process_bridge_OF_ranges(struct pci_controller *hose, } /* Handles ISA memory hole space here */ if (pci_addr == 0) { - isa_mb = cpu_addr; - isa_hole = memno; if (primary || isa_mem_base == 0) isa_mem_base = cpu_addr; hose->isa_mem_phys = cpu_addr; diff --git a/arch/powerpc/kernel/signal.c b/arch/powerpc/kernel/signal.c index 577a8aa69c6e..457e97aa2945 100644 --- a/arch/powerpc/kernel/signal.c +++ b/arch/powerpc/kernel/signal.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "signal.h" @@ -30,13 +31,13 @@ int show_unhandled_signals = 1; /* * Allocate space for the signal frame */ -void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, +void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size, int is_32) { unsigned long oldsp, newsp; /* Default to using normal stack */ - oldsp = get_clean_sp(regs, is_32); + oldsp = get_clean_sp(sp, is_32); /* Check for alt stack */ if ((ka->sa.sa_flags & SA_ONSTACK) && @@ -175,3 +176,38 @@ void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags) user_enter(); } + +unsigned long get_tm_stackpointer(struct pt_regs *regs) +{ + /* When in an active transaction that takes a signal, we need to be + * careful with the stack. It's possible that the stack has moved back + * up after the tbegin. The obvious case here is when the tbegin is + * called inside a function that returns before a tend. In this case, + * the stack is part of the checkpointed transactional memory state. + * If we write over this non transactionally or in suspend, we are in + * trouble because if we get a tm abort, the program counter and stack + * pointer will be back at the tbegin but our in memory stack won't be + * valid anymore. + * + * To avoid this, when taking a signal in an active transaction, we + * need to use the stack pointer from the checkpointed state, rather + * than the speculated state. This ensures that the signal context + * (written tm suspended) will be written below the stack required for + * the rollback. The transaction is aborted becuase of the treclaim, + * so any memory written between the tbegin and the signal will be + * rolled back anyway. + * + * For signals taken in non-TM or suspended mode, we use the + * normal/non-checkpointed stack pointer. + */ + +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM + if (MSR_TM_ACTIVE(regs->msr)) { + tm_enable(); + tm_reclaim(¤t->thread, regs->msr, TM_CAUSE_SIGNAL); + if (MSR_TM_TRANSACTIONAL(regs->msr)) + return current->thread.ckpt_regs.gpr[1]; + } +#endif + return regs->gpr[1]; +} diff --git a/arch/powerpc/kernel/signal.h b/arch/powerpc/kernel/signal.h index ec84c901ceab..c69b9aeb9f23 100644 --- a/arch/powerpc/kernel/signal.h +++ b/arch/powerpc/kernel/signal.h @@ -12,7 +12,7 @@ extern void do_notify_resume(struct pt_regs *regs, unsigned long thread_info_flags); -extern void __user * get_sigframe(struct k_sigaction *ka, struct pt_regs *regs, +extern void __user * get_sigframe(struct k_sigaction *ka, unsigned long sp, size_t frame_size, int is_32); extern int handle_signal32(unsigned long sig, struct k_sigaction *ka, diff --git a/arch/powerpc/kernel/signal_32.c b/arch/powerpc/kernel/signal_32.c index 95068bf569ad..201385c3a1ae 100644 --- a/arch/powerpc/kernel/signal_32.c +++ b/arch/powerpc/kernel/signal_32.c @@ -503,12 +503,6 @@ static int save_tm_user_regs(struct pt_regs *regs, { unsigned long msr = regs->msr; - /* tm_reclaim rolls back all reg states, updating thread.ckpt_regs, - * thread.transact_fpr[], thread.transact_vr[], etc. - */ - tm_enable(); - tm_reclaim(¤t->thread, msr, TM_CAUSE_SIGNAL); - /* Make sure floating point registers are stored in regs */ flush_fp_to_thread(current); @@ -965,7 +959,7 @@ int handle_rt_signal32(unsigned long sig, struct k_sigaction *ka, /* Set up Signal Frame */ /* Put a Real Time Context onto stack */ - rt_sf = get_sigframe(ka, regs, sizeof(*rt_sf), 1); + rt_sf = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*rt_sf), 1); addr = rt_sf; if (unlikely(rt_sf == NULL)) goto badframe; @@ -1403,7 +1397,7 @@ int handle_signal32(unsigned long sig, struct k_sigaction *ka, unsigned long tramp; /* Set up Signal Frame */ - frame = get_sigframe(ka, regs, sizeof(*frame), 1); + frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 1); if (unlikely(frame == NULL)) goto badframe; sc = (struct sigcontext __user *) &frame->sctx; diff --git a/arch/powerpc/kernel/signal_64.c b/arch/powerpc/kernel/signal_64.c index c1794286098c..345947367ec0 100644 --- a/arch/powerpc/kernel/signal_64.c +++ b/arch/powerpc/kernel/signal_64.c @@ -154,11 +154,12 @@ static long setup_sigcontext(struct sigcontext __user *sc, struct pt_regs *regs, * As above, but Transactional Memory is in use, so deliver sigcontexts * containing checkpointed and transactional register states. * - * To do this, we treclaim to gather both sets of registers and set up the - * 'normal' sigcontext registers with rolled-back register values such that a - * simple signal handler sees a correct checkpointed register state. - * If interested, a TM-aware sighandler can examine the transactional registers - * in the 2nd sigcontext to determine the real origin of the signal. + * To do this, we treclaim (done before entering here) to gather both sets of + * registers and set up the 'normal' sigcontext registers with rolled-back + * register values such that a simple signal handler sees a correct + * checkpointed register state. If interested, a TM-aware sighandler can + * examine the transactional registers in the 2nd sigcontext to determine the + * real origin of the signal. */ static long setup_tm_sigcontexts(struct sigcontext __user *sc, struct sigcontext __user *tm_sc, @@ -184,16 +185,6 @@ static long setup_tm_sigcontexts(struct sigcontext __user *sc, BUG_ON(!MSR_TM_ACTIVE(regs->msr)); - /* tm_reclaim rolls back all reg states, saving checkpointed (older) - * GPRs to thread.ckpt_regs and (if used) FPRs to (newer) - * thread.transact_fp and/or VRs to (newer) thread.transact_vr. - * THEN we save out FP/VRs, if necessary, to the checkpointed (older) - * thread.fr[]/vr[]s. The transactional (newer) GPRs are on the - * stack, in *regs. - */ - tm_enable(); - tm_reclaim(¤t->thread, msr, TM_CAUSE_SIGNAL); - flush_fp_to_thread(current); #ifdef CONFIG_ALTIVEC @@ -711,7 +702,7 @@ int handle_rt_signal64(int signr, struct k_sigaction *ka, siginfo_t *info, unsigned long newsp = 0; long err = 0; - frame = get_sigframe(ka, regs, sizeof(*frame), 0); + frame = get_sigframe(ka, get_tm_stackpointer(regs), sizeof(*frame), 0); if (unlikely(frame == NULL)) goto badframe; diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index a7a648f6b750..f18c79c324ef 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -53,6 +53,7 @@ #ifdef CONFIG_PPC64 #include #include +#include #endif #include #include @@ -932,6 +933,28 @@ static int emulate_isel(struct pt_regs *regs, u32 instword) return 0; } +#ifdef CONFIG_PPC_TRANSACTIONAL_MEM +static inline bool tm_abort_check(struct pt_regs *regs, int cause) +{ + /* If we're emulating a load/store in an active transaction, we cannot + * emulate it as the kernel operates in transaction suspended context. + * We need to abort the transaction. This creates a persistent TM + * abort so tell the user what caused it with a new code. + */ + if (MSR_TM_TRANSACTIONAL(regs->msr)) { + tm_enable(); + tm_abort(cause); + return true; + } + return false; +} +#else +static inline bool tm_abort_check(struct pt_regs *regs, int reason) +{ + return false; +} +#endif + static int emulate_instruction(struct pt_regs *regs) { u32 instword; @@ -971,6 +994,9 @@ static int emulate_instruction(struct pt_regs *regs) /* Emulate load/store string insn. */ if ((instword & PPC_INST_STRING_GEN_MASK) == PPC_INST_STRING) { + if (tm_abort_check(regs, + TM_CAUSE_EMULATE | TM_CAUSE_PERSISTENT)) + return -EINVAL; PPC_WARN_EMULATED(string, regs); return emulate_string_inst(regs, instword); } @@ -1148,6 +1174,9 @@ void alignment_exception(struct pt_regs *regs) if (!arch_irq_disabled_regs(regs)) local_irq_enable(); + if (tm_abort_check(regs, TM_CAUSE_ALIGNMENT | TM_CAUSE_PERSISTENT)) + goto bail; + /* we don't implement logging of alignment exceptions */ if (!(current->thread.align_ctl & PR_UNALIGN_SIGBUS)) fixed = fix_alignment(regs); diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 9de24f8e03c7..550f5928b394 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -562,6 +562,8 @@ int kvmppc_pseries_do_hcall(struct kvm_vcpu *vcpu) case H_CPPR: case H_EOI: case H_IPI: + case H_IPOLL: + case H_XIRR_X: if (kvmppc_xics_enabled(vcpu)) { ret = kvmppc_xics_hcall(vcpu, req); break; diff --git a/arch/powerpc/kvm/book3s_pr_papr.c b/arch/powerpc/kvm/book3s_pr_papr.c index b24309c6c2d5..da0e0bc268bd 100644 --- a/arch/powerpc/kvm/book3s_pr_papr.c +++ b/arch/powerpc/kvm/book3s_pr_papr.c @@ -257,6 +257,8 @@ int kvmppc_h_pr(struct kvm_vcpu *vcpu, unsigned long cmd) case H_CPPR: case H_EOI: case H_IPI: + case H_IPOLL: + case H_XIRR_X: if (kvmppc_xics_enabled(vcpu)) return kvmppc_h_pr_xics_hcall(vcpu, cmd); break; diff --git a/arch/powerpc/kvm/book3s_xics.c b/arch/powerpc/kvm/book3s_xics.c index f7a103756618..94c1dd46b83d 100644 --- a/arch/powerpc/kvm/book3s_xics.c +++ b/arch/powerpc/kvm/book3s_xics.c @@ -650,6 +650,23 @@ static noinline int kvmppc_h_ipi(struct kvm_vcpu *vcpu, unsigned long server, return H_SUCCESS; } +static int kvmppc_h_ipoll(struct kvm_vcpu *vcpu, unsigned long server) +{ + union kvmppc_icp_state state; + struct kvmppc_icp *icp; + + icp = vcpu->arch.icp; + if (icp->server_num != server) { + icp = kvmppc_xics_find_server(vcpu->kvm, server); + if (!icp) + return H_PARAMETER; + } + state = ACCESS_ONCE(icp->state); + kvmppc_set_gpr(vcpu, 4, ((u32)state.cppr << 24) | state.xisr); + kvmppc_set_gpr(vcpu, 5, state.mfrr); + return H_SUCCESS; +} + static noinline void kvmppc_h_cppr(struct kvm_vcpu *vcpu, unsigned long cppr) { union kvmppc_icp_state old_state, new_state; @@ -787,6 +804,18 @@ int kvmppc_xics_hcall(struct kvm_vcpu *vcpu, u32 req) if (!xics || !vcpu->arch.icp) return H_HARDWARE; + /* These requests don't have real-mode implementations at present */ + switch (req) { + case H_XIRR_X: + res = kvmppc_h_xirr(vcpu); + kvmppc_set_gpr(vcpu, 4, res); + kvmppc_set_gpr(vcpu, 5, get_tb()); + return rc; + case H_IPOLL: + rc = kvmppc_h_ipoll(vcpu, kvmppc_get_gpr(vcpu, 4)); + return rc; + } + /* Check for real mode returning too hard */ if (xics->real_mode) return kvmppc_xics_rm_complete(vcpu, req); diff --git a/arch/powerpc/lib/copypage_power7.S b/arch/powerpc/lib/copypage_power7.S index 0ef75bf0695c..395c594722a2 100644 --- a/arch/powerpc/lib/copypage_power7.S +++ b/arch/powerpc/lib/copypage_power7.S @@ -28,13 +28,14 @@ _GLOBAL(copypage_power7) * aligned we don't need to clear the bottom 7 bits of either * address. */ - ori r9,r3,1 /* stream=1 */ + ori r9,r3,1 /* stream=1 => to */ #ifdef CONFIG_PPC_64K_PAGES - lis r7,0x0E01 /* depth=7, units=512 */ + lis r7,0x0E01 /* depth=7 + * units/cachelines=512 */ #else lis r7,0x0E00 /* depth=7 */ - ori r7,r7,0x1000 /* units=32 */ + ori r7,r7,0x1000 /* units/cachelines=32 */ #endif ori r10,r7,1 /* stream=1 */ @@ -43,12 +44,14 @@ _GLOBAL(copypage_power7) .machine push .machine "power4" - dcbt r0,r4,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 + /* setup read stream 0 */ + dcbt r0,r4,0b01000 /* addr from */ + dcbt r0,r7,0b01010 /* length and depth from */ + /* setup write stream 1 */ + dcbtst r0,r9,0b01000 /* addr to */ + dcbtst r0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* GO */ + dcbt r0,r8,0b01010 /* all streams GO */ .machine pop #ifdef CONFIG_ALTIVEC diff --git a/arch/powerpc/lib/copyuser_power7.S b/arch/powerpc/lib/copyuser_power7.S index 0d24ff15f5f6..d1f11795a7ad 100644 --- a/arch/powerpc/lib/copyuser_power7.S +++ b/arch/powerpc/lib/copyuser_power7.S @@ -318,12 +318,14 @@ err1; stb r0,0(r3) .machine push .machine "power4" - dcbt r0,r6,0b01000 - dcbt r0,r7,0b01010 - dcbtst r0,r9,0b01000 - dcbtst r0,r10,0b01010 + /* setup read stream 0 */ + dcbt r0,r6,0b01000 /* addr from */ + dcbt r0,r7,0b01010 /* length and depth from */ + /* setup write stream 1 */ + dcbtst r0,r9,0b01000 /* addr to */ + dcbtst r0,r10,0b01010 /* length and depth to */ eieio - dcbt r0,r8,0b01010 /* GO */ + dcbt r0,r8,0b01010 /* all streams GO */ .machine pop beq cr1,.Lunwind_stack_nonvmx_copy diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 6a2aead5b0e5..4c122c3f1623 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -336,11 +336,18 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, hpte_v = hptep->v; actual_psize = hpte_actual_psize(hptep, psize); + /* + * We need to invalidate the TLB always because hpte_remove doesn't do + * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less + * random entry from it. When we do that we don't invalidate the TLB + * (hpte_remove) because we assume the old translation is still + * technically "valid". + */ if (actual_psize < 0) { - native_unlock_hpte(hptep); - return -1; + actual_psize = psize; + ret = -1; + goto err_out; } - /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v)) { DBG_LOW(" -> miss\n"); ret = -1; @@ -350,6 +357,7 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | (newpp & (HPTE_R_PP | HPTE_R_N | HPTE_R_C)); } +err_out: native_unlock_hpte(hptep); /* Ensure it is out of the tlb too. */ @@ -409,7 +417,7 @@ static void native_hpte_updateboltedpp(unsigned long newpp, unsigned long ea, hptep = htab_address + slot; actual_psize = hpte_actual_psize(hptep, psize); if (actual_psize < 0) - return; + actual_psize = psize; /* Update the HPTE */ hptep->r = (hptep->r & ~(HPTE_R_PP | HPTE_R_N)) | @@ -437,21 +445,27 @@ static void native_hpte_invalidate(unsigned long slot, unsigned long vpn, hpte_v = hptep->v; actual_psize = hpte_actual_psize(hptep, psize); + /* + * We need to invalidate the TLB always because hpte_remove doesn't do + * a tlb invalidate. If a hash bucket gets full, we "evict" a more/less + * random entry from it. When we do that we don't invalidate the TLB + * (hpte_remove) because we assume the old translation is still + * technically "valid". + */ if (actual_psize < 0) { + actual_psize = psize; native_unlock_hpte(hptep); - local_irq_restore(flags); - return; + goto err_out; } - /* Even if we miss, we need to invalidate the TLB */ if (!HPTE_V_COMPARE(hpte_v, want_v)) native_unlock_hpte(hptep); else /* Invalidate the hpte. NOTE: this also unlocks it */ hptep->v = 0; +err_out: /* Invalidate the TLB */ tlbie(vpn, psize, actual_psize, ssize, local); - local_irq_restore(flags); } diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 426180b84978..845c867444e6 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -110,7 +110,7 @@ static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} static bool regs_use_siar(struct pt_regs *regs) { - return !!(regs->result & 1); + return !!regs->result; } /* @@ -136,22 +136,30 @@ static inline unsigned long perf_ip_adjust(struct pt_regs *regs) * If we're not doing instruction sampling, give them the SDAR * (sampled data address). If we are doing instruction sampling, then * only give them the SDAR if it corresponds to the instruction - * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC or - * the [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA. + * pointed to by SIAR; this is indicated by the [POWER6_]MMCRA_SDSYNC, the + * [POWER7P_]MMCRA_SDAR_VALID bit in MMCRA, or the SDAR_VALID bit in SIER. */ static inline void perf_get_data_addr(struct pt_regs *regs, u64 *addrp) { unsigned long mmcra = regs->dsisr; - unsigned long sdsync; + bool sdar_valid; - if (ppmu->flags & PPMU_SIAR_VALID) - sdsync = POWER7P_MMCRA_SDAR_VALID; - else if (ppmu->flags & PPMU_ALT_SIPR) - sdsync = POWER6_MMCRA_SDSYNC; - else - sdsync = MMCRA_SDSYNC; + if (ppmu->flags & PPMU_HAS_SIER) + sdar_valid = regs->dar & SIER_SDAR_VALID; + else { + unsigned long sdsync; - if (!(mmcra & MMCRA_SAMPLE_ENABLE) || (mmcra & sdsync)) + if (ppmu->flags & PPMU_SIAR_VALID) + sdsync = POWER7P_MMCRA_SDAR_VALID; + else if (ppmu->flags & PPMU_ALT_SIPR) + sdsync = POWER6_MMCRA_SDSYNC; + else + sdsync = MMCRA_SDSYNC; + + sdar_valid = mmcra & sdsync; + } + + if (!(mmcra & MMCRA_SAMPLE_ENABLE) || sdar_valid) *addrp = mfspr(SPRN_SDAR); } @@ -181,11 +189,6 @@ static bool regs_sipr(struct pt_regs *regs) return !!(regs->dsisr & sipr); } -static bool regs_no_sipr(struct pt_regs *regs) -{ - return !!(regs->result & 2); -} - static inline u32 perf_flags_from_msr(struct pt_regs *regs) { if (regs->msr & MSR_PR) @@ -208,7 +211,7 @@ static inline u32 perf_get_misc_flags(struct pt_regs *regs) * SIAR which should give slightly more reliable * results */ - if (regs_no_sipr(regs)) { + if (ppmu->flags & PPMU_NO_SIPR) { unsigned long siar = mfspr(SPRN_SIAR); if (siar >= PAGE_OFFSET) return PERF_RECORD_MISC_KERNEL; @@ -239,22 +242,9 @@ static inline void perf_read_regs(struct pt_regs *regs) int use_siar; regs->dsisr = mmcra; - regs->result = 0; - - if (ppmu->flags & PPMU_NO_SIPR) - regs->result |= 2; - - /* - * On power8 if we're in random sampling mode, the SIER is updated. - * If we're in continuous sampling mode, we don't have SIPR. - */ - if (ppmu->flags & PPMU_HAS_SIER) { - if (marked) - regs->dar = mfspr(SPRN_SIER); - else - regs->result |= 2; - } + if (ppmu->flags & PPMU_HAS_SIER) + regs->dar = mfspr(SPRN_SIER); /* * If this isn't a PMU exception (eg a software event) the SIAR is @@ -279,12 +269,12 @@ static inline void perf_read_regs(struct pt_regs *regs) use_siar = 1; else if ((ppmu->flags & PPMU_NO_CONT_SAMPLING)) use_siar = 0; - else if (!regs_no_sipr(regs) && regs_sipr(regs)) + else if (!(ppmu->flags & PPMU_NO_SIPR) && regs_sipr(regs)) use_siar = 0; else use_siar = 1; - regs->result |= use_siar; + regs->result = use_siar; } /* @@ -308,8 +298,13 @@ static inline int siar_valid(struct pt_regs *regs) unsigned long mmcra = regs->dsisr; int marked = mmcra & MMCRA_SAMPLE_ENABLE; - if ((ppmu->flags & PPMU_SIAR_VALID) && marked) - return mmcra & POWER7P_MMCRA_SIAR_VALID; + if (marked) { + if (ppmu->flags & PPMU_HAS_SIER) + return regs->dar & SIER_SIAR_VALID; + + if (ppmu->flags & PPMU_SIAR_VALID) + return mmcra & POWER7P_MMCRA_SIAR_VALID; + } return 1; } diff --git a/arch/powerpc/platforms/pseries/Kconfig b/arch/powerpc/platforms/pseries/Kconfig index 023b288f895b..4459eff7a75a 100644 --- a/arch/powerpc/platforms/pseries/Kconfig +++ b/arch/powerpc/platforms/pseries/Kconfig @@ -19,6 +19,8 @@ config PPC_PSERIES select ZLIB_DEFLATE select PPC_DOORBELL select HAVE_CONTEXT_TRACKING + select HOTPLUG if SMP + select HOTPLUG_CPU if SMP default y config PPC_SPLPAR diff --git a/arch/powerpc/sysdev/mpic.c b/arch/powerpc/sysdev/mpic.c index 0a13ecb270c7..3cc2f9159ab1 100644 --- a/arch/powerpc/sysdev/mpic.c +++ b/arch/powerpc/sysdev/mpic.c @@ -54,7 +54,7 @@ static DEFINE_RAW_SPINLOCK(mpic_lock); #ifdef CONFIG_PPC32 /* XXX for now */ #ifdef CONFIG_IRQ_ALL_CPUS -#define distribute_irqs (!(mpic->flags & MPIC_SINGLE_DEST_CPU)) +#define distribute_irqs (1) #else #define distribute_irqs (0) #endif @@ -1703,7 +1703,7 @@ void mpic_setup_this_cpu(void) * it differently, then we should make sure we also change the default * values of irq_desc[].affinity in irq.c. */ - if (distribute_irqs) { + if (distribute_irqs && !(mpic->flags & MPIC_SINGLE_DEST_CPU)) { for (i = 0; i < mpic->num_sources ; i++) mpic_irq_write(i, MPIC_INFO(IRQ_DESTINATION), mpic_irq_read(i, MPIC_INFO(IRQ_DESTINATION)) | msk);