diff --git a/arch/powerpc/include/asm/atomic.h b/arch/powerpc/include/asm/atomic.h index 2b90335194a7..a2cc8010cd72 100644 --- a/arch/powerpc/include/asm/atomic.h +++ b/arch/powerpc/include/asm/atomic.h @@ -560,7 +560,7 @@ static __inline__ int atomic64_add_unless(atomic64_t *v, long a, long u) * Atomically increments @v by 1, so long as @v is non-zero. * Returns non-zero if @v was non-zero, and zero otherwise. */ -static __inline__ long atomic64_inc_not_zero(atomic64_t *v) +static __inline__ int atomic64_inc_not_zero(atomic64_t *v) { long t1, t2; @@ -579,7 +579,7 @@ static __inline__ long atomic64_inc_not_zero(atomic64_t *v) : "r" (&v->counter) : "cc", "xer", "memory"); - return t1; + return t1 != 0; } #endif /* __powerpc64__ */ diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index ef930ba500f9..3130a73652c7 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -876,6 +876,15 @@ struct OpalIoPhb4ErrorData { enum { OPAL_REINIT_CPUS_HILE_BE = (1 << 0), OPAL_REINIT_CPUS_HILE_LE = (1 << 1), + + /* These two define the base MMU mode of the host on P9 + * + * On P9 Nimbus DD2.0 and Cumlus (and later), KVM can still + * create hash guests in "radix" mode with care (full core + * switch only). + */ + OPAL_REINIT_CPUS_MMU_HASH = (1 << 2), + OPAL_REINIT_CPUS_MMU_RADIX = (1 << 3), }; typedef struct oppanel_line { diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 7e50e47375d6..a3b6575c7842 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1303,7 +1303,7 @@ static inline void msr_check_and_clear(unsigned long bits) " .llong 0\n" \ ".previous" \ : "=r" (rval) \ - : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL)); \ + : "i" (CPU_FTR_CELL_TB_BUG), "i" (SPRN_TBRL) : "cr0"); \ rval;}) #else #define mftb() ({unsigned long rval; \ diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S index 10cb2896b2ae..610955fe8b81 100644 --- a/arch/powerpc/kernel/cpu_setup_power.S +++ b/arch/powerpc/kernel/cpu_setup_power.S @@ -218,13 +218,20 @@ __init_tlb_power8: ptesync 1: blr +/* + * Flush the TLB in hash mode. Hash must flush with RIC=2 once for process + * and one for partition scope to clear process and partition table entries. + */ __init_tlb_power9: - li r6,POWER9_TLB_SETS_HASH + li r6,POWER9_TLB_SETS_HASH - 1 mtctr r6 li r7,0xc00 /* IS field = 0b11 */ + li r8,0 ptesync -2: tlbiel r7 - addi r7,r7,0x1000 + PPC_TLBIEL(7, 8, 2, 1, 0) + PPC_TLBIEL(7, 8, 2, 0, 0) +2: addi r7,r7,0x1000 + PPC_TLBIEL(7, 8, 0, 0, 0) bdnz 2b ptesync 1: blr diff --git a/arch/powerpc/kernel/dt_cpu_ftrs.c b/arch/powerpc/kernel/dt_cpu_ftrs.c index 4c7656dc4e04..1df770e8cbe0 100644 --- a/arch/powerpc/kernel/dt_cpu_ftrs.c +++ b/arch/powerpc/kernel/dt_cpu_ftrs.c @@ -94,9 +94,6 @@ static void (*init_pmu_registers)(void); static void cpufeatures_flush_tlb(void) { - unsigned long rb; - unsigned int i, num_sets; - /* * This is a temporary measure to keep equivalent TLB flush as the * cputable based setup code. @@ -105,24 +102,15 @@ static void cpufeatures_flush_tlb(void) case PVR_POWER8: case PVR_POWER8E: case PVR_POWER8NVL: - num_sets = POWER8_TLB_SETS; + __flush_tlb_power8(POWER8_TLB_SETS); break; case PVR_POWER9: - num_sets = POWER9_TLB_SETS_HASH; + __flush_tlb_power9(POWER9_TLB_SETS_HASH); break; default: - num_sets = 1; pr_err("unknown CPU version for boot TLB flush\n"); break; } - - asm volatile("ptesync" : : : "memory"); - rb = TLBIEL_INVAL_SET; - for (i = 0; i < num_sets; i++) { - asm volatile("tlbiel %0" : : "r" (rb)); - rb += 1 << TLBIEL_INVAL_SET_SHIFT; - } - asm volatile("ptesync" : : : "memory"); } static void __restore_cpu_cpufeatures(void) diff --git a/arch/powerpc/kernel/mce_power.c b/arch/powerpc/kernel/mce_power.c index d24e689e893f..b76ca198e09c 100644 --- a/arch/powerpc/kernel/mce_power.c +++ b/arch/powerpc/kernel/mce_power.c @@ -53,6 +53,60 @@ static void flush_tlb_206(unsigned int num_sets, unsigned int action) asm volatile("ptesync" : : : "memory"); } +static void flush_tlb_300(unsigned int num_sets, unsigned int action) +{ + unsigned long rb; + unsigned int i; + unsigned int r; + + switch (action) { + case TLB_INVAL_SCOPE_GLOBAL: + rb = TLBIEL_INVAL_SET; + break; + case TLB_INVAL_SCOPE_LPID: + rb = TLBIEL_INVAL_SET_LPID; + break; + default: + BUG(); + break; + } + + asm volatile("ptesync" : : : "memory"); + + if (early_radix_enabled()) + r = 1; + else + r = 0; + + /* + * First flush table/PWC caches with set 0, then flush the + * rest of the sets, partition scope. Radix must then do it + * all again with process scope. Hash just has to flush + * process table. + */ + asm volatile(PPC_TLBIEL(%0, %1, %2, %3, %4) : : + "r"(rb), "r"(0), "i"(2), "i"(0), "r"(r)); + for (i = 1; i < num_sets; i++) { + unsigned long set = i * (1<user + * protection, but on hash key 0 is reserved + * ideally we want to enter with a clean state. + * NOTE, we rely on r0 being 0 from above. + */ + mtspr SPRN_IAMR,r0 + mtspr SPRN_AMOR,r0 +END_FTR_SECTION_IFSET(CPU_FTR_ARCH_300) + /* save regs for local vars on new stack. * yes, we won't go back, but ... */ diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 33117f8a0882..ee33327686ae 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -683,8 +683,10 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, case 19: switch ((instr >> 1) & 0x3ff) { case 0: /* mcrf */ - rd = (instr >> 21) & 0x1c; - ra = (instr >> 16) & 0x1c; + rd = 7 - ((instr >> 23) & 0x7); + ra = 7 - ((instr >> 18) & 0x7); + rd *= 4; + ra *= 4; val = (regs->ccr >> ra) & 0xf; regs->ccr = (regs->ccr & ~(0xfUL << rd)) | (val << rd); goto instr_done; @@ -964,6 +966,19 @@ int analyse_instr(struct instruction_op *op, struct pt_regs *regs, #endif case 19: /* mfcr */ + if ((instr >> 20) & 1) { + imm = 0xf0000000UL; + for (sh = 0; sh < 8; ++sh) { + if (instr & (0x80000 >> sh)) { + regs->gpr[rd] = regs->ccr & imm; + break; + } + imm >>= 4; + } + + goto instr_done; + } + regs->gpr[rd] = regs->ccr; regs->gpr[rd] &= 0xffffffffUL; goto instr_done; diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index 71de2c6d88f3..abed1fe6992f 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -138,6 +138,14 @@ static int radix__init_new_context(struct mm_struct *mm) rts_field = radix__get_tree_size(); process_tb[index].prtb0 = cpu_to_be64(rts_field | __pa(mm->pgd) | RADIX_PGD_INDEX_SIZE); + /* + * Order the above store with subsequent update of the PID + * register (at which point HW can start loading/caching + * the entry) and the corresponding load by the MMU from + * the L2 cache. + */ + asm volatile("ptesync;isync" : : : "memory"); + mm->context.npu_context = NULL; return index; @@ -223,9 +231,15 @@ void destroy_context(struct mm_struct *mm) mm->context.cop_lockp = NULL; #endif /* CONFIG_PPC_ICSWX */ - if (radix_enabled()) - process_tb[mm->context.id].prtb1 = 0; - else + if (radix_enabled()) { + /* + * Radix doesn't have a valid bit in the process table + * entries. However we know that at least P9 implementation + * will avoid caching an entry with an invalid RTS field, + * and 0 is invalid. So this will do. + */ + process_tb[mm->context.id].prtb0 = 0; + } else subpage_prot_free(mm); destroy_pagetable_page(mm); __destroy_context(mm->context.id); diff --git a/arch/powerpc/perf/isa207-common.c b/arch/powerpc/perf/isa207-common.c index 8125160be7bc..3f3aa9a7063a 100644 --- a/arch/powerpc/perf/isa207-common.c +++ b/arch/powerpc/perf/isa207-common.c @@ -90,13 +90,15 @@ static void mmcra_sdar_mode(u64 event, unsigned long *mmcra) * MMCRA[SDAR_MODE] will be set to 0b01 * For rest * MMCRA[SDAR_MODE] will be set from event code. + * If sdar_mode from event is zero, default to 0b01. Hardware + * requires that we set a non-zero value. */ if (cpu_has_feature(CPU_FTR_ARCH_300)) { if (is_event_marked(event) || (*mmcra & MMCRA_SAMPLE_ENABLE)) *mmcra &= MMCRA_SDAR_MODE_NO_UPDATES; - else if (!cpu_has_feature(CPU_FTR_POWER9_DD1)) + else if (!cpu_has_feature(CPU_FTR_POWER9_DD1) && p9_SDAR_MODE(event)) *mmcra |= p9_SDAR_MODE(event) << MMCRA_SDAR_MODE_SHIFT; - else if (cpu_has_feature(CPU_FTR_POWER9_DD1)) + else *mmcra |= MMCRA_SDAR_MODE_TLB; } else *mmcra |= MMCRA_SDAR_MODE_TLB; diff --git a/arch/powerpc/perf/power9-events-list.h b/arch/powerpc/perf/power9-events-list.h index 80204e064362..50689180a6c1 100644 --- a/arch/powerpc/perf/power9-events-list.h +++ b/arch/powerpc/perf/power9-events-list.h @@ -51,8 +51,12 @@ EVENT(PM_DTLB_MISS, 0x300fc) EVENT(PM_ITLB_MISS, 0x400fc) /* Run_Instructions */ EVENT(PM_RUN_INST_CMPL, 0x500fa) +/* Alternate event code for PM_RUN_INST_CMPL */ +EVENT(PM_RUN_INST_CMPL_ALT, 0x400fa) /* Run_cycles */ EVENT(PM_RUN_CYC, 0x600f4) +/* Alternate event code for Run_cycles */ +EVENT(PM_RUN_CYC_ALT, 0x200f4) /* Instruction Dispatched */ EVENT(PM_INST_DISP, 0x200f2) EVENT(PM_INST_DISP_ALT, 0x300f2) diff --git a/arch/powerpc/perf/power9-pmu.c b/arch/powerpc/perf/power9-pmu.c index f17435e4a489..2280cf87ff9c 100644 --- a/arch/powerpc/perf/power9-pmu.c +++ b/arch/powerpc/perf/power9-pmu.c @@ -107,6 +107,8 @@ extern struct attribute_group isa207_pmu_format_group; /* Table of alternatives, sorted by column 0 */ static const unsigned int power9_event_alternatives[][MAX_ALT] = { { PM_INST_DISP, PM_INST_DISP_ALT }, + { PM_RUN_CYC_ALT, PM_RUN_CYC }, + { PM_RUN_INST_CMPL_ALT, PM_RUN_INST_CMPL }, }; static int power9_get_alternatives(u64 event, unsigned int flags, u64 alt[]) diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 59684b4af4d1..9b87abb178f0 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -59,6 +59,8 @@ static struct task_struct *kopald_tsk; void opal_configure_cores(void) { + u64 reinit_flags = 0; + /* Do the actual re-init, This will clobber all FPRs, VRs, etc... * * It will preserve non volatile GPRs and HSPRG0/1. It will @@ -66,11 +68,24 @@ void opal_configure_cores(void) * but it might clobber a bunch. */ #ifdef __BIG_ENDIAN__ - opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); + reinit_flags |= OPAL_REINIT_CPUS_HILE_BE; #else - opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_LE); + reinit_flags |= OPAL_REINIT_CPUS_HILE_LE; #endif + /* + * POWER9 always support running hash: + * ie. Host hash supports hash guests + * Host radix supports hash/radix guests + */ + if (cpu_has_feature(CPU_FTR_ARCH_300)) { + reinit_flags |= OPAL_REINIT_CPUS_MMU_HASH; + if (early_radix_enabled()) + reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX; + } + + opal_reinit_cpus(reinit_flags); + /* Restore some bits */ if (cur_cpu_spec->cpu_restore) cur_cpu_spec->cpu_restore(); diff --git a/arch/powerpc/platforms/powernv/setup.c b/arch/powerpc/platforms/powernv/setup.c index 2dc7e5fb86c3..897aa1400eb8 100644 --- a/arch/powerpc/platforms/powernv/setup.c +++ b/arch/powerpc/platforms/powernv/setup.c @@ -225,6 +225,8 @@ static void pnv_kexec_wait_secondaries_down(void) static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) { + u64 reinit_flags; + if (xive_enabled()) xive_kexec_teardown_cpu(secondary); else @@ -254,8 +256,15 @@ static void pnv_kexec_cpu_down(int crash_shutdown, int secondary) * We might be running as little-endian - now that interrupts * are disabled, reset the HILE bit to big-endian so we don't * take interrupts in the wrong endian later + * + * We reinit to enable both radix and hash on P9 to ensure + * the mode used by the next kernel is always supported. */ - opal_reinit_cpus(OPAL_REINIT_CPUS_HILE_BE); + reinit_flags = OPAL_REINIT_CPUS_HILE_BE; + if (cpu_has_feature(CPU_FTR_ARCH_300)) + reinit_flags |= OPAL_REINIT_CPUS_MMU_RADIX | + OPAL_REINIT_CPUS_MMU_HASH; + opal_reinit_cpus(reinit_flags); } } #endif /* CONFIG_KEXEC_CORE */