From e148bd17f48bd17fca2f4f089ec879fa6e47e34c Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:42 +0530 Subject: [PATCH 01/15] powerpc: Emulation support for load/store instructions on LE emulate_step() uses a number of underlying kernel functions that were initially not enabled for LE. This has been rectified since. So, fix emulate_step() for LE for the corresponding instructions. Cc: stable@vger.kernel.org # v3.18+ Reported-by: Anton Blanchard Signed-off-by: Ravi Bangoria Signed-off-by: Michael Ellerman --- arch/powerpc/lib/sstep.c | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/arch/powerpc/lib/sstep.c b/arch/powerpc/lib/sstep.c index 846dba2c6360..9c542ec70c5b 100644 --- a/arch/powerpc/lib/sstep.c +++ b/arch/powerpc/lib/sstep.c @@ -1799,8 +1799,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case LARX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1823,8 +1821,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case STCX: - if (regs->msr & MSR_LE) - return 0; if (op.ea & (size - 1)) break; /* can't handle misaligned */ if (!address_ok(regs, op.ea, size)) @@ -1849,8 +1845,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto ldst_done; case LOAD: - if (regs->msr & MSR_LE) - return 0; err = read_mem(®s->gpr[op.reg], op.ea, size, regs); if (!err) { if (op.type & SIGNEXT) @@ -1862,8 +1856,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case LOAD_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_load(op.reg, do_lfs, op.ea, size, regs); else @@ -1872,15 +1864,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case LOAD_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_load(op.reg, do_lvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case LOAD_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_load(op.reg, do_lxvd2x, op.ea, regs); goto ldst_done; #endif @@ -1903,8 +1891,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) goto instr_done; case STORE: - if (regs->msr & MSR_LE) - return 0; if ((op.type & UPDATE) && size == sizeof(long) && op.reg == 1 && op.update_reg == 1 && !(regs->msr & MSR_PR) && @@ -1917,8 +1903,6 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #ifdef CONFIG_PPC_FPU case STORE_FP: - if (regs->msr & MSR_LE) - return 0; if (size == 4) err = do_fp_store(op.reg, do_stfs, op.ea, size, regs); else @@ -1927,15 +1911,11 @@ int __kprobes emulate_step(struct pt_regs *regs, unsigned int instr) #endif #ifdef CONFIG_ALTIVEC case STORE_VMX: - if (regs->msr & MSR_LE) - return 0; err = do_vec_store(op.reg, do_stvx, op.ea & ~0xfUL, regs); goto ldst_done; #endif #ifdef CONFIG_VSX case STORE_VSX: - if (regs->msr & MSR_LE) - return 0; err = do_vsx_store(op.reg, do_stxvd2x, op.ea, regs); goto ldst_done; #endif From 4ceae137bdab2232e57b2e6fd5631a22a0160938 Mon Sep 17 00:00:00 2001 From: Ravi Bangoria Date: Tue, 14 Feb 2017 14:46:43 +0530 Subject: [PATCH 02/15] powerpc: emulate_step() tests for load/store instructions Add new selftest that test emulate_step for Normal, Floating Point, Vector and Vector Scalar - load/store instructions. Test should run at boot time if CONFIG_KPROBES_SANITY_TEST and CONFIG_PPC64 is set. Sample log: emulate_step_test: ld : PASS emulate_step_test: lwz : PASS emulate_step_test: lwzx : PASS emulate_step_test: std : PASS emulate_step_test: ldarx / stdcx. : PASS emulate_step_test: lfsx : PASS emulate_step_test: stfsx : PASS emulate_step_test: lfdx : PASS emulate_step_test: stfdx : PASS emulate_step_test: lvx : PASS emulate_step_test: stvx : PASS emulate_step_test: lxvd2x : PASS emulate_step_test: stxvd2x : PASS Signed-off-by: Ravi Bangoria [mpe: Drop start/complete lines, make it all __init] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/ppc-opcode.h | 7 + arch/powerpc/lib/Makefile | 1 + arch/powerpc/lib/test_emulate_step.c | 434 ++++++++++++++++++++++++++ 3 files changed, 442 insertions(+) create mode 100644 arch/powerpc/lib/test_emulate_step.c diff --git a/arch/powerpc/include/asm/ppc-opcode.h b/arch/powerpc/include/asm/ppc-opcode.h index d99bd442aacb..e7d6d86563ee 100644 --- a/arch/powerpc/include/asm/ppc-opcode.h +++ b/arch/powerpc/include/asm/ppc-opcode.h @@ -284,6 +284,13 @@ #define PPC_INST_BRANCH_COND 0x40800000 #define PPC_INST_LBZCIX 0x7c0006aa #define PPC_INST_STBCIX 0x7c0007aa +#define PPC_INST_LWZX 0x7c00002e +#define PPC_INST_LFSX 0x7c00042e +#define PPC_INST_STFSX 0x7c00052e +#define PPC_INST_LFDX 0x7c0004ae +#define PPC_INST_STFDX 0x7c0005ae +#define PPC_INST_LVX 0x7c0000ce +#define PPC_INST_STVX 0x7c0001ce /* macros to insert fields into opcodes */ #define ___PPC_RA(a) (((a) & 0x1f) << 16) diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile index 0e649d72fe8d..2b5e09020cfe 100644 --- a/arch/powerpc/lib/Makefile +++ b/arch/powerpc/lib/Makefile @@ -20,6 +20,7 @@ obj64-y += copypage_64.o copyuser_64.o usercopy_64.o mem_64.o hweight_64.o \ obj64-$(CONFIG_SMP) += locks.o obj64-$(CONFIG_ALTIVEC) += vmx-helper.o +obj64-$(CONFIG_KPROBES_SANITY_TEST) += test_emulate_step.o obj-y += checksum_$(BITS).o checksum_wrappers.o diff --git a/arch/powerpc/lib/test_emulate_step.c b/arch/powerpc/lib/test_emulate_step.c new file mode 100644 index 000000000000..2534c1447554 --- /dev/null +++ b/arch/powerpc/lib/test_emulate_step.c @@ -0,0 +1,434 @@ +/* + * Simple sanity test for emulate_step load/store instructions. + * + * Copyright IBM Corp. 2016 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#define pr_fmt(fmt) "emulate_step_test: " fmt + +#include +#include +#include + +#define IMM_L(i) ((uintptr_t)(i) & 0xffff) + +/* + * Defined with TEST_ prefix so it does not conflict with other + * definitions. + */ +#define TEST_LD(r, base, i) (PPC_INST_LD | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZ(r, base, i) (PPC_INST_LWZ | ___PPC_RT(r) | \ + ___PPC_RA(base) | IMM_L(i)) +#define TEST_LWZX(t, a, b) (PPC_INST_LWZX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STD(r, base, i) (PPC_INST_STD | ___PPC_RS(r) | \ + ___PPC_RA(base) | ((i) & 0xfffc)) +#define TEST_LDARX(t, a, b, eh) (PPC_INST_LDARX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b) | \ + __PPC_EH(eh)) +#define TEST_STDCX(s, a, b) (PPC_INST_STDCX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFSX(t, a, b) (PPC_INST_LFSX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFSX(s, a, b) (PPC_INST_STFSX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LFDX(t, a, b) (PPC_INST_LFDX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STFDX(s, a, b) (PPC_INST_STFDX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LVX(t, a, b) (PPC_INST_LVX | ___PPC_RT(t) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_STVX(s, a, b) (PPC_INST_STVX | ___PPC_RS(s) | \ + ___PPC_RA(a) | ___PPC_RB(b)) +#define TEST_LXVD2X(s, a, b) (PPC_INST_LXVD2X | VSX_XX1((s), R##a, R##b)) +#define TEST_STXVD2X(s, a, b) (PPC_INST_STXVD2X | VSX_XX1((s), R##a, R##b)) + + +static void __init init_pt_regs(struct pt_regs *regs) +{ + static unsigned long msr; + static bool msr_cached; + + memset(regs, 0, sizeof(struct pt_regs)); + + if (likely(msr_cached)) { + regs->msr = msr; + return; + } + + asm volatile("mfmsr %0" : "=r"(regs->msr)); + + regs->msr |= MSR_FP; + regs->msr |= MSR_VEC; + regs->msr |= MSR_VSX; + + msr = regs->msr; + msr_cached = true; +} + +static void __init show_result(char *ins, char *result) +{ + pr_info("%-14s : %s\n", ins, result); +} + +static void __init test_ld(void) +{ + struct pt_regs regs; + unsigned long a = 0x23; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* ld r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LD(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("ld", "PASS"); + else + show_result("ld", "FAIL"); +} + +static void __init test_lwz(void) +{ + struct pt_regs regs; + unsigned int a = 0x4545; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + + /* lwz r5, 0(r3) */ + stepped = emulate_step(®s, TEST_LWZ(5, 3, 0)); + + if (stepped == 1 && regs.gpr[5] == a) + show_result("lwz", "PASS"); + else + show_result("lwz", "FAIL"); +} + +static void __init test_lwzx(void) +{ + struct pt_regs regs; + unsigned int a[3] = {0x0, 0x0, 0x1234}; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) a; + regs.gpr[4] = 8; + regs.gpr[5] = 0x8765; + + /* lwzx r5, r3, r4 */ + stepped = emulate_step(®s, TEST_LWZX(5, 3, 4)); + if (stepped == 1 && regs.gpr[5] == a[2]) + show_result("lwzx", "PASS"); + else + show_result("lwzx", "FAIL"); +} + +static void __init test_std(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + + init_pt_regs(®s); + regs.gpr[3] = (unsigned long) &a; + regs.gpr[5] = 0x5678; + + /* std r5, 0(r3) */ + stepped = emulate_step(®s, TEST_STD(5, 3, 0)); + if (stepped == 1 || regs.gpr[5] == a) + show_result("std", "PASS"); + else + show_result("std", "FAIL"); +} + +static void __init test_ldarx_stdcx(void) +{ + struct pt_regs regs; + unsigned long a = 0x1234; + int stepped = -1; + unsigned long cr0_eq = 0x1 << 29; /* eq bit of CR0 */ + + init_pt_regs(®s); + asm volatile("mfcr %0" : "=r"(regs.ccr)); + + + /*** ldarx ***/ + + regs.gpr[3] = (unsigned long) &a; + regs.gpr[4] = 0; + regs.gpr[5] = 0x5678; + + /* ldarx r5, r3, r4, 0 */ + stepped = emulate_step(®s, TEST_LDARX(5, 3, 4, 0)); + + /* + * Don't touch 'a' here. Touching 'a' can do Load/store + * of 'a' which result in failure of subsequent stdcx. + * Instead, use hardcoded value for comparison. + */ + if (stepped <= 0 || regs.gpr[5] != 0x1234) { + show_result("ldarx / stdcx.", "FAIL (ldarx)"); + return; + } + + + /*** stdcx. ***/ + + regs.gpr[5] = 0x9ABC; + + /* stdcx. r5, r3, r4 */ + stepped = emulate_step(®s, TEST_STDCX(5, 3, 4)); + + /* + * Two possible scenarios that indicates successful emulation + * of stdcx. : + * 1. Reservation is active and store is performed. In this + * case cr0.eq bit will be set to 1. + * 2. Reservation is not active and store is not performed. + * In this case cr0.eq bit will be set to 0. + */ + if (stepped == 1 && ((regs.gpr[5] == a && (regs.ccr & cr0_eq)) + || (regs.gpr[5] != a && !(regs.ccr & cr0_eq)))) + show_result("ldarx / stdcx.", "PASS"); + else + show_result("ldarx / stdcx.", "FAIL (stdcx.)"); +} + +#ifdef CONFIG_PPC_FPU +static void __init test_lfsx_stfsx(void) +{ + struct pt_regs regs; + union { + float a; + int b; + } c; + int cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfsx ***/ + + c.a = 123.45; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfsx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFSX(10, 3, 4)); + + if (stepped == 1) + show_result("lfsx", "PASS"); + else + show_result("lfsx", "FAIL"); + + + /*** stfsx ***/ + + c.a = 678.91; + + /* stfsx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFSX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfsx", "PASS"); + else + show_result("stfsx", "FAIL"); +} + +static void __init test_lfdx_stfdx(void) +{ + struct pt_regs regs; + union { + double a; + long b; + } c; + long cached_b; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lfdx ***/ + + c.a = 123456.78; + cached_b = c.b; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lfdx frt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LFDX(10, 3, 4)); + + if (stepped == 1) + show_result("lfdx", "PASS"); + else + show_result("lfdx", "FAIL"); + + + /*** stfdx ***/ + + c.a = 987654.32; + + /* stfdx frs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STFDX(10, 3, 4)); + + if (stepped == 1 && c.b == cached_b) + show_result("stfdx", "PASS"); + else + show_result("stfdx", "FAIL"); +} +#else +static void __init test_lfsx_stfsx(void) +{ + show_result("lfsx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfsx", "SKIP (CONFIG_PPC_FPU is not set)"); +} + +static void __init test_lfdx_stfdx(void) +{ + show_result("lfdx", "SKIP (CONFIG_PPC_FPU is not set)"); + show_result("stfdx", "SKIP (CONFIG_PPC_FPU is not set)"); +} +#endif /* CONFIG_PPC_FPU */ + +#ifdef CONFIG_ALTIVEC +static void __init test_lvx_stvx(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lvx ***/ + + cached_b[0] = c.b[0] = 923745; + cached_b[1] = c.b[1] = 2139478; + cached_b[2] = c.b[2] = 9012; + cached_b[3] = c.b[3] = 982134; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lvx vrt10, r3, r4 */ + stepped = emulate_step(®s, TEST_LVX(10, 3, 4)); + + if (stepped == 1) + show_result("lvx", "PASS"); + else + show_result("lvx", "FAIL"); + + + /*** stvx ***/ + + c.b[0] = 4987513; + c.b[1] = 84313948; + c.b[2] = 71; + c.b[3] = 498532; + + /* stvx vrs10, r3, r4 */ + stepped = emulate_step(®s, TEST_STVX(10, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stvx", "PASS"); + else + show_result("stvx", "FAIL"); +} +#else +static void __init test_lvx_stvx(void) +{ + show_result("lvx", "SKIP (CONFIG_ALTIVEC is not set)"); + show_result("stvx", "SKIP (CONFIG_ALTIVEC is not set)"); +} +#endif /* CONFIG_ALTIVEC */ + +#ifdef CONFIG_VSX +static void __init test_lxvd2x_stxvd2x(void) +{ + struct pt_regs regs; + union { + vector128 a; + u32 b[4]; + } c; + u32 cached_b[4]; + int stepped = -1; + + init_pt_regs(®s); + + + /*** lxvd2x ***/ + + cached_b[0] = c.b[0] = 18233; + cached_b[1] = c.b[1] = 34863571; + cached_b[2] = c.b[2] = 834; + cached_b[3] = c.b[3] = 6138911; + + regs.gpr[3] = (unsigned long) &c.a; + regs.gpr[4] = 0; + + /* lxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_LXVD2X(39, 3, 4)); + + if (stepped == 1) + show_result("lxvd2x", "PASS"); + else + show_result("lxvd2x", "FAIL"); + + + /*** stxvd2x ***/ + + c.b[0] = 21379463; + c.b[1] = 87; + c.b[2] = 374234; + c.b[3] = 4; + + /* stxvd2x vsr39, r3, r4 */ + stepped = emulate_step(®s, TEST_STXVD2X(39, 3, 4)); + + if (stepped == 1 && cached_b[0] == c.b[0] && cached_b[1] == c.b[1] && + cached_b[2] == c.b[2] && cached_b[3] == c.b[3]) + show_result("stxvd2x", "PASS"); + else + show_result("stxvd2x", "FAIL"); +} +#else +static void __init test_lxvd2x_stxvd2x(void) +{ + show_result("lxvd2x", "SKIP (CONFIG_VSX is not set)"); + show_result("stxvd2x", "SKIP (CONFIG_VSX is not set)"); +} +#endif /* CONFIG_VSX */ + +static int __init test_emulate_step(void) +{ + test_ld(); + test_lwz(); + test_lwzx(); + test_std(); + test_ldarx_stdcx(); + test_lfsx_stfsx(); + test_lfdx_stfdx(); + test_lvx_stvx(); + test_lxvd2x_stxvd2x(); + + return 0; +} +late_initcall(test_emulate_step); From 7a70d7288c926ae88e0c773fbb506aa374e99c2d Mon Sep 17 00:00:00 2001 From: Paul Mackerras Date: Mon, 27 Feb 2017 14:32:41 +1100 Subject: [PATCH 03/15] powerpc/64: Invalidate process table caching after setting process table The POWER9 MMU reads and caches entries from the process table. When we kexec from one kernel to another, the second kernel sets its process table pointer but doesn't currently do anything to make the CPU invalidate any cached entries from the old process table. This adds a tlbie (TLB invalidate entry) instruction with parameters to invalidate caching of the process table after the new process table is installed. Signed-off-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/mm/pgtable-radix.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index feeda90cd06d..01c94f141164 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -186,6 +186,10 @@ static void __init radix_init_pgtable(void) */ register_process_table(__pa(process_tb), 0, PRTB_SIZE_SHIFT - 12); pr_info("Process table %p and radix root for kernel: %p\n", process_tb, init_mm.pgd); + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (0)); + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } static void __init radix_init_partition_table(void) From 424f8acd328a111319ae30bf384e5dfb9bc8f8cb Mon Sep 17 00:00:00 2001 From: "Gautham R. Shenoy" Date: Mon, 27 Feb 2017 11:10:07 +0530 Subject: [PATCH 04/15] powerpc/powernv: Fix bug due to labeling ambiguity in power_enter_stop Commit 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop") added additional code in power_enter_stop() to distinguish between stop requests whose PSSCR had ESL=EC=1 from those which did not. When ESL=EC=1, we do a forward-jump to a location labelled by "1", which had the code to handle the ESL=EC=1 case. Unfortunately just a couple of instructions before this label, is the macro IDLE_STATE_ENTER_SEQ() which also has a label "1" in its expansion. As a result, the current code can result in directly executing stop instruction for deep stop requests with PSSCR ESL=EC=1, without saving the hypervisor state. Fix this BUG by labeling the location that handles ESL=EC=1 case with a more descriptive label ".Lhandle_esl_ec_set" (local label suggestion a la .Lxx from Anton Blanchard). While at it, rename the label "2" labelling the location of the code handling entry into deep stop states with ".Lhandle_deep_stop". For a good measure, change the label in IDLE_STATE_ENTER_SEQ() macro to an not-so commonly used value in order to avoid similar mishaps in the future. Fixes: 09206b600c76 ("powernv: Pass PSSCR value and mask to power9_idle_stop") Signed-off-by: Gautham R. Shenoy Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/cpuidle.h | 4 ++-- arch/powerpc/kernel/idle_book3s.S | 10 ++++++---- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/arch/powerpc/include/asm/cpuidle.h b/arch/powerpc/include/asm/cpuidle.h index fd321eb423cb..155731557c9b 100644 --- a/arch/powerpc/include/asm/cpuidle.h +++ b/arch/powerpc/include/asm/cpuidle.h @@ -70,8 +70,8 @@ static inline void report_invalid_psscr_val(u64 psscr_val, int err) std r0,0(r1); \ ptesync; \ ld r0,0(r1); \ -1: cmpd cr0,r0,r0; \ - bne 1b; \ +236: cmpd cr0,r0,r0; \ + bne 236b; \ IDLE_INST; \ #define IDLE_STATE_ENTER_SEQ_NORET(IDLE_INST) \ diff --git a/arch/powerpc/kernel/idle_book3s.S b/arch/powerpc/kernel/idle_book3s.S index 5f61cc0349c0..995728736677 100644 --- a/arch/powerpc/kernel/idle_book3s.S +++ b/arch/powerpc/kernel/idle_book3s.S @@ -276,19 +276,21 @@ power_enter_stop: */ andis. r4,r3,PSSCR_EC_ESL_MASK_SHIFTED clrldi r3,r3,60 /* r3 = Bits[60:63] = Requested Level (RL) */ - bne 1f + bne .Lhandle_esl_ec_set IDLE_STATE_ENTER_SEQ(PPC_STOP) li r3,0 /* Since we didn't lose state, return 0 */ b pnv_wakeup_noloss + +.Lhandle_esl_ec_set: /* * Check if the requested state is a deep idle state. */ -1: LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) + LOAD_REG_ADDRBASE(r5,pnv_first_deep_stop_state) ld r4,ADDROFF(pnv_first_deep_stop_state)(r5) cmpd r3,r4 - bge 2f + bge .Lhandle_deep_stop IDLE_STATE_ENTER_SEQ_NORET(PPC_STOP) -2: +.Lhandle_deep_stop: /* * Entering deep idle state. * Clear thread bit in PACA_CORE_IDLE_STATE, save SPRs to From a6d8a21596df041f36f4c2ccc260c459e3e851f1 Mon Sep 17 00:00:00 2001 From: Sachin Sant Date: Sun, 26 Feb 2017 11:38:39 +0530 Subject: [PATCH 05/15] selftest/powerpc: Fix false failures for skipped tests Tests under alignment subdirectory are skipped when executed on previous generation hardware, but harness still marks them as failed. test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [FAIL] The MAGIC_SKIP_RETURN_VALUE value assigned to rc variable is retained till the program exit which causes the test to be marked as failed. This patch resets the value before returning to the main() routine. With this patch the test o/p is as follows: test: test_copy_unaligned tags: git_version:unknown [SKIP] Test skipped on line 26 skip: test_copy_unaligned selftests: copy_unaligned [PASS] Signed-off-by: Sachin Sant Signed-off-by: Michael Ellerman --- tools/testing/selftests/powerpc/harness.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 248a820048df..66d31de60b9a 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -114,9 +114,11 @@ int test_harness(int (test_function)(void), char *name) rc = run_test(test_function, name); - if (rc == MAGIC_SKIP_RETURN_VALUE) + if (rc == MAGIC_SKIP_RETURN_VALUE) { test_skip(name); - else + /* so that skipped test is not marked as failed */ + rc = 0; + } else test_finish(name, rc); return rc; From 4dc831aa88132f835cefe876aa0206977c4d7710 Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Sun, 27 Nov 2016 13:46:20 +1100 Subject: [PATCH 06/15] powerpc: Fix compiling a BE kernel with a powerpc64le toolchain GCC can compile with either endian, but the default ABI version is set based on the default endianness of the toolchain. Alan Modra says: you need both -mbig and -mabi=elfv1 to make a powerpc64le gcc generate powerpc64 code The opposite is true for powerpc64 when generating -mlittle it requires -mabi=elfv2 to generate v2 ABI, which we were already doing. This change adds ABI annotations together with endianness for all cases, LE and BE. This fixes the case of building a BE kernel with a toolchain that is LE by default. Signed-off-by: Nicholas Piggin Tested-by: Naveen N. Rao Signed-off-by: Michael Ellerman --- arch/powerpc/Makefile | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/Makefile b/arch/powerpc/Makefile index 31286fa7873c..19b0d1a81959 100644 --- a/arch/powerpc/Makefile +++ b/arch/powerpc/Makefile @@ -72,8 +72,15 @@ GNUTARGET := powerpc MULTIPLEWORD := -mmultiple endif -cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) +ifdef CONFIG_PPC64 +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mcall-aixdesc) +aflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mabi=elfv1) +aflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mabi=elfv2 +endif + cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mlittle-endian +cflags-$(CONFIG_CPU_BIG_ENDIAN) += $(call cc-option,-mbig-endian) ifneq ($(cc-name),clang) cflags-$(CONFIG_CPU_LITTLE_ENDIAN) += -mno-strict-align endif @@ -113,7 +120,9 @@ ifeq ($(CONFIG_CPU_LITTLE_ENDIAN),y) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2,$(call cc-option,-mcall-aixdesc)) AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv2) else +CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcall-aixdesc) +AFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mabi=elfv1) endif CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mcmodel=medium,$(call cc-option,-mminimal-toc)) CFLAGS-$(CONFIG_PPC64) += $(call cc-option,-mno-pointers-to-nested-functions) From 3fb66a70a4ae886445743354e4b60e54058bb3ff Mon Sep 17 00:00:00 2001 From: Laurentiu Tudor Date: Thu, 16 Feb 2017 09:11:29 -0600 Subject: [PATCH 07/15] powerpc/booke: Fix boot crash due to null hugepd On 32-bit book-e machines, hugepd_ok() no longer takes into account null hugepd values, causing this crash at boot: Unable to handle kernel paging request for data at address 0x80000000 ... NIP [c0018378] follow_huge_addr+0x38/0xf0 LR [c001836c] follow_huge_addr+0x2c/0xf0 Call Trace: follow_huge_addr+0x2c/0xf0 (unreliable) follow_page_mask+0x40/0x3e0 __get_user_pages+0xc8/0x450 get_user_pages_remote+0x8c/0x250 copy_strings+0x110/0x390 copy_strings_kernel+0x2c/0x50 do_execveat_common+0x478/0x630 do_execve+0x2c/0x40 try_to_run_init_process+0x18/0x60 kernel_init+0xbc/0x110 ret_from_kernel_thread+0x5c/0x64 This impacts all nxp (ex-freescale) 32-bit booke platforms. This was caused by the change of hugepd_t.pd from signed to unsigned, and the update to the nohash version of hugepd_ok(). Previously hugepd_ok() could exclude all non-huge and NULL pgds using > 0, whereas now we need to explicitly check that the value is not zero and also that PD_HUGE is *clear*. This isn't protected by the pgd_none() check in __find_linux_pte_or_hugepte() because on 32-bit we use pgtable-nopud.h, which causes the pgd_none() check to be always false. Fixes: 20717e1ff526 ("powerpc/mm: Fix little-endian 4K hugetlb") Cc: stable@vger.kernel.org # v4.7+ Reported-by: Madalin-Cristian Bucur Signed-off-by: Laurentiu Tudor [mpe: Flesh out change log details.] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/nohash/pgtable.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/nohash/pgtable.h b/arch/powerpc/include/asm/nohash/pgtable.h index 0cd8a3852763..e5805ad78e12 100644 --- a/arch/powerpc/include/asm/nohash/pgtable.h +++ b/arch/powerpc/include/asm/nohash/pgtable.h @@ -230,7 +230,7 @@ static inline int hugepd_ok(hugepd_t hpd) return ((hpd_val(hpd) & 0x4) != 0); #else /* We clear the top bit to indicate hugepd */ - return ((hpd_val(hpd) & PD_HUGE) == 0); + return (hpd_val(hpd) && (hpd_val(hpd) & PD_HUGE) == 0); #endif } From 2a9c4f40ab2c281f95d41577ff0f7f78668feb73 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Thu, 2 Mar 2017 17:41:24 +1100 Subject: [PATCH 08/15] powerpc/powernv: Fix opal tracepoints with JUMP_LABEL=n The recent commit to allow calling OPAL calls in real mode, commit ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls"), introduced a bug when CONFIG_JUMP_LABEL=n. The commit moved the "mfmsr r12" prior to the call to OPAL_BRANCH, but we missed that OPAL_BRANCH clobbers r12 when jump labels are disabled. This leads to us using the tracepoint refcount as the MSR value, typically zero, and saving that into PACASAVEDMSR. When we return from OPAL we use that value as the MSR value for rfid, meaning we switch to 32-bit BE real mode - hilarity ensues. Fix it by using r11 in OPAL_BRANCH, which is not live at the time the macro is used in OPAL_CALL. Fixes: ab9bad0ead9a ("powerpc/powernv: Remove separate entry for OPAL real mode calls") Suggested-by: Paul Mackerras Signed-off-by: Alexey Kardashevskiy Signed-off-by: Michael Ellerman --- arch/powerpc/platforms/powernv/opal-wrappers.S | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 6693f75e93d1..da8a0f7a035c 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -39,8 +39,8 @@ opal_tracepoint_refcount: BEGIN_FTR_SECTION; \ b 1f; \ END_FTR_SECTION(0, 1); \ - ld r12,opal_tracepoint_refcount@toc(r2); \ - cmpdi r12,0; \ + ld r11,opal_tracepoint_refcount@toc(r2); \ + cmpdi r11,0; \ bne- LABEL; \ 1: From 6ad966d7303b70165228dba1ee8da1a05c10eefe Mon Sep 17 00:00:00 2001 From: Shile Zhang Date: Sat, 4 Feb 2017 17:03:40 +0800 Subject: [PATCH 09/15] powerpc/64: Fix checksum folding in csum_add() Paul's patch to fix checksum folding, commit b492f7e4e07a ("powerpc/64: Fix checksum folding in csum_tcpudp_nofold and ip_fast_csum_nofold") missed a case in csum_add(). Fix it. Signed-off-by: Shile Zhang Acked-by: Paul Mackerras Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/checksum.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h index 4e63787dc3be..842124b199b5 100644 --- a/arch/powerpc/include/asm/checksum.h +++ b/arch/powerpc/include/asm/checksum.h @@ -112,7 +112,7 @@ static inline __wsum csum_add(__wsum csum, __wsum addend) #ifdef __powerpc64__ res += (__force u64)addend; - return (__force __wsum)((u32)res + (res >> 32)); + return (__force __wsum) from64to32(res); #else asm("addc %0,%0,%1;" "addze %0,%0;" From a69e2fb70350a66f91175cd2625f1e8215c5b6e9 Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Fri, 3 Mar 2017 11:58:44 +1100 Subject: [PATCH 10/15] powerpc/xics: Work around limitations of OPAL XICS priority handling The CPPR (Current Processor Priority Register) of a XICS interrupt presentation controller contains a value N, such that only interrupts with a priority "more favoured" than N will be received by the CPU, where "more favoured" means "less than". So if the CPPR has the value 5 then only interrupts with a priority of 0-4 inclusive will be received. In theory the CPPR can support a value of 0 to 255 inclusive. In practice Linux only uses values of 0, 4, 5 and 0xff. Setting the CPPR to 0 rejects all interrupts, setting it to 0xff allows all interrupts. The values 4 and 5 are used to differentiate IPIs from external interrupts. Setting the CPPR to 5 allows IPIs to be received but not external interrupts. The CPPR emulation in the OPAL XICS implementation only directly supports priorities 0 and 0xff. All other priorities are considered equivalent, and mapped to a single priority value internally. This means when using icp-opal we can not allow IPIs but not externals. This breaks Linux's use of priority values when a CPU is hot unplugged. After migrating IRQs away from the CPU that is being offlined, we set the priority to 5, meaning we still want the offline CPU to receive IPIs. But the effect of the OPAL XICS emulation's use of a single priority value is that all interrupts are rejected by the CPU. With the CPU offline, and not receiving IPIs, we may not be able to wake it up to bring it back online. The first part of the fix is in icp_opal_set_cpu_priority(). CPPR values of 0 to 4 inclusive will correctly cause all interrupts to be rejected, so we pass those CPPR values through to OPAL. However if we are called with a CPPR of 5 or greater, the caller is expecting to be able to allow IPIs but not external interrupts. We know this doesn't work, so instead of rejecting all interrupts we choose the opposite which is to allow all interrupts. This is still not correct behaviour, but we know for the only existing caller (xics_migrate_irqs_away()), that it is the better option. The other part of the fix is in xics_migrate_irqs_away(). Instead of setting priority (CPPR) to 0, and then back to 5 before migrating IRQs, we migrate the IRQs before setting the priority back to 5. This should have no effect on an ICP backend with a working set_priority(), and on icp-opal it means we will keep all interrupts blocked until after we've finished doing the IRQ migration. Additionally we wait for 5ms after doing the migration to make sure there are no IRQs in flight. Fixes: d74361881f0d ("powerpc/xics: Add ICP OPAL backend") Cc: stable@vger.kernel.org # v4.8+ Suggested-by: Michael Ellerman Reported-by: Vaidyanathan Srinivasan Tested-by: Vaidyanathan Srinivasan Signed-off-by: Balbir Singh [mpe: Rewrote comments and change log, change delay to 5ms] Signed-off-by: Michael Ellerman --- arch/powerpc/sysdev/xics/icp-opal.c | 10 ++++++++++ arch/powerpc/sysdev/xics/xics-common.c | 17 ++++++++++++++--- 2 files changed, 24 insertions(+), 3 deletions(-) diff --git a/arch/powerpc/sysdev/xics/icp-opal.c b/arch/powerpc/sysdev/xics/icp-opal.c index f9670eabfcfa..b53f80f0b4d8 100644 --- a/arch/powerpc/sysdev/xics/icp-opal.c +++ b/arch/powerpc/sysdev/xics/icp-opal.c @@ -91,6 +91,16 @@ static unsigned int icp_opal_get_irq(void) static void icp_opal_set_cpu_priority(unsigned char cppr) { + /* + * Here be dragons. The caller has asked to allow only IPI's and not + * external interrupts. But OPAL XIVE doesn't support that. So instead + * of allowing no interrupts allow all. That's still not right, but + * currently the only caller who does this is xics_migrate_irqs_away() + * and it works in that case. + */ + if (cppr >= DEFAULT_PRIORITY) + cppr = LOWEST_PRIORITY; + xics_set_base_cppr(cppr); opal_int_set_cppr(cppr); iosync(); diff --git a/arch/powerpc/sysdev/xics/xics-common.c b/arch/powerpc/sysdev/xics/xics-common.c index 69d858e51ac7..23efe4e42172 100644 --- a/arch/powerpc/sysdev/xics/xics-common.c +++ b/arch/powerpc/sysdev/xics/xics-common.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -198,9 +199,6 @@ void xics_migrate_irqs_away(void) /* Remove ourselves from the global interrupt queue */ xics_set_cpu_giq(xics_default_distrib_server, 0); - /* Allow IPIs again... */ - icp_ops->set_priority(DEFAULT_PRIORITY); - for_each_irq_desc(virq, desc) { struct irq_chip *chip; long server; @@ -255,6 +253,19 @@ void xics_migrate_irqs_away(void) unlock: raw_spin_unlock_irqrestore(&desc->lock, flags); } + + /* Allow "sufficient" time to drop any inflight IRQ's */ + mdelay(5); + + /* + * Allow IPIs again. This is done at the very end, after migrating all + * interrupts, the expectation is that we'll only get woken up by an IPI + * interrupt beyond this point, but leave externals masked just to be + * safe. If we're using icp-opal this may actually allow all + * interrupts anyway, but that should be OK. + */ + icp_ops->set_priority(DEFAULT_PRIORITY); + } #endif /* CONFIG_HOTPLUG_CPU */ From 12cc9fd6b2d8ee307a735b3b9faed0d17b719463 Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 28 Feb 2017 17:03:47 +1100 Subject: [PATCH 11/15] powerpc: Parse the command line before calling CAS On POWER9 the hypervisor requires the guest to decide whether it would like to use a hash or radix mmu model at the time it calls ibm,client-architecture-support (CAS) based on what the hypervisor has said it's allowed to do. It is possible to disable radix by passing "disable_radix" on the command line. The next patch will add support for the new CAS format, thus we need to parse the command line before calling CAS so we can correctly select which mmu we would like to use. Signed-off-by: Suraj Jitindar Singh Reviewed-by: Paul Mackerras Acked-by: Balbir Singh Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/prom_init.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index a3944540fe0d..bf3966d12565 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -2993,6 +2993,11 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, */ prom_check_initrd(r3, r4); + /* + * Do early parsing of command line + */ + early_cmdline_parse(); + #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) /* * On pSeries, inform the firmware about our capabilities @@ -3008,11 +3013,6 @@ unsigned long __init prom_init(unsigned long r3, unsigned long r4, if (of_platform != PLATFORM_POWERMAC) copy_and_flush(0, kbase, 0x100, 0); - /* - * Do early parsing of command line - */ - early_cmdline_parse(); - /* * Initialize memory management within prom_init */ From 014d02cbf16b3106dc8e93281d2a9c189751ed5e Mon Sep 17 00:00:00 2001 From: Suraj Jitindar Singh Date: Tue, 28 Feb 2017 17:03:48 +1100 Subject: [PATCH 12/15] powerpc: Update to new option-vector-5 format for CAS On POWER9 the ibm,client-architecture-support (CAS) negotiation process has been updated to change how the host to guest negotiation is done for the new hash/radix mmu as well as the nest mmu, process tables and guest translation shootdown (GTSE). This is documented in the unreleased PAPR ACR "CAS option vector additions for P9". The host tells the guest which options it supports in ibm,arch-vec-5-platform-support. The guest then chooses a subset of these to request in the CAS call and these are agreed to in the ibm,architecture-vec-5 property of the chosen node. Thus we read ibm,arch-vec-5-platform-support and make our selection before calling CAS. We then parse the ibm,architecture-vec-5 property of the chosen node to check whether we should run as hash or radix. ibm,arch-vec-5-platform-support format: index value pairs: ... index: Option vector 5 byte number val: Some representation of supported values Signed-off-by: Suraj Jitindar Singh Acked-by: Paul Mackerras [mpe: Don't print about unknown options, be consistent with OV5_FEAT] Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/prom.h | 18 ++++-- arch/powerpc/kernel/prom_init.c | 110 +++++++++++++++++++++++++++++++- arch/powerpc/mm/init_64.c | 42 +++++++++--- 3 files changed, 153 insertions(+), 17 deletions(-) diff --git a/arch/powerpc/include/asm/prom.h b/arch/powerpc/include/asm/prom.h index 4a90634e8322..35c00d7a0cf8 100644 --- a/arch/powerpc/include/asm/prom.h +++ b/arch/powerpc/include/asm/prom.h @@ -160,12 +160,18 @@ struct of_drconf_cell { #define OV5_PFO_HW_ENCR 0x1120 /* PFO Encryption Accelerator */ #define OV5_SUB_PROCESSORS 0x1501 /* 1,2,or 4 Sub-Processors supported */ #define OV5_XIVE_EXPLOIT 0x1701 /* XIVE exploitation supported */ -#define OV5_MMU_RADIX_300 0x1880 /* ISA v3.00 radix MMU supported */ -#define OV5_MMU_HASH_300 0x1840 /* ISA v3.00 hash MMU supported */ -#define OV5_MMU_SEGM_RADIX 0x1820 /* radix mode (no segmentation) */ -#define OV5_MMU_PROC_TBL 0x1810 /* hcall selects SLB or proc table */ -#define OV5_MMU_SLB 0x1800 /* always use SLB */ -#define OV5_MMU_GTSE 0x1808 /* Guest translation shootdown */ +/* MMU Base Architecture */ +#define OV5_MMU_SUPPORT 0x18C0 /* MMU Mode Support Mask */ +#define OV5_MMU_HASH 0x1800 /* Hash MMU Only */ +#define OV5_MMU_RADIX 0x1840 /* Radix MMU Only */ +#define OV5_MMU_EITHER 0x1880 /* Hash or Radix Supported */ +#define OV5_MMU_DYNAMIC 0x18C0 /* Hash or Radix Can Switch Later */ +#define OV5_NMMU 0x1820 /* Nest MMU Available */ +/* Hash Table Extensions */ +#define OV5_HASH_SEG_TBL 0x1980 /* In Memory Segment Tables Available */ +#define OV5_HASH_GTSE 0x1940 /* Guest Translation Shoot Down Avail */ +/* Radix Table Extensions */ +#define OV5_RADIX_GTSE 0x1A40 /* Guest Translation Shoot Down Avail */ /* Option Vector 6: IBM PAPR hints */ #define OV6_LINUX 0x02 /* Linux is our OS */ diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c index bf3966d12565..1c1b44ec7642 100644 --- a/arch/powerpc/kernel/prom_init.c +++ b/arch/powerpc/kernel/prom_init.c @@ -168,6 +168,14 @@ static unsigned long __initdata prom_tce_alloc_start; static unsigned long __initdata prom_tce_alloc_end; #endif +static bool __initdata prom_radix_disable; + +struct platform_support { + bool hash_mmu; + bool radix_mmu; + bool radix_gtse; +}; + /* Platforms codes are now obsolete in the kernel. Now only used within this * file and ultimately gone too. Feel free to change them if you need, they * are not shared with anything outside of this file anymore @@ -626,6 +634,12 @@ static void __init early_cmdline_parse(void) prom_memory_limit = ALIGN(prom_memory_limit, 0x1000000); #endif } + + opt = strstr(prom_cmd_line, "disable_radix"); + if (opt) { + prom_debug("Radix disabled from cmdline\n"); + prom_radix_disable = true; + } } #if defined(CONFIG_PPC_PSERIES) || defined(CONFIG_PPC_POWERNV) @@ -695,6 +709,8 @@ struct option_vector5 { u8 byte22; u8 intarch; u8 mmu; + u8 hash_ext; + u8 radix_ext; } __packed; struct option_vector6 { @@ -850,8 +866,9 @@ struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = { .reserved3 = 0, .subprocessors = 1, .intarch = 0, - .mmu = OV5_FEAT(OV5_MMU_RADIX_300) | OV5_FEAT(OV5_MMU_HASH_300) | - OV5_FEAT(OV5_MMU_PROC_TBL) | OV5_FEAT(OV5_MMU_GTSE), + .mmu = 0, + .hash_ext = 0, + .radix_ext = 0, }, /* option vector 6: IBM PAPR hints */ @@ -990,6 +1007,92 @@ static int __init prom_count_smt_threads(void) } +static void __init prom_parse_mmu_model(u8 val, + struct platform_support *support) +{ + switch (val) { + case OV5_FEAT(OV5_MMU_DYNAMIC): + case OV5_FEAT(OV5_MMU_EITHER): /* Either Available */ + prom_debug("MMU - either supported\n"); + support->radix_mmu = !prom_radix_disable; + support->hash_mmu = true; + break; + case OV5_FEAT(OV5_MMU_RADIX): /* Only Radix */ + prom_debug("MMU - radix only\n"); + if (prom_radix_disable) { + /* + * If we __have__ to do radix, we're better off ignoring + * the command line rather than not booting. + */ + prom_printf("WARNING: Ignoring cmdline option disable_radix\n"); + } + support->radix_mmu = true; + break; + case OV5_FEAT(OV5_MMU_HASH): + prom_debug("MMU - hash only\n"); + support->hash_mmu = true; + break; + default: + prom_debug("Unknown mmu support option: 0x%x\n", val); + break; + } +} + +static void __init prom_parse_platform_support(u8 index, u8 val, + struct platform_support *support) +{ + switch (index) { + case OV5_INDX(OV5_MMU_SUPPORT): /* MMU Model */ + prom_parse_mmu_model(val & OV5_FEAT(OV5_MMU_SUPPORT), support); + break; + case OV5_INDX(OV5_RADIX_GTSE): /* Radix Extensions */ + if (val & OV5_FEAT(OV5_RADIX_GTSE)) { + prom_debug("Radix - GTSE supported\n"); + support->radix_gtse = true; + } + break; + } +} + +static void __init prom_check_platform_support(void) +{ + struct platform_support supported = { + .hash_mmu = false, + .radix_mmu = false, + .radix_gtse = false + }; + int prop_len = prom_getproplen(prom.chosen, + "ibm,arch-vec-5-platform-support"); + if (prop_len > 1) { + int i; + u8 vec[prop_len]; + prom_debug("Found ibm,arch-vec-5-platform-support, len: %d\n", + prop_len); + prom_getprop(prom.chosen, "ibm,arch-vec-5-platform-support", + &vec, sizeof(vec)); + for (i = 0; i < prop_len; i += 2) { + prom_debug("%d: index = 0x%x val = 0x%x\n", i / 2 + , vec[i] + , vec[i + 1]); + prom_parse_platform_support(vec[i], vec[i + 1], + &supported); + } + } + + if (supported.radix_mmu && supported.radix_gtse) { + /* Radix preferred - but we require GTSE for now */ + prom_debug("Asking for radix with GTSE\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_RADIX); + ibm_architecture_vec.vec5.radix_ext = OV5_FEAT(OV5_RADIX_GTSE); + } else if (supported.hash_mmu) { + /* Default to hash mmu (if we can) */ + prom_debug("Asking for hash\n"); + ibm_architecture_vec.vec5.mmu = OV5_FEAT(OV5_MMU_HASH); + } else { + /* We're probably on a legacy hypervisor */ + prom_debug("Assuming legacy hash support\n"); + } +} static void __init prom_send_capabilities(void) { @@ -997,6 +1100,9 @@ static void __init prom_send_capabilities(void) prom_arg_t ret; u32 cores; + /* Check ibm,arch-vec-5-platform-support and fixup vec5 if required */ + prom_check_platform_support(); + root = call_prom("open", 1, 1, ADDR("/")); if (root != 0) { /* We need to tell the FW about the number of cores we support. diff --git a/arch/powerpc/mm/init_64.c b/arch/powerpc/mm/init_64.c index 6aa3b76aa0d6..9be992083d2a 100644 --- a/arch/powerpc/mm/init_64.c +++ b/arch/powerpc/mm/init_64.c @@ -356,18 +356,42 @@ static void early_check_vec5(void) unsigned long root, chosen; int size; const u8 *vec5; + u8 mmu_supported; root = of_get_flat_dt_root(); chosen = of_get_flat_dt_subnode_by_name(root, "chosen"); - if (chosen == -FDT_ERR_NOTFOUND) - return; - vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); - if (!vec5) - return; - if (size <= OV5_INDX(OV5_MMU_RADIX_300) || - !(vec5[OV5_INDX(OV5_MMU_RADIX_300)] & OV5_FEAT(OV5_MMU_RADIX_300))) - /* Hypervisor doesn't support radix */ + if (chosen == -FDT_ERR_NOTFOUND) { cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + vec5 = of_get_flat_dt_prop(chosen, "ibm,architecture-vec-5", &size); + if (!vec5) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + if (size <= OV5_INDX(OV5_MMU_SUPPORT)) { + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + return; + } + + /* Check for supported configuration */ + mmu_supported = vec5[OV5_INDX(OV5_MMU_SUPPORT)] & + OV5_FEAT(OV5_MMU_SUPPORT); + if (mmu_supported == OV5_FEAT(OV5_MMU_RADIX)) { + /* Hypervisor only supports radix - check enabled && GTSE */ + if (!early_radix_enabled()) { + pr_warn("WARNING: Ignoring cmdline option disable_radix\n"); + } + if (!(vec5[OV5_INDX(OV5_RADIX_GTSE)] & + OV5_FEAT(OV5_RADIX_GTSE))) { + pr_warn("WARNING: Hypervisor doesn't support RADIX with GTSE\n"); + } + /* Do radix anyway - the hypervisor said we had to */ + cur_cpu_spec->mmu_features |= MMU_FTR_TYPE_RADIX; + } else if (mmu_supported == OV5_FEAT(OV5_MMU_HASH)) { + /* Hypervisor only supports hash - disable radix */ + cur_cpu_spec->mmu_features &= ~MMU_FTR_TYPE_RADIX; + } } void __init mmu_early_init_devtree(void) @@ -383,7 +407,7 @@ void __init mmu_early_init_devtree(void) * even though the ibm,architecture-vec-5 property created by * skiboot doesn't have the necessary bits set. */ - if (early_radix_enabled() && !(mfmsr() & MSR_HV)) + if (!(mfmsr() & MSR_HV)) early_check_vec5(); if (early_radix_enabled()) From 6ba422c75facb1b1e0e206c464ee121b8073f7e0 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Sun, 5 Mar 2017 10:54:34 +1100 Subject: [PATCH 13/15] powerpc/64: Avoid panic during boot due to divide by zero in init_cache_info() I see a panic in early boot when building with a recent gcc toolchain. The issue is a divide by zero, which is undefined. Older toolchains let us get away with it: int foo(int a) { return a / 0; } foo: li 9,0 divw 3,3,9 extsw 3,3 blr But newer ones catch it: foo: trap Add a check to avoid the divide by zero. Fixes: e2827fe5c156 ("powerpc/64: Clean up ppc64_caches using a struct per cache") Signed-off-by: Anton Blanchard Acked-by: Benjamin Herrenschmidt Signed-off-by: Michael Ellerman --- arch/powerpc/kernel/setup_64.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index adf2084f214b..9cfaa8b69b5f 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -408,7 +408,10 @@ static void init_cache_info(struct ppc_cache_info *info, u32 size, u32 lsize, info->line_size = lsize; info->block_size = bsize; info->log_block_size = __ilog2(bsize); - info->blocks_per_page = PAGE_SIZE / bsize; + if (bsize) + info->blocks_per_page = PAGE_SIZE / bsize; + else + info->blocks_per_page = 0; if (sets == 0) info->assoc = 0xffff; From 9c7a00868c3a77c86ab07a2c51f3bb4897bd8550 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Mar 2017 21:51:32 +1100 Subject: [PATCH 14/15] powerpc/64: Fix L1D cache shape vector reporting L1I values It seems we didn't pay quite enough attention when testing the new cache shape vectors, which means we didn't notice the bug where the vector for the L1D was using the L1I values. Fix it, resulting in eg: L1I cache size: 0x8000 32768B 32K L1I line size: 0x80 8-way associative L1D cache size: 0x10000 65536B 64K L1D line size: 0x80 8-way associative Fixes: 98a5f361b862 ("powerpc: Add new cache geometry aux vectors") Cut-and-paste-bug-by: Benjamin Herrenschmidt Badly-reviewed-by: Michael Ellerman Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/elf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/powerpc/include/asm/elf.h b/arch/powerpc/include/asm/elf.h index 93b9b84568e8..09bde6e34f5d 100644 --- a/arch/powerpc/include/asm/elf.h +++ b/arch/powerpc/include/asm/elf.h @@ -144,8 +144,8 @@ extern int arch_setup_additional_pages(struct linux_binprm *bprm, #define ARCH_DLINFO_CACHE_GEOMETRY \ NEW_AUX_ENT(AT_L1I_CACHESIZE, ppc64_caches.l1i.size); \ NEW_AUX_ENT(AT_L1I_CACHEGEOMETRY, get_cache_geometry(l1i)); \ - NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1i.size); \ - NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1i)); \ + NEW_AUX_ENT(AT_L1D_CACHESIZE, ppc64_caches.l1d.size); \ + NEW_AUX_ENT(AT_L1D_CACHEGEOMETRY, get_cache_geometry(l1d)); \ NEW_AUX_ENT(AT_L2_CACHESIZE, ppc64_caches.l2.size); \ NEW_AUX_ENT(AT_L2_CACHEGEOMETRY, get_cache_geometry(l2)); \ NEW_AUX_ENT(AT_L3_CACHESIZE, ppc64_caches.l3.size); \ From a7d2475af7aedcb9b5c6343989a8bfadbf84429b Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Mon, 6 Mar 2017 22:53:59 +1100 Subject: [PATCH 15/15] powerpc: Sort the selects under CONFIG_PPC We have a big list of selects under CONFIG_PPC, and currently they're completely unsorted. This means people tend to add new selects at the bottom of the list, and so two commits which both add a new select will often conflict. Instead sort it alphabetically. This is nicer in and of itself, but also means two commits that add a new select will have a greater chance of not conflicting. Add a note at the top and bottom asking people to keep it sorted. And while we're here pad out the 'if' expressions to make them stand out. Suggested-by: Stephen Rothwell Signed-off-by: Michael Ellerman --- arch/powerpc/Kconfig | 166 ++++++++++++++++++++++--------------------- 1 file changed, 86 insertions(+), 80 deletions(-) diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig index 494091762bd7..97a8bc8a095c 100644 --- a/arch/powerpc/Kconfig +++ b/arch/powerpc/Kconfig @@ -80,93 +80,99 @@ config ARCH_HAS_DMA_SET_COHERENT_MASK config PPC bool default y - select BUILDTIME_EXTABLE_SORT + # + # Please keep this list sorted alphabetically. + # + select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_SET_COHERENT_MASK + select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_GCOV_PROFILE_ALL + select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE + select ARCH_HAS_SG_CHAIN + select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST + select ARCH_HAS_UBSAN_SANITIZE_ALL + select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO + select ARCH_SUPPORTS_ATOMIC_RMW + select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT + select ARCH_USE_BUILTIN_BSWAP + select ARCH_USE_CMPXCHG_LOCKREF if PPC64 + select ARCH_WANT_IPC_PARSE_VERSION select BINFMT_ELF - select ARCH_HAS_ELF_RANDOMIZE + select BUILDTIME_EXTABLE_SORT + select CLONE_BACKWARDS + select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN + select EDAC_ATOMIC_SCRUB + select EDAC_SUPPORT + select GENERIC_ATOMIC64 if PPC32 + select GENERIC_CLOCKEVENTS + select GENERIC_CLOCKEVENTS_BROADCAST if SMP + select GENERIC_CMOS_UPDATE + select GENERIC_CPU_AUTOPROBE + select GENERIC_IRQ_SHOW + select GENERIC_IRQ_SHOW_LEVEL + select GENERIC_SMP_IDLE_THREAD + select GENERIC_STRNCPY_FROM_USER + select GENERIC_STRNLEN_USER + select GENERIC_TIME_VSYSCALL_OLD + select HAVE_ARCH_AUDITSYSCALL + select HAVE_ARCH_HARDENED_USERCOPY + select HAVE_ARCH_JUMP_LABEL + select HAVE_ARCH_KGDB + select HAVE_ARCH_SECCOMP_FILTER + select HAVE_ARCH_TRACEHOOK + select HAVE_CBPF_JIT if !PPC64 + select HAVE_CONTEXT_TRACKING if PPC64 + select HAVE_DEBUG_KMEMLEAK + select HAVE_DEBUG_STACKOVERFLOW + select HAVE_DMA_API_DEBUG + select HAVE_DYNAMIC_FTRACE + select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL + select HAVE_EBPF_JIT if PPC64 + select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) + select HAVE_FTRACE_MCOUNT_RECORD + select HAVE_FUNCTION_GRAPH_TRACER + select HAVE_FUNCTION_TRACER + select HAVE_GCC_PLUGINS + select HAVE_GENERIC_RCU_GUP + select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) + select HAVE_IDE + select HAVE_IOREMAP_PROT + select HAVE_IRQ_EXIT_ON_IRQ_STACK + select HAVE_KERNEL_GZIP + select HAVE_KPROBES + select HAVE_KRETPROBES + select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS + select HAVE_MEMBLOCK + select HAVE_MEMBLOCK_NODE_MAP + select HAVE_MOD_ARCH_SPECIFIC + select HAVE_NMI if PERF_EVENTS + select HAVE_OPROFILE + select HAVE_OPTPROBES if PPC64 + select HAVE_PERF_EVENTS + select HAVE_PERF_EVENTS_NMI if PPC64 + select HAVE_PERF_REGS + select HAVE_PERF_USER_STACK_DUMP + select HAVE_RCU_TABLE_FREE if SMP + select HAVE_REGS_AND_STACK_ACCESS_API + select HAVE_SYSCALL_TRACEPOINTS + select HAVE_VIRT_CPU_ACCOUNTING + select IRQ_DOMAIN + select IRQ_FORCED_THREADING + select MODULES_USE_ELF_RELA + select NO_BOOTMEM select OF select OF_EARLY_FLATTREE select OF_RESERVED_MEM - select HAVE_FTRACE_MCOUNT_RECORD - select HAVE_DYNAMIC_FTRACE - select HAVE_DYNAMIC_FTRACE_WITH_REGS if MPROFILE_KERNEL - select HAVE_FUNCTION_TRACER - select HAVE_FUNCTION_GRAPH_TRACER - select HAVE_GCC_PLUGINS - select SYSCTL_EXCEPTION_TRACE - select VIRT_TO_BUS if !PPC64 - select HAVE_IDE - select HAVE_IOREMAP_PROT - select HAVE_EFFICIENT_UNALIGNED_ACCESS if !(CPU_LITTLE_ENDIAN && POWER7_CPU) - select HAVE_KPROBES - select HAVE_OPTPROBES if PPC64 - select HAVE_ARCH_KGDB - select HAVE_KRETPROBES - select HAVE_ARCH_TRACEHOOK - select HAVE_MEMBLOCK - select HAVE_MEMBLOCK_NODE_MAP - select HAVE_DMA_API_DEBUG - select HAVE_OPROFILE - select HAVE_DEBUG_KMEMLEAK - select ARCH_HAS_SG_CHAIN - select GENERIC_ATOMIC64 if PPC32 - select HAVE_PERF_EVENTS - select HAVE_PERF_REGS - select HAVE_PERF_USER_STACK_DUMP - select HAVE_REGS_AND_STACK_ACCESS_API - select HAVE_HW_BREAKPOINT if PERF_EVENTS && (PPC_BOOK3S || PPC_8xx) - select ARCH_WANT_IPC_PARSE_VERSION - select SPARSE_IRQ - select IRQ_DOMAIN - select GENERIC_IRQ_SHOW - select GENERIC_IRQ_SHOW_LEVEL - select IRQ_FORCED_THREADING - select HAVE_RCU_TABLE_FREE if SMP - select HAVE_SYSCALL_TRACEPOINTS - select HAVE_CBPF_JIT if !PPC64 - select HAVE_EBPF_JIT if PPC64 - select HAVE_ARCH_JUMP_LABEL - select ARCH_HAVE_NMI_SAFE_CMPXCHG - select ARCH_HAS_GCOV_PROFILE_ALL - select GENERIC_SMP_IDLE_THREAD - select GENERIC_CMOS_UPDATE - select GENERIC_TIME_VSYSCALL_OLD - select GENERIC_CLOCKEVENTS - select GENERIC_CLOCKEVENTS_BROADCAST if SMP - select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST - select GENERIC_STRNCPY_FROM_USER - select GENERIC_STRNLEN_USER - select HAVE_MOD_ARCH_SPECIFIC - select MODULES_USE_ELF_RELA - select CLONE_BACKWARDS - select ARCH_USE_BUILTIN_BSWAP + select OLD_SIGACTION if PPC32 select OLD_SIGSUSPEND - select OLD_SIGACTION if PPC32 - select HAVE_DEBUG_STACKOVERFLOW - select HAVE_IRQ_EXIT_ON_IRQ_STACK - select ARCH_USE_CMPXCHG_LOCKREF if PPC64 - select HAVE_ARCH_AUDITSYSCALL - select ARCH_SUPPORTS_ATOMIC_RMW - select DCACHE_WORD_ACCESS if PPC64 && CPU_LITTLE_ENDIAN - select NO_BOOTMEM - select HAVE_GENERIC_RCU_GUP - select HAVE_PERF_EVENTS_NMI if PPC64 - select HAVE_NMI if PERF_EVENTS - select EDAC_SUPPORT - select EDAC_ATOMIC_SCRUB - select ARCH_HAS_DMA_SET_COHERENT_MASK - select ARCH_HAS_DEVMEM_IS_ALLOWED - select HAVE_ARCH_SECCOMP_FILTER - select ARCH_HAS_UBSAN_SANITIZE_ALL - select ARCH_SUPPORTS_DEFERRED_STRUCT_PAGE_INIT - select HAVE_LIVEPATCH if HAVE_DYNAMIC_FTRACE_WITH_REGS - select GENERIC_CPU_AUTOPROBE - select HAVE_VIRT_CPU_ACCOUNTING - select ARCH_HAS_SCALED_CPUTIME if VIRT_CPU_ACCOUNTING_NATIVE - select HAVE_ARCH_HARDENED_USERCOPY - select HAVE_KERNEL_GZIP - select HAVE_CONTEXT_TRACKING if PPC64 + select SPARSE_IRQ + select SYSCTL_EXCEPTION_TRACE + select VIRT_TO_BUS if !PPC64 + # + # Please keep this list sorted alphabetically. + # config GENERIC_CSUM def_bool n