From 5da5f47e6c65eda83e5433bd905c4df03be98596 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 6 Apr 2020 20:21:05 -0700 Subject: [PATCH 1/3] linux-user/ppc: Fix padding in mcontext_t for ppc64 The padding that was added in 95cda4c44ee was added to a union, and so it had no effect. This fixes misalignment errors detected by clang sanitizers for ppc64 and ppc64le. In addition, only ppc64 allocates space for VSX registers, so do not save them for ppc32. The kernel only has references to CONFIG_SPE in signal_32.c, so do not attempt to save them for ppc64. Fixes: 95cda4c44ee Signed-off-by: Richard Henderson Message-Id: <20200407032105.26711-1-richard.henderson@linaro.org> Acked-by: Laurent Vivier Signed-off-by: David Gibson --- linux-user/ppc/signal.c | 69 +++++++++++++++++------------------------ 1 file changed, 29 insertions(+), 40 deletions(-) diff --git a/linux-user/ppc/signal.c b/linux-user/ppc/signal.c index ecd99736b7..20a02c197c 100644 --- a/linux-user/ppc/signal.c +++ b/linux-user/ppc/signal.c @@ -35,12 +35,26 @@ struct target_mcontext { target_ulong mc_gregs[48]; /* Includes fpscr. */ uint64_t mc_fregs[33]; + #if defined(TARGET_PPC64) /* Pointer to the vector regs */ target_ulong v_regs; + /* + * On ppc64, this mcontext structure is naturally *unaligned*, + * or rather it is aligned on a 8 bytes boundary but not on + * a 16 byte boundary. This pad fixes it up. This is why we + * cannot use ppc_avr_t, which would force alignment. This is + * also why the vector regs are referenced in the ABI by the + * v_regs pointer above so any amount of padding can be added here. + */ + target_ulong pad; + /* VSCR and VRSAVE are saved separately. Also reserve space for VSX. */ + struct { + uint64_t altivec[34 + 16][2]; + } mc_vregs; #else target_ulong mc_pad[2]; -#endif + /* We need to handle Altivec and SPE at the same time, which no kernel needs to do. 
Fortunately, the kernel defines this bit to be Altivec-register-large all the time, rather than trying to @@ -48,32 +62,14 @@ struct target_mcontext { union { /* SPE vector registers. One extra for SPEFSCR. */ uint32_t spe[33]; - /* Altivec vector registers. The packing of VSCR and VRSAVE - varies depending on whether we're PPC64 or not: PPC64 splits - them apart; PPC32 stuffs them together. - We also need to account for the VSX registers on PPC64 - */ -#if defined(TARGET_PPC64) -#define QEMU_NVRREG (34 + 16) - /* On ppc64, this mcontext structure is naturally *unaligned*, - * or rather it is aligned on a 8 bytes boundary but not on - * a 16 bytes one. This pad fixes it up. This is also why the - * vector regs are referenced by the v_regs pointer above so - * any amount of padding can be added here + /* + * Altivec vector registers. One extra for VRSAVE. + * On ppc32, we are already aligned to 16 bytes. We could + * use ppc_avr_t, but choose to share the same type as ppc64. */ - target_ulong pad; -#else - /* On ppc32, we are already aligned to 16 bytes */ -#define QEMU_NVRREG 33 -#endif - /* We cannot use ppc_avr_t here as we do *not* want the implied - * 16-bytes alignment that would result from it. This would have - * the effect of making the whole struct target_mcontext aligned - * which breaks the layout of struct target_ucontext on ppc64. - */ - uint64_t altivec[QEMU_NVRREG][2]; -#undef QEMU_NVRREG + uint64_t altivec[33][2]; } mc_vregs; +#endif }; /* See arch/powerpc/include/asm/sigcontext.h. 
*/ @@ -278,6 +274,7 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame) __put_user((uint32_t)env->spr[SPR_VRSAVE], vrsave); } +#if defined(TARGET_PPC64) /* Save VSX second halves */ if (env->insns_flags2 & PPC2_VSX) { uint64_t *vsregs = (uint64_t *)&frame->mc_vregs.altivec[34]; @@ -286,6 +283,7 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame) __put_user(*vsrl, &vsregs[i]); } } +#endif /* Save floating point registers. */ if (env->insns_flags & PPC_FLOAT) { @@ -296,22 +294,18 @@ static void save_user_regs(CPUPPCState *env, struct target_mcontext *frame) __put_user((uint64_t) env->fpscr, &frame->mc_fregs[32]); } +#if !defined(TARGET_PPC64) /* Save SPE registers. The kernel only saves the high half. */ if (env->insns_flags & PPC_SPE) { -#if defined(TARGET_PPC64) - for (i = 0; i < ARRAY_SIZE(env->gpr); i++) { - __put_user(env->gpr[i] >> 32, &frame->mc_vregs.spe[i]); - } -#else for (i = 0; i < ARRAY_SIZE(env->gprh); i++) { __put_user(env->gprh[i], &frame->mc_vregs.spe[i]); } -#endif /* Set MSR_SPE in the saved MSR value to indicate that frame->mc_vregs contains valid data. */ msr |= MSR_SPE; __put_user(env->spe_fscr, &frame->mc_vregs.spe[32]); } +#endif /* Store MSR. */ __put_user(msr, &frame->mc_gregs[TARGET_PT_MSR]); @@ -392,6 +386,7 @@ static void restore_user_regs(CPUPPCState *env, __get_user(env->spr[SPR_VRSAVE], vrsave); } +#if defined(TARGET_PPC64) /* Restore VSX second halves */ if (env->insns_flags2 & PPC2_VSX) { uint64_t *vsregs = (uint64_t *)&frame->mc_vregs.altivec[34]; @@ -400,6 +395,7 @@ static void restore_user_regs(CPUPPCState *env, __get_user(*vsrl, &vsregs[i]); } } +#endif /* Restore floating point registers. */ if (env->insns_flags & PPC_FLOAT) { @@ -412,22 +408,15 @@ static void restore_user_regs(CPUPPCState *env, env->fpscr = (uint32_t) fpscr; } +#if !defined(TARGET_PPC64) /* Save SPE registers. The kernel only saves the high half. 
*/ if (env->insns_flags & PPC_SPE) { -#if defined(TARGET_PPC64) - for (i = 0; i < ARRAY_SIZE(env->gpr); i++) { - uint32_t hi; - - __get_user(hi, &frame->mc_vregs.spe[i]); - env->gpr[i] = ((uint64_t)hi << 32) | ((uint32_t) env->gpr[i]); - } -#else for (i = 0; i < ARRAY_SIZE(env->gprh); i++) { __get_user(env->gprh[i], &frame->mc_vregs.spe[i]); } -#endif __get_user(env->spe_fscr, &frame->mc_vregs.spe[32]); } +#endif } #if !defined(TARGET_PPC64) From 211a7784b9a80e42841223d8ea5252567ebe0e9e Mon Sep 17 00:00:00 2001 From: Ganesh Goudar Date: Wed, 8 Apr 2020 22:39:44 +0530 Subject: [PATCH 2/3] target/ppc: Fix wrong interpretation of the disposition flag. Bitwise AND with kvm_run->flags to evaluate if we recovered from MCE or not is not correct, as the disposition in kvm_run->flags is a two-bit integer value and not a bit map, so check for equality instead of bitwise AND. Without the fix, qemu treats any unrecoverable MCE error as recoverable and ends up in an MCE loop inside the guest. Below are the MCE logs before and after the fix.
Before fix: [ 66.775757] MCE: CPU0: Initiator CPU [ 66.775891] MCE: CPU0: Unknown [ 66.776587] MCE: CPU0: machine check (Harmless) Host UE Indeterminate [Recovered] [ 66.776857] MCE: CPU0: NIP: [c0080000000e00b8] mcetest_tlbie+0xb0/0x128 [mcetest_tlbie] After fix: [ 20.650577] CPU: 0 PID: 1415 Comm: insmod Tainted: G M O 5.6.0-fwnmi-arv+ #11 [ 20.650618] NIP: c0080000023a00e8 LR: c0080000023a00d8 CTR: c000000000021fe0 [ 20.650660] REGS: c0000001fffd3d70 TRAP: 0200 Tainted: G M O (5.6.0-fwnmi-arv+) [ 20.650708] MSR: 8000000002a0b033 CR: 42000222 XER: 20040000 [ 20.650758] CFAR: c00000000000b940 DAR: c0080000025e00e0 DSISR: 00000200 IRQMASK: 0 [ 20.650758] GPR00: c0080000023a00d8 c0000001fddd79a0 c0080000023a8500 0000000000000039 [ 20.650758] GPR04: 0000000000000001 0000000000000000 0000000000000000 0000000000000007 [ 20.650758] GPR08: 0000000000000007 c0080000025e00e0 0000000000000000 00000000000000f7 [ 20.650758] GPR12: 0000000000000000 c000000001900000 c00000000101f398 c0080000025c052f [ 20.650758] GPR16: 00000000000003a8 c0080000025c0000 c0000001fddd7d70 c0000000015b7940 [ 20.650758] GPR20: 000000000000fff1 c000000000f72c28 c0080000025a0988 0000000000000000 [ 20.650758] GPR24: 0000000000000100 c0080000023a05d0 c0000000001f1d70 0000000000000000 [ 20.650758] GPR28: c0000001fde20000 c0000001fd02b2e0 c0080000023a0000 c0080000025e0000 [ 20.651178] NIP [c0080000023a00e8] mcetest_tlbie+0xe8/0xf0 [mcetest_tlbie] [ 20.651220] LR [c0080000023a00d8] mcetest_tlbie+0xd8/0xf0 [mcetest_tlbie] [ 20.651262] Call Trace: [ 20.651280] [c0000001fddd79a0] [c0080000023a00d8] mcetest_tlbie+0xd8/0xf0 [mcetest_tlbie] (unreliable) [ 20.651340] [c0000001fddd7a10] [c00000000001091c] do_one_initcall+0x6c/0x2c0 [ 20.651390] [c0000001fddd7af0] [c0000000001f7998] do_init_module+0x90/0x298 [ 20.651433] [c0000001fddd7b80] [c0000000001f61a8] load_module+0x1f58/0x27a0 [ 20.651476] [c0000001fddd7d40] [c0000000001f6c70] __do_sys_finit_module+0xe0/0x100 [ 20.651526] [c0000001fddd7e20] 
[c00000000000b9d0] system_call+0x5c/0x68 [ 20.651567] Instruction dump: [ 20.651594] e8410018 3c620000 e8638020 480000cd e8410018 3c620000 e8638028 480000bd [ 20.651646] e8410018 7be904e4 39400000 612900e0 <7d434a64> 4bffff74 3c4c0001 38428410 [ 20.651699] ---[ end trace 4c40897f016b4340 ]--- [ 20.653310] Bus error [ 20.655575] MCE: CPU0: machine check (Harmless) Host UE Indeterminate [Not recovered] [ 20.655575] MCE: CPU0: NIP: [c0080000023a00e8] mcetest_tlbie+0xe8/0xf0 [mcetest_tlbie] [ 20.655576] MCE: CPU0: Initiator CPU [ 20.655576] MCE: CPU0: Unknown Signed-off-by: Ganesh Goudar Message-Id: <20200408170944.16003-1-ganeshgr@linux.ibm.com> Signed-off-by: David Gibson --- target/ppc/kvm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/target/ppc/kvm.c b/target/ppc/kvm.c index 03d0667e8f..2692f76130 100644 --- a/target/ppc/kvm.c +++ b/target/ppc/kvm.c @@ -2816,11 +2816,11 @@ int kvm_arch_msi_data_to_gsi(uint32_t data) #if defined(TARGET_PPC64) int kvm_handle_nmi(PowerPCCPU *cpu, struct kvm_run *run) { - bool recovered = run->flags & KVM_RUN_PPC_NMI_DISP_FULLY_RECOV; + uint16_t flags = run->flags & KVM_RUN_PPC_NMI_DISP_MASK; cpu_synchronize_state(CPU(cpu)); - spapr_mce_req_event(cpu, recovered); + spapr_mce_req_event(cpu, flags == KVM_RUN_PPC_NMI_DISP_FULLY_RECOV); return 0; } From 5ed195065cc6895f61b9d59bfa0a0536ed5ed51e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Tue, 14 Apr 2020 21:11:31 +1000 Subject: [PATCH 3/3] target/ppc: Fix mtmsr(d) L=1 variant that loses interrupts MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If mtmsr L=1 sets MSR[EE] while there is a maskable exception pending, it does not cause an interrupt. This causes the test case to hang: https://lists.gnu.org/archive/html/qemu-ppc/2019-10/msg00826.html More recently, Linux reduced the occurrence of operations (e.g., rfi) which stop translation and allow pending interrupts to be processed.
This started causing hangs in Linux boot in long-running kernel tests; running with '-d int' shows the decrementer stops firing despite DEC wrapping and MSR[EE]=1. https://lists.ozlabs.org/pipermail/linuxppc-dev/2020-April/208301.html The cause is the broken mtmsr L=1 behaviour, which is contrary to the architecture. In Power ISA v3.0B, p.977, Move To Machine State Register, the Programming Note states: If MSR[EE]=0 and an External, Decrementer, or Performance Monitor exception is pending, executing an mtmsrd instruction that sets MSR[EE] to 1 will cause the interrupt to occur before the next instruction is executed, if no higher priority exception exists. Fix this by handling L=1 exactly the same way as L=0, modulo the MSR bits altered. The confusion arises from L=0 being "context synchronizing" whereas L=1 is "execution synchronizing", which is a weaker semantic. However, this is not a relaxation of the requirement that these exceptions cause interrupts when MSR[EE]=1 (e.g., when mtmsr executes to completion as TCG is doing here); rather, it specifies how a pipelined processor can have multiple instructions in flight where one may influence how another behaves.
Cc: qemu-stable@nongnu.org Reported-by: Anton Blanchard Reported-by: Nathan Chancellor Tested-by: Nathan Chancellor Signed-off-by: Nicholas Piggin Message-Id: <20200414111131.465560-1-npiggin@gmail.com> Reviewed-by: Cédric Le Goater Tested-by: Cédric Le Goater Signed-off-by: David Gibson --- target/ppc/translate.c | 46 +++++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 19 deletions(-) diff --git a/target/ppc/translate.c b/target/ppc/translate.c index b207fb5386..9959259dba 100644 --- a/target/ppc/translate.c +++ b/target/ppc/translate.c @@ -4361,30 +4361,34 @@ static void gen_mtmsrd(DisasContext *ctx) CHK_SV; #if !defined(CONFIG_USER_ONLY) + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } if (ctx->opcode & 0x00010000) { - /* Special form that does not need any synchronisation */ + /* L=1 form only updates EE and RI */ TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 << MSR_RI) | (1 << MSR_EE)); - tcg_gen_andi_tl(cpu_msr, cpu_msr, + tcg_gen_andi_tl(t1, cpu_msr, ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE))); - tcg_gen_or_tl(cpu_msr, cpu_msr, t0); + tcg_gen_or_tl(t1, t1, t0); + + gen_helper_store_msr(cpu_env, t1); tcg_temp_free(t0); + tcg_temp_free(t1); + } else { /* * XXX: we need to update nip before the store if we enter * power saving mode, we will exit the loop directly from * ppc_store_msr */ - if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } gen_update_nip(ctx, ctx->base.pc_next); gen_helper_store_msr(cpu_env, cpu_gpr[rS(ctx->opcode)]); - /* Must stop the translation as machine state (may have) changed */ - /* Note that mtmsr is not always defined as context-synchronizing */ - gen_stop_exception(ctx); } + /* Must stop the translation as machine state (may have) changed */ + gen_stop_exception(ctx); #endif /* !defined(CONFIG_USER_ONLY) */ } #endif /* defined(TARGET_PPC64) */ @@ -4394,15 +4398,23 @@ static void gen_mtmsr(DisasContext *ctx) 
CHK_SV; #if !defined(CONFIG_USER_ONLY) - if (ctx->opcode & 0x00010000) { - /* Special form that does not need any synchronisation */ + if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { + gen_io_start(); + } + if (ctx->opcode & 0x00010000) { + /* L=1 form only updates EE and RI */ TCGv t0 = tcg_temp_new(); + TCGv t1 = tcg_temp_new(); tcg_gen_andi_tl(t0, cpu_gpr[rS(ctx->opcode)], (1 << MSR_RI) | (1 << MSR_EE)); - tcg_gen_andi_tl(cpu_msr, cpu_msr, + tcg_gen_andi_tl(t1, cpu_msr, ~(target_ulong)((1 << MSR_RI) | (1 << MSR_EE))); - tcg_gen_or_tl(cpu_msr, cpu_msr, t0); + tcg_gen_or_tl(t1, t1, t0); + + gen_helper_store_msr(cpu_env, t1); tcg_temp_free(t0); + tcg_temp_free(t1); + } else { TCGv msr = tcg_temp_new(); @@ -4411,9 +4423,6 @@ static void gen_mtmsr(DisasContext *ctx) * power saving mode, we will exit the loop directly from * ppc_store_msr */ - if (tb_cflags(ctx->base.tb) & CF_USE_ICOUNT) { - gen_io_start(); - } gen_update_nip(ctx, ctx->base.pc_next); #if defined(TARGET_PPC64) tcg_gen_deposit_tl(msr, cpu_msr, cpu_gpr[rS(ctx->opcode)], 0, 32); @@ -4422,10 +4431,9 @@ static void gen_mtmsr(DisasContext *ctx) #endif gen_helper_store_msr(cpu_env, msr); tcg_temp_free(msr); - /* Must stop the translation as machine state (may have) changed */ - /* Note that mtmsr is not always defined as context-synchronizing */ - gen_stop_exception(ctx); } + /* Must stop the translation as machine state (may have) changed */ + gen_stop_exception(ctx); #endif }