From 3a5f6805c7ca7deb8d1abaf0153936eeb51d074e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 17 Oct 2022 07:28:30 +0300 Subject: [PATCH 01/13] tcg/sparc: Remove support for sparc32plus MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since 9b9c37c36439, we have only supported sparc64 cpus. Debian and Gentoo now only support 64-bit sparc64 userland, so it is time to drop the 32-bit sparc64 userland: sparc32plus. Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- tcg/sparc/tcg-target.c.inc | 166 +++++++------------------------------ tcg/sparc/tcg-target.h | 11 --- tcg/tcg.c | 75 +---------------- 3 files changed, 33 insertions(+), 219 deletions(-) diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc/tcg-target.c.inc index 72d9552fd0..097bcfcd12 100644 --- a/tcg/sparc/tcg-target.c.inc +++ b/tcg/sparc/tcg-target.c.inc @@ -22,6 +22,11 @@ * THE SOFTWARE. */ +/* We only support generating code for 64-bit mode. */ +#ifndef __arch64__ +#error "unsupported code generation mode" +#endif + #include "../tcg-pool.c.inc" #ifdef CONFIG_DEBUG_TCG @@ -61,12 +66,6 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { }; #endif -#ifdef __arch64__ -# define SPARC64 1 -#else -# define SPARC64 0 -#endif - #define TCG_CT_CONST_S11 0x100 #define TCG_CT_CONST_S13 0x200 #define TCG_CT_CONST_ZERO 0x400 @@ -91,11 +90,7 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { * high bits of the %i and %l registers garbage at all times. 
*/ #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) -#if SPARC64 # define ALL_GENERAL_REGS64 ALL_GENERAL_REGS -#else -# define ALL_GENERAL_REGS64 MAKE_64BIT_MASK(0, 16) -#endif #define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) #define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS) @@ -306,11 +301,7 @@ static bool check_fit_i32(int32_t val, unsigned int bits) } #define check_fit_tl check_fit_i64 -#if SPARC64 -# define check_fit_ptr check_fit_i64 -#else -# define check_fit_ptr check_fit_i32 -#endif +#define check_fit_ptr check_fit_i64 static bool patch_reloc(tcg_insn_unit *src_rw, int type, intptr_t value, intptr_t addend) @@ -573,11 +564,6 @@ static void tcg_out_sety(TCGContext *s, TCGReg rs) tcg_out32(s, WRY | INSN_RS1(TCG_REG_G0) | INSN_RS2(rs)); } -static void tcg_out_rdy(TCGContext *s, TCGReg rd) -{ - tcg_out32(s, RDY | INSN_RD(rd)); -} - static void tcg_out_div32(TCGContext *s, TCGReg rd, TCGReg rs1, int32_t val2, int val2const, int uns) { @@ -914,9 +900,7 @@ static void emit_extend(TCGContext *s, TCGReg r, int op) tcg_out_arithi(s, r, r, 16, SHIFT_SRL); break; case MO_32: - if (SPARC64) { - tcg_out_arith(s, r, r, 0, SHIFT_SRL); - } + tcg_out_arith(s, r, r, 0, SHIFT_SRL); break; case MO_64: break; @@ -948,7 +932,6 @@ static void build_trampolines(TCGContext *s) }; int i; - TCGReg ra; for (i = 0; i < ARRAY_SIZE(qemu_ld_helpers); ++i) { if (qemu_ld_helpers[i] == NULL) { @@ -961,16 +944,8 @@ static void build_trampolines(TCGContext *s) } qemu_ld_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); - if (SPARC64 || TARGET_LONG_BITS == 32) { - ra = TCG_REG_O3; - } else { - /* Install the high part of the address. */ - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); - ra = TCG_REG_O4; - } - /* Set the retaddr operand. */ - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O3, TCG_REG_O7); /* Tail call. 
*/ tcg_out_jmpl_const(s, qemu_ld_helpers[i], true, true); /* delay slot -- set the env argument */ @@ -988,37 +963,10 @@ static void build_trampolines(TCGContext *s) } qemu_st_trampoline[i] = tcg_splitwx_to_rx(s->code_ptr); - if (SPARC64) { - emit_extend(s, TCG_REG_O2, i); - ra = TCG_REG_O4; - } else { - ra = TCG_REG_O1; - if (TARGET_LONG_BITS == 64) { - /* Install the high part of the address. */ - tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); - ra += 2; - } else { - ra += 1; - } - if ((i & MO_SIZE) == MO_64) { - /* Install the high part of the data. */ - tcg_out_arithi(s, ra, ra + 1, 32, SHIFT_SRLX); - ra += 2; - } else { - emit_extend(s, ra, i); - ra += 1; - } - /* Skip the oi argument. */ - ra += 1; - } - + emit_extend(s, TCG_REG_O2, i); + /* Set the retaddr operand. */ - if (ra >= TCG_REG_O6) { - tcg_out_st(s, TCG_TYPE_PTR, TCG_REG_O7, TCG_REG_CALL_STACK, - TCG_TARGET_CALL_STACK_OFFSET); - } else { - tcg_out_mov(s, TCG_TYPE_PTR, ra, TCG_REG_O7); - } + tcg_out_mov(s, TCG_TYPE_PTR, TCG_REG_O4, TCG_REG_O7); /* Tail call. */ tcg_out_jmpl_const(s, qemu_st_helpers[i], true, true); @@ -1047,11 +995,6 @@ static void build_trampolines(TCGContext *s) qemu_unalign_st_trampoline = tcg_splitwx_to_rx(s->code_ptr); } - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Install the high part of the address. */ - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O2, 32, SHIFT_SRLX); - } - /* Tail call. */ tcg_out_jmpl_const(s, helper, true, true); /* delay slot -- set the env argument */ @@ -1182,7 +1125,7 @@ static TCGReg tcg_out_tlb_load(TCGContext *s, TCGReg addr, int mem_index, tcg_out_cmp(s, r0, r2, 0); /* If the guest address must be zero-extended, do so now. 
*/ - if (SPARC64 && TARGET_LONG_BITS == 32) { + if (TARGET_LONG_BITS == 32) { tcg_out_arithi(s, r0, addr, 0, SHIFT_SRL); return r0; } @@ -1231,7 +1174,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); - TCGReg addrz, param; + TCGReg addrz; const tcg_insn_unit *func; addrz = tcg_out_tlb_load(s, addr, memi, memop, @@ -1251,12 +1194,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, /* TLB Miss. */ - param = TCG_REG_O1; - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, addrz); + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz); /* We use the helpers to extend SB and SW data, leaving the case of SL needing explicit extending below. */ @@ -1268,30 +1206,13 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, tcg_debug_assert(func != NULL); tcg_out_call_nodelay(s, func, false); /* delay slot */ - tcg_out_movi(s, TCG_TYPE_I32, param, oi); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O2, oi); - /* Recall that all of the helpers return 64-bit results. - Which complicates things for sparcv8plus. */ - if (SPARC64) { - /* We let the helper sign-extend SB and SW, but leave SL for here. */ - if (is_64 && (memop & MO_SSIZE) == MO_SL) { - tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); - } else { - tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); - } + /* We let the helper sign-extend SB and SW, but leave SL for here. 
*/ + if (is_64 && (memop & MO_SSIZE) == MO_SL) { + tcg_out_arithi(s, data, TCG_REG_O0, 0, SHIFT_SRA); } else { - if ((memop & MO_SIZE) == MO_64) { - tcg_out_arithi(s, TCG_REG_O0, TCG_REG_O0, 32, SHIFT_SLLX); - tcg_out_arithi(s, TCG_REG_O1, TCG_REG_O1, 0, SHIFT_SRL); - tcg_out_arith(s, data, TCG_REG_O0, TCG_REG_O1, ARITH_OR); - } else if (is_64) { - /* Re-extend from 32-bit rather than reassembling when we - know the high register must be an extension. */ - tcg_out_arithi(s, data, TCG_REG_O1, 0, - memop & MO_SIGN ? SHIFT_SRA : SHIFT_SRL); - } else { - tcg_out_mov(s, TCG_TYPE_I32, data, TCG_REG_O1); - } + tcg_out_mov(s, TCG_TYPE_REG, data, TCG_REG_O0); } *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); @@ -1301,7 +1222,7 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, unsigned s_bits = memop & MO_SIZE; unsigned t_bits; - if (SPARC64 && TARGET_LONG_BITS == 32) { + if (TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); addr = TCG_REG_T1; } @@ -1337,10 +1258,9 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg data, TCGReg addr, * operation in the delay slot, and failure need only invoke the * handler for SIGBUS. */ - TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); tcg_out_call_nodelay(s, qemu_unalign_ld_trampoline, false); /* delay slot -- move to low part of argument reg */ - tcg_out_mov_delay(s, arg_low, addr); + tcg_out_mov_delay(s, TCG_REG_O1, addr); } else { /* Underalignment: load by pieces of minimum alignment. */ int ld_opc, a_size, s_size, i; @@ -1400,7 +1320,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, #ifdef CONFIG_SOFTMMU unsigned memi = get_mmuidx(oi); - TCGReg addrz, param; + TCGReg addrz; const tcg_insn_unit *func; addrz = tcg_out_tlb_load(s, addr, memi, memop, @@ -1418,23 +1338,14 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, /* TLB Miss. 
*/ - param = TCG_REG_O1; - if (!SPARC64 && TARGET_LONG_BITS == 64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, addrz); - if (!SPARC64 && (memop & MO_SIZE) == MO_64) { - /* Skip the high-part; we'll perform the extract in the trampoline. */ - param++; - } - tcg_out_mov(s, TCG_TYPE_REG, param++, data); + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O1, addrz); + tcg_out_mov(s, TCG_TYPE_REG, TCG_REG_O2, data); func = qemu_st_trampoline[memop & (MO_BSWAP | MO_SIZE)]; tcg_debug_assert(func != NULL); tcg_out_call_nodelay(s, func, false); /* delay slot */ - tcg_out_movi(s, TCG_TYPE_I32, param, oi); + tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_O3, oi); *label_ptr |= INSN_OFF19(tcg_ptr_byte_diff(s->code_ptr, label_ptr)); #else @@ -1443,7 +1354,7 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, unsigned s_bits = memop & MO_SIZE; unsigned t_bits; - if (SPARC64 && TARGET_LONG_BITS == 32) { + if (TARGET_LONG_BITS == 32) { tcg_out_arithi(s, TCG_REG_T1, addr, 0, SHIFT_SRL); addr = TCG_REG_T1; } @@ -1479,10 +1390,9 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg data, TCGReg addr, * operation in the delay slot, and failure need only invoke the * handler for SIGBUS. */ - TCGReg arg_low = TCG_REG_O1 + (!SPARC64 && TARGET_LONG_BITS == 64); tcg_out_call_nodelay(s, qemu_unalign_st_trampoline, false); /* delay slot -- move to low part of argument reg */ - tcg_out_mov_delay(s, arg_low, addr); + tcg_out_mov_delay(s, TCG_REG_O1, addr); } else { /* Underalignment: store by pieces of minimum alignment. */ int st_opc, a_size, s_size, i; @@ -1719,14 +1629,9 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, case INDEX_op_muls2_i32: c = ARITH_SMUL; do_mul2: - /* The 32-bit multiply insns produce a full 64-bit result. If the - destination register can hold it, we can avoid the slower RDY. */ + /* The 32-bit multiply insns produce a full 64-bit result. 
*/ tcg_out_arithc(s, a0, a2, args[3], const_args[3], c); - if (SPARC64 || a0 <= TCG_REG_O7) { - tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); - } else { - tcg_out_rdy(s, a1); - } + tcg_out_arithi(s, a1, a0, 32, SHIFT_SRLX); break; case INDEX_op_qemu_ld_i32: @@ -1984,16 +1889,11 @@ static void tcg_target_init(TCGContext *s) tcg_regset_set_reg(s->reserved_regs, TCG_REG_T2); /* for internal use */ } -#if SPARC64 -# define ELF_HOST_MACHINE EM_SPARCV9 -#else -# define ELF_HOST_MACHINE EM_SPARC32PLUS -# define ELF_HOST_FLAGS EF_SPARC_32PLUS -#endif +#define ELF_HOST_MACHINE EM_SPARCV9 typedef struct { DebugFrameHeader h; - uint8_t fde_def_cfa[SPARC64 ? 4 : 2]; + uint8_t fde_def_cfa[4]; uint8_t fde_win_save; uint8_t fde_ret_save[3]; } DebugFrame; @@ -2010,12 +1910,8 @@ static const DebugFrame debug_frame = { .h.fde.len = sizeof(DebugFrame) - offsetof(DebugFrame, h.fde.cie_offset), .fde_def_cfa = { -#if SPARC64 12, 30, /* DW_CFA_def_cfa i6, 2047 */ (2047 & 0x7f) | 0x80, (2047 >> 7) -#else - 13, 30 /* DW_CFA_def_cfa_register i6 */ -#endif }, .fde_win_save = 0x2d, /* DW_CFA_GNU_window_save */ .fde_ret_save = { 9, 15, 31 }, /* DW_CFA_register o7, i7 */ diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc/tcg-target.h index c050763049..8655acdbe5 100644 --- a/tcg/sparc/tcg-target.h +++ b/tcg/sparc/tcg-target.h @@ -25,8 +25,6 @@ #ifndef SPARC_TCG_TARGET_H #define SPARC_TCG_TARGET_H -#define TCG_TARGET_REG_BITS 64 - #define TCG_TARGET_INSN_UNIT_SIZE 4 #define TCG_TARGET_TLB_DISPLACEMENT_BITS 32 #define TCG_TARGET_NB_REGS 32 @@ -70,19 +68,10 @@ typedef enum { /* used for function call generation */ #define TCG_REG_CALL_STACK TCG_REG_O6 -#ifdef __arch64__ #define TCG_TARGET_STACK_BIAS 2047 #define TCG_TARGET_STACK_ALIGN 16 #define TCG_TARGET_CALL_STACK_OFFSET (128 + 6*8 + TCG_TARGET_STACK_BIAS) -#else -#define TCG_TARGET_STACK_BIAS 0 -#define TCG_TARGET_STACK_ALIGN 8 -#define TCG_TARGET_CALL_STACK_OFFSET (64 + 4 + 6*4) -#endif - -#ifdef __arch64__ #define TCG_TARGET_EXTEND_ARGS 1 -#endif 
#if defined(__VIS__) && __VIS__ >= 0x300 #define use_vis3_instructions 1 diff --git a/tcg/tcg.c b/tcg/tcg.c index 612a12f58f..c9e664ee31 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -1487,39 +1487,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) } #endif -#if defined(__sparc__) && !defined(__arch64__) \ - && !defined(CONFIG_TCG_INTERPRETER) - /* We have 64-bit values in one register, but need to pass as two - separate parameters. Split them. */ - int orig_typemask = typemask; - int orig_nargs = nargs; - TCGv_i64 retl, reth; - TCGTemp *split_args[MAX_OPC_PARAM]; - - retl = NULL; - reth = NULL; - typemask = 0; - for (i = real_args = 0; i < nargs; ++i) { - int argtype = extract32(orig_typemask, (i + 1) * 3, 3); - bool is_64bit = (argtype & ~1) == dh_typecode_i64; - - if (is_64bit) { - TCGv_i64 orig = temp_tcgv_i64(args[i]); - TCGv_i32 h = tcg_temp_new_i32(); - TCGv_i32 l = tcg_temp_new_i32(); - tcg_gen_extr_i64_i32(l, h, orig); - split_args[real_args++] = tcgv_i32_temp(h); - typemask |= dh_typecode_i32 << (real_args * 3); - split_args[real_args++] = tcgv_i32_temp(l); - typemask |= dh_typecode_i32 << (real_args * 3); - } else { - split_args[real_args++] = args[i]; - typemask |= argtype << (real_args * 3); - } - } - nargs = real_args; - args = split_args; -#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { int argtype = extract32(typemask, (i + 1) * 3, 3); bool is_32bit = (argtype & ~1) == dh_typecode_i32; @@ -1542,22 +1510,6 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) pi = 0; if (ret != NULL) { -#if defined(__sparc__) && !defined(__arch64__) \ - && !defined(CONFIG_TCG_INTERPRETER) - if ((typemask & 6) == dh_typecode_i64) { - /* The 32-bit ABI is going to return the 64-bit value in - the %o0/%o1 register pair. Prepare for this by using - two return temporaries, and reassemble below. 
*/ - retl = tcg_temp_new_i64(); - reth = tcg_temp_new_i64(); - op->args[pi++] = tcgv_i64_arg(reth); - op->args[pi++] = tcgv_i64_arg(retl); - nb_rets = 2; - } else { - op->args[pi++] = temp_arg(ret); - nb_rets = 1; - } -#else if (TCG_TARGET_REG_BITS < 64 && (typemask & 6) == dh_typecode_i64) { #if HOST_BIG_ENDIAN op->args[pi++] = temp_arg(ret + 1); @@ -1571,7 +1523,6 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) op->args[pi++] = temp_arg(ret); nb_rets = 1; } -#endif } else { nb_rets = 0; } @@ -1634,29 +1585,7 @@ void tcg_gen_callN(void *func, TCGTemp *ret, int nargs, TCGTemp **args) tcg_debug_assert(TCGOP_CALLI(op) == real_args); tcg_debug_assert(pi <= ARRAY_SIZE(op->args)); -#if defined(__sparc__) && !defined(__arch64__) \ - && !defined(CONFIG_TCG_INTERPRETER) - /* Free all of the parts we allocated above. */ - for (i = real_args = 0; i < orig_nargs; ++i) { - int argtype = extract32(orig_typemask, (i + 1) * 3, 3); - bool is_64bit = (argtype & ~1) == dh_typecode_i64; - - if (is_64bit) { - tcg_temp_free_internal(args[real_args++]); - tcg_temp_free_internal(args[real_args++]); - } else { - real_args++; - } - } - if ((orig_typemask & 6) == dh_typecode_i64) { - /* The 32-bit ABI returned two 32-bit pieces. Re-assemble them. - Note that describing these as TCGv_i64 eliminates an unnecessary - zero-extension that tcg_gen_concat_i32_i64 would create. 
*/ - tcg_gen_concat32_i64(temp_tcgv_i64(ret), retl, reth); - tcg_temp_free_i64(retl); - tcg_temp_free_i64(reth); - } -#elif defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 +#if defined(TCG_TARGET_EXTEND_ARGS) && TCG_TARGET_REG_BITS == 64 for (i = 0; i < nargs; ++i) { int argtype = extract32(typemask, (i + 1) * 3, 3); bool is_32bit = (argtype & ~1) == dh_typecode_i32; From 6d0b52ed889f47fa8e39e9611d7bce15cc533369 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 17 Oct 2022 08:00:57 +0300 Subject: [PATCH 02/13] tcg/sparc64: Rename from tcg/sparc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Emphasize that we only support full 64-bit code generation. Reviewed-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- MAINTAINERS | 2 +- meson.build | 4 +--- tcg/{sparc => sparc64}/tcg-target-con-set.h | 0 tcg/{sparc => sparc64}/tcg-target-con-str.h | 0 tcg/{sparc => sparc64}/tcg-target.c.inc | 0 tcg/{sparc => sparc64}/tcg-target.h | 0 6 files changed, 2 insertions(+), 4 deletions(-) rename tcg/{sparc => sparc64}/tcg-target-con-set.h (100%) rename tcg/{sparc => sparc64}/tcg-target-con-str.h (100%) rename tcg/{sparc => sparc64}/tcg-target.c.inc (100%) rename tcg/{sparc => sparc64}/tcg-target.h (100%) diff --git a/MAINTAINERS b/MAINTAINERS index c41d8d65e2..62bbbba214 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3370,7 +3370,7 @@ L: qemu-s390x@nongnu.org SPARC TCG target S: Odd Fixes -F: tcg/sparc/ +F: tcg/sparc64/ F: disas/sparc.c TCI TCG target diff --git a/meson.build b/meson.build index 1c1afcc9b8..d809d51791 100644 --- a/meson.build +++ b/meson.build @@ -49,7 +49,7 @@ qapi_trace_events = [] bsd_oses = ['gnu/kfreebsd', 'freebsd', 'netbsd', 'openbsd', 'dragonfly', 'darwin'] supported_oses = ['windows', 'freebsd', 'netbsd', 'openbsd', 'darwin', 'sunos', 'linux'] supported_cpus = ['ppc', 'ppc64', 's390x', 'riscv', 'x86', 'x86_64', - 'arm', 'aarch64', 'loongarch64', 
'mips', 'mips64', 'sparc', 'sparc64'] + 'arm', 'aarch64', 'loongarch64', 'mips', 'mips64', 'sparc64'] cpu = host_machine.cpu_family() @@ -469,8 +469,6 @@ if get_option('tcg').allowed() endif if get_option('tcg_interpreter') tcg_arch = 'tci' - elif host_arch == 'sparc64' - tcg_arch = 'sparc' elif host_arch == 'x86_64' tcg_arch = 'i386' elif host_arch == 'ppc64' diff --git a/tcg/sparc/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h similarity index 100% rename from tcg/sparc/tcg-target-con-set.h rename to tcg/sparc64/tcg-target-con-set.h diff --git a/tcg/sparc/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h similarity index 100% rename from tcg/sparc/tcg-target-con-str.h rename to tcg/sparc64/tcg-target-con-str.h diff --git a/tcg/sparc/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc similarity index 100% rename from tcg/sparc/tcg-target.c.inc rename to tcg/sparc64/tcg-target.c.inc diff --git a/tcg/sparc/tcg-target.h b/tcg/sparc64/tcg-target.h similarity index 100% rename from tcg/sparc/tcg-target.h rename to tcg/sparc64/tcg-target.h From a59a293126604183dd63bf8b890393e32e7702c4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 17 Oct 2022 08:17:45 +0300 Subject: [PATCH 03/13] tcg/sparc64: Remove sparc32plus constraints With sparc64 we need not distinguish between registers that can hold 32-bit values and those that can hold 64-bit values. 
Reviewed-by: Peter Maydell Signed-off-by: Richard Henderson --- tcg/sparc64/tcg-target-con-set.h | 16 +---- tcg/sparc64/tcg-target-con-str.h | 3 - tcg/sparc64/tcg-target.c.inc | 109 ++++++++++++------------------- 3 files changed, 44 insertions(+), 84 deletions(-) diff --git a/tcg/sparc64/tcg-target-con-set.h b/tcg/sparc64/tcg-target-con-set.h index 3b751dc3fb..31e6fea1fc 100644 --- a/tcg/sparc64/tcg-target-con-set.h +++ b/tcg/sparc64/tcg-target-con-set.h @@ -11,22 +11,12 @@ */ C_O0_I1(r) C_O0_I2(rZ, r) -C_O0_I2(RZ, r) C_O0_I2(rZ, rJ) -C_O0_I2(RZ, RJ) -C_O0_I2(sZ, A) -C_O0_I2(SZ, A) -C_O1_I1(r, A) -C_O1_I1(R, A) +C_O0_I2(sZ, s) +C_O1_I1(r, s) C_O1_I1(r, r) -C_O1_I1(r, R) -C_O1_I1(R, r) -C_O1_I1(R, R) -C_O1_I2(R, R, R) +C_O1_I2(r, r, r) C_O1_I2(r, rZ, rJ) -C_O1_I2(R, RZ, RJ) C_O1_I4(r, rZ, rJ, rI, 0) -C_O1_I4(R, RZ, RJ, RI, 0) C_O2_I2(r, r, rZ, rJ) -C_O2_I4(R, R, RZ, RZ, RJ, RI) C_O2_I4(r, r, rZ, rZ, rJ, rJ) diff --git a/tcg/sparc64/tcg-target-con-str.h b/tcg/sparc64/tcg-target-con-str.h index fdb25d9313..8f5c7aef97 100644 --- a/tcg/sparc64/tcg-target-con-str.h +++ b/tcg/sparc64/tcg-target-con-str.h @@ -9,10 +9,7 @@ * REGS(letter, register_mask) */ REGS('r', ALL_GENERAL_REGS) -REGS('R', ALL_GENERAL_REGS64) REGS('s', ALL_QLDST_REGS) -REGS('S', ALL_QLDST_REGS64) -REGS('A', TARGET_LONG_BITS == 64 ? ALL_QLDST_REGS64 : ALL_QLDST_REGS) /* * Define constraint letters for constants: diff --git a/tcg/sparc64/tcg-target.c.inc b/tcg/sparc64/tcg-target.c.inc index 097bcfcd12..cb9453efdd 100644 --- a/tcg/sparc64/tcg-target.c.inc +++ b/tcg/sparc64/tcg-target.c.inc @@ -80,19 +80,8 @@ static const char * const tcg_target_reg_names[TCG_TARGET_NB_REGS] = { #else #define SOFTMMU_RESERVE_REGS 0 #endif - -/* - * Note that sparcv8plus can only hold 64 bit quantities in %g and %o - * registers. These are saved manually by the kernel in full 64-bit - * slots. The %i and %l registers are saved by the register window - * mechanism, which only allocates space for 32 bits. 
Given that this - * window spill/fill can happen on any signal, we must consider the - * high bits of the %i and %l registers garbage at all times. - */ #define ALL_GENERAL_REGS MAKE_64BIT_MASK(0, 32) -# define ALL_GENERAL_REGS64 ALL_GENERAL_REGS #define ALL_QLDST_REGS (ALL_GENERAL_REGS & ~SOFTMMU_RESERVE_REGS) -#define ALL_QLDST_REGS64 (ALL_GENERAL_REGS64 & ~SOFTMMU_RESERVE_REGS) /* Define some temporary registers. T2 is used for constant generation. */ #define TCG_REG_T1 TCG_REG_G1 @@ -1738,107 +1727,91 @@ static TCGConstraintSetIndex tcg_target_op_def(TCGOpcode op) return C_O0_I1(r); case INDEX_op_ld8u_i32: + case INDEX_op_ld8u_i64: case INDEX_op_ld8s_i32: + case INDEX_op_ld8s_i64: case INDEX_op_ld16u_i32: + case INDEX_op_ld16u_i64: case INDEX_op_ld16s_i32: + case INDEX_op_ld16s_i64: case INDEX_op_ld_i32: + case INDEX_op_ld32u_i64: + case INDEX_op_ld32s_i64: + case INDEX_op_ld_i64: case INDEX_op_neg_i32: + case INDEX_op_neg_i64: case INDEX_op_not_i32: + case INDEX_op_not_i64: + case INDEX_op_ext32s_i64: + case INDEX_op_ext32u_i64: + case INDEX_op_ext_i32_i64: + case INDEX_op_extu_i32_i64: + case INDEX_op_extrl_i64_i32: + case INDEX_op_extrh_i64_i32: return C_O1_I1(r, r); case INDEX_op_st8_i32: + case INDEX_op_st8_i64: case INDEX_op_st16_i32: + case INDEX_op_st16_i64: case INDEX_op_st_i32: + case INDEX_op_st32_i64: + case INDEX_op_st_i64: return C_O0_I2(rZ, r); case INDEX_op_add_i32: + case INDEX_op_add_i64: case INDEX_op_mul_i32: + case INDEX_op_mul_i64: case INDEX_op_div_i32: + case INDEX_op_div_i64: case INDEX_op_divu_i32: + case INDEX_op_divu_i64: case INDEX_op_sub_i32: + case INDEX_op_sub_i64: case INDEX_op_and_i32: + case INDEX_op_and_i64: case INDEX_op_andc_i32: + case INDEX_op_andc_i64: case INDEX_op_or_i32: + case INDEX_op_or_i64: case INDEX_op_orc_i32: + case INDEX_op_orc_i64: case INDEX_op_xor_i32: + case INDEX_op_xor_i64: case INDEX_op_shl_i32: + case INDEX_op_shl_i64: case INDEX_op_shr_i32: + case INDEX_op_shr_i64: case INDEX_op_sar_i32: + case 
INDEX_op_sar_i64: case INDEX_op_setcond_i32: + case INDEX_op_setcond_i64: return C_O1_I2(r, rZ, rJ); case INDEX_op_brcond_i32: + case INDEX_op_brcond_i64: return C_O0_I2(rZ, rJ); case INDEX_op_movcond_i32: + case INDEX_op_movcond_i64: return C_O1_I4(r, rZ, rJ, rI, 0); case INDEX_op_add2_i32: + case INDEX_op_add2_i64: case INDEX_op_sub2_i32: + case INDEX_op_sub2_i64: return C_O2_I4(r, r, rZ, rZ, rJ, rJ); case INDEX_op_mulu2_i32: case INDEX_op_muls2_i32: return C_O2_I2(r, r, rZ, rJ); - - case INDEX_op_ld8u_i64: - case INDEX_op_ld8s_i64: - case INDEX_op_ld16u_i64: - case INDEX_op_ld16s_i64: - case INDEX_op_ld32u_i64: - case INDEX_op_ld32s_i64: - case INDEX_op_ld_i64: - case INDEX_op_ext_i32_i64: - case INDEX_op_extu_i32_i64: - return C_O1_I1(R, r); - - case INDEX_op_st8_i64: - case INDEX_op_st16_i64: - case INDEX_op_st32_i64: - case INDEX_op_st_i64: - return C_O0_I2(RZ, r); - - case INDEX_op_add_i64: - case INDEX_op_mul_i64: - case INDEX_op_div_i64: - case INDEX_op_divu_i64: - case INDEX_op_sub_i64: - case INDEX_op_and_i64: - case INDEX_op_andc_i64: - case INDEX_op_or_i64: - case INDEX_op_orc_i64: - case INDEX_op_xor_i64: - case INDEX_op_shl_i64: - case INDEX_op_shr_i64: - case INDEX_op_sar_i64: - case INDEX_op_setcond_i64: - return C_O1_I2(R, RZ, RJ); - - case INDEX_op_neg_i64: - case INDEX_op_not_i64: - case INDEX_op_ext32s_i64: - case INDEX_op_ext32u_i64: - return C_O1_I1(R, R); - - case INDEX_op_extrl_i64_i32: - case INDEX_op_extrh_i64_i32: - return C_O1_I1(r, R); - - case INDEX_op_brcond_i64: - return C_O0_I2(RZ, RJ); - case INDEX_op_movcond_i64: - return C_O1_I4(R, RZ, RJ, RI, 0); - case INDEX_op_add2_i64: - case INDEX_op_sub2_i64: - return C_O2_I4(R, R, RZ, RZ, RJ, RI); case INDEX_op_muluh_i64: - return C_O1_I2(R, R, R); + return C_O1_I2(r, r, r); case INDEX_op_qemu_ld_i32: - return C_O1_I1(r, A); case INDEX_op_qemu_ld_i64: - return C_O1_I1(R, A); + return C_O1_I1(r, s); case INDEX_op_qemu_st_i32: - return C_O0_I2(sZ, A); case INDEX_op_qemu_st_i64: - return 
C_O0_I2(SZ, A); + return C_O0_I2(sZ, s); default: g_assert_not_reached(); @@ -1859,7 +1832,7 @@ static void tcg_target_init(TCGContext *s) #endif tcg_target_available_regs[TCG_TYPE_I32] = ALL_GENERAL_REGS; - tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS64; + tcg_target_available_regs[TCG_TYPE_I64] = ALL_GENERAL_REGS; tcg_target_call_clobber_regs = 0; tcg_regset_set_reg(tcg_target_call_clobber_regs, TCG_REG_G1); From 9dd1d56e570e5119fef2b28fda811d6891e597a8 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Sat, 29 Oct 2022 06:23:44 +1100 Subject: [PATCH 04/13] tcg/tci: fix logic error when registering helpers via FFI MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When registering helpers via FFI for TCI, the inner loop that iterates parameters of the helper reuses (and thus pollutes) the same variable used by the outer loop that iterates all helpers, thus leaving some helpers unregistered. Fix this logic error by using a dedicated temporary variable for the inner loop. 
Fixes: 22f15579fa ("tcg: Build ffi data structures for helpers") Reviewed-by: Alex Bennée Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Icenowy Zheng Message-Id: <20221028072145.1593205-1-uwu@icenowy.me> [rth: Move declaration of j to the for loop itself] Signed-off-by: Richard Henderson --- tcg/tcg.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tcg/tcg.c b/tcg/tcg.c index c9e664ee31..b6c46b7e25 100644 --- a/tcg/tcg.c +++ b/tcg/tcg.c @@ -634,9 +634,9 @@ static void tcg_context_init(unsigned max_cpus) if (nargs != 0) { ca->cif.arg_types = ca->args; - for (i = 0; i < nargs; ++i) { - int typecode = extract32(typemask, (i + 1) * 3, 3); - ca->args[i] = typecode_to_ffi[typecode]; + for (int j = 0; j < nargs; ++j) { + int typecode = extract32(typemask, (j + 1) * 3, 3); + ca->args[j] = typecode_to_ffi[typecode]; } } From 6392bd6b90a488b3254b1cb85d79bf262ed5f9e0 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 22:15:04 +1000 Subject: [PATCH 05/13] accel/tcg: Introduce cpu_unwind_state_data Add a way to examine the unwind data without actually restoring the data back into env. 
Reviewed-by: Claudio Fontana Signed-off-by: Richard Henderson --- accel/tcg/internal.h | 4 +-- accel/tcg/translate-all.c | 74 ++++++++++++++++++++++++++------------- include/exec/exec-all.h | 21 ++++++++--- 3 files changed, 68 insertions(+), 31 deletions(-) diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index 1227bb69bd..9c06b320b7 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -106,8 +106,8 @@ void tb_reset_jump(TranslationBlock *tb, int n); TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2); bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc); -int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, - uintptr_t searched_pc, bool reset_icount); +void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, + uintptr_t host_pc, bool reset_icount); /* Return the current PC from CPU, which may be cached in TB. */ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb) diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index f185356a36..319becb698 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -247,52 +247,66 @@ static int encode_search(TranslationBlock *tb, uint8_t *block) return p - block; } -/* The cpu state corresponding to 'searched_pc' is restored. - * When reset_icount is true, current TB will be interrupted and - * icount should be recalculated. 
- */ -int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, - uintptr_t searched_pc, bool reset_icount) +static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, + uint64_t *data) { - uint64_t data[TARGET_INSN_START_WORDS]; - uintptr_t host_pc = (uintptr_t)tb->tc.ptr; + uintptr_t iter_pc = (uintptr_t)tb->tc.ptr; const uint8_t *p = tb->tc.ptr + tb->tc.size; int i, j, num_insns = tb->icount; -#ifdef CONFIG_PROFILER - TCGProfile *prof = &tcg_ctx->prof; - int64_t ti = profile_getclock(); -#endif - searched_pc -= GETPC_ADJ; + host_pc -= GETPC_ADJ; - if (searched_pc < host_pc) { + if (host_pc < iter_pc) { return -1; } - memset(data, 0, sizeof(data)); + memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS); if (!TARGET_TB_PCREL) { data[0] = tb_pc(tb); } - /* Reconstruct the stored insn data while looking for the point at - which the end of the insn exceeds the searched_pc. */ + /* + * Reconstruct the stored insn data while looking for the point + * at which the end of the insn exceeds host_pc. + */ for (i = 0; i < num_insns; ++i) { for (j = 0; j < TARGET_INSN_START_WORDS; ++j) { data[j] += decode_sleb128(&p); } - host_pc += decode_sleb128(&p); - if (host_pc > searched_pc) { - goto found; + iter_pc += decode_sleb128(&p); + if (iter_pc > host_pc) { + return num_insns - i; } } return -1; +} + +/* + * The cpu state corresponding to 'host_pc' is restored. + * When reset_icount is true, current TB will be interrupted and + * icount should be recalculated. 
+ */ +void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, + uintptr_t host_pc, bool reset_icount) +{ + uint64_t data[TARGET_INSN_START_WORDS]; +#ifdef CONFIG_PROFILER + TCGProfile *prof = &tcg_ctx->prof; + int64_t ti = profile_getclock(); +#endif + int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data); + + if (insns_left < 0) { + return; + } - found: if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) { assert(icount_enabled()); - /* Reset the cycle counter to the start of the block - and shift if to the number of actually executed instructions */ - cpu_neg(cpu)->icount_decr.u16.low += num_insns - i; + /* + * Reset the cycle counter to the start of the block and + * shift it to the number of actually executed instructions. + */ + cpu_neg(cpu)->icount_decr.u16.low += insns_left; } cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data); @@ -302,7 +316,6 @@ int cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, prof->restore_time + profile_getclock() - ti); qatomic_set(&prof->restore_count, prof->restore_count + 1); #endif - return 0; } bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) @@ -335,6 +348,17 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) return false; } +bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data) +{ + if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { + TranslationBlock *tb = tcg_tb_lookup(host_pc); + if (tb) { + return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0; + } + } + return false; +} + void page_init(void) { page_size_init(); diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index e948992a80..7d851f5907 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -39,20 +39,33 @@ typedef ram_addr_t tb_page_addr_t; #define TB_PAGE_ADDR_FMT RAM_ADDR_FMT #endif +/** + * cpu_unwind_state_data: + * @cpu: the cpu context + * @host_pc: the host pc within the translation + * @data: output data + 
* + * Attempt to load the unwind state for a host pc occurring in + * translated code. If @host_pc is not in translated code, the + * function returns false; otherwise @data is loaded. + * This is the same unwind info as given to restore_state_to_opc. + */ +bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data); + /** * cpu_restore_state: - * @cpu: the vCPU state is to be restore to - * @searched_pc: the host PC the fault occurred at + * @cpu: the cpu context + * @host_pc: the host pc within the translation * @will_exit: true if the TB executed will be interrupted after some cpu adjustments. Required for maintaining the correct icount valus * @return: true if state was restored, false otherwise * * Attempt to restore the state for a fault occurring in translated - * code. If the searched_pc is not in translated code no state is + * code. If @host_pc is not in translated code no state is * restored and the function returns false. */ -bool cpu_restore_state(CPUState *cpu, uintptr_t searched_pc, bool will_exit); +bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit); G_NORETURN void cpu_loop_exit_noexc(CPUState *cpu); G_NORETURN void cpu_loop_exit(CPUState *cpu); From f484f213c9f4ae1cd30ebdaadc7b539d745d39fb Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 22:45:29 +1000 Subject: [PATCH 06/13] target/i386: Use cpu_unwind_state_data for tpr access Avoid cpu_restore_state, and modifying env->eip out from underneath the translator with TARGET_TB_PCREL. There is some slight duplication from x86_restore_state_to_opc, but it's just a few lines. 
Resolves: https://gitlab.com/qemu-project/qemu/-/issues/1269 Reviewed-by: Claudio Fontana Signed-off-by: Richard Henderson --- target/i386/helper.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) diff --git a/target/i386/helper.c b/target/i386/helper.c index b62a1e48e2..0ac2da066d 100644 --- a/target/i386/helper.c +++ b/target/i386/helper.c @@ -509,6 +509,27 @@ void cpu_x86_inject_mce(Monitor *mon, X86CPU *cpu, int bank, } } +static inline target_ulong get_memio_eip(CPUX86State *env) +{ +#ifdef CONFIG_TCG + uint64_t data[TARGET_INSN_START_WORDS]; + CPUState *cs = env_cpu(env); + + if (!cpu_unwind_state_data(cs, cs->mem_io_pc, data)) { + return env->eip; + } + + /* Per x86_restore_state_to_opc. */ + if (TARGET_TB_PCREL) { + return (env->eip & TARGET_PAGE_MASK) | data[0]; + } else { + return data[0] - env->segs[R_CS].base; + } +#else + qemu_build_not_reached(); +#endif +} + void cpu_report_tpr_access(CPUX86State *env, TPRAccess access) { X86CPU *cpu = env_archcpu(env); @@ -519,9 +540,9 @@ void cpu_report_tpr_access(CPUX86State *env, TPRAccess access) cpu_interrupt(cs, CPU_INTERRUPT_TPR); } else if (tcg_enabled()) { - cpu_restore_state(cs, cs->mem_io_pc, false); + target_ulong eip = get_memio_eip(env); - apic_handle_tpr_access_report(cpu->apic_state, env->eip, access); + apic_handle_tpr_access_report(cpu->apic_state, eip, access); } } #endif /* !CONFIG_USER_ONLY */ From 5813c5c74a755fd0c1b10be38c6fdf5c54c468e4 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 22:54:15 +1000 Subject: [PATCH 07/13] target/openrisc: Always exit after mtspr npc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have called cpu_restore_state asserting will_exit. Do not go back on that promise. This affects icount. 
Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/openrisc/sys_helper.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c index 09b3c97d7c..a3508e421d 100644 --- a/target/openrisc/sys_helper.c +++ b/target/openrisc/sys_helper.c @@ -51,8 +51,8 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) if (env->pc != rb) { env->pc = rb; env->dflag = 0; - cpu_loop_exit(cs); } + cpu_loop_exit(cs); break; case TO_SPR(0, 17): /* SR */ From cc30dc441b44ad15f4adfb13d9a68cba6fa39a23 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 22:55:26 +1000 Subject: [PATCH 08/13] target/openrisc: Use cpu_unwind_state_data for mfspr Since we do not plan to exit, use cpu_unwind_state_data and extract exactly the data requested. This is a bug fix, in that we no longer clobber dflag. Consider: l.j L2 // branch l.mfspr r1, ppc // delay L1: boom L2: l.lwa r3, (r4) Here, dflag would be set by cpu_restore_state (because that is the current state of the cpu), but not cleared by tb_stop on exiting the TB (because DisasContext has recorded the current value as zero). The next TB begins at L2 with dflag incorrectly set. If the load has a tlb miss, then the exception will be delivered as per a delay slot: with DSX set in the status register and PC decremented (delay slots restart by re-executing the branch). This will cause the return from interrupt to go to L1, and boom! 
Signed-off-by: Richard Henderson --- target/openrisc/sys_helper.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c index a3508e421d..dde2fa1623 100644 --- a/target/openrisc/sys_helper.c +++ b/target/openrisc/sys_helper.c @@ -199,6 +199,7 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd, target_ulong spr) { #ifndef CONFIG_USER_ONLY + uint64_t data[TARGET_INSN_START_WORDS]; MachineState *ms = MACHINE(qdev_get_machine()); OpenRISCCPU *cpu = env_archcpu(env); CPUState *cs = env_cpu(env); @@ -232,14 +233,20 @@ target_ulong HELPER(mfspr)(CPUOpenRISCState *env, target_ulong rd, return env->evbar; case TO_SPR(0, 16): /* NPC (equals PC) */ - cpu_restore_state(cs, GETPC(), false); + if (cpu_unwind_state_data(cs, GETPC(), data)) { + return data[0]; + } return env->pc; case TO_SPR(0, 17): /* SR */ return cpu_get_sr(env); case TO_SPR(0, 18): /* PPC */ - cpu_restore_state(cs, GETPC(), false); + if (cpu_unwind_state_data(cs, GETPC(), data)) { + if (data[1] & 2) { + return data[0] - 4; + } + } return env->ppc; case TO_SPR(0, 32): /* EPCR */ From 3d419a4dd227f174447e0b3978028a1cd52ccc5e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 23:09:57 +1000 Subject: [PATCH 09/13] accel/tcg: Remove will_exit argument from cpu_restore_state The value passed is always true, and if the target's synchronize_from_tb hook is non-trivial, not exiting may be erroneous. 
Reviewed-by: Claudio Fontana Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec-common.c | 2 +- accel/tcg/translate-all.c | 12 ++---------- include/exec/exec-all.h | 5 +---- target/alpha/helper.c | 2 +- target/alpha/mem_helper.c | 2 +- target/arm/op_helper.c | 2 +- target/arm/tlb_helper.c | 8 ++++---- target/cris/helper.c | 2 +- target/i386/tcg/sysemu/svm_helper.c | 2 +- target/m68k/op_helper.c | 4 ++-- target/microblaze/helper.c | 2 +- target/nios2/op_helper.c | 2 +- target/openrisc/sys_helper.c | 4 ++-- target/ppc/excp_helper.c | 2 +- target/s390x/tcg/excp_helper.c | 2 +- target/tricore/op_helper.c | 2 +- target/xtensa/helper.c | 6 +++--- 17 files changed, 25 insertions(+), 36 deletions(-) diff --git a/accel/tcg/cpu-exec-common.c b/accel/tcg/cpu-exec-common.c index be6fe45aa5..c7bc8c6efa 100644 --- a/accel/tcg/cpu-exec-common.c +++ b/accel/tcg/cpu-exec-common.c @@ -71,7 +71,7 @@ void cpu_loop_exit(CPUState *cpu) void cpu_loop_exit_restore(CPUState *cpu, uintptr_t pc) { if (pc) { - cpu_restore_state(cpu, pc, true); + cpu_restore_state(cpu, pc); } cpu_loop_exit(cpu); } diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 319becb698..90997fed47 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -318,16 +318,8 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, #endif } -bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) +bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) { - /* - * The pc update associated with restore without exit will - * break the relative pc adjustments performed by TARGET_TB_PCREL. - */ - if (TARGET_TB_PCREL) { - assert(will_exit); - } - /* * The host_pc has to be in the rx region of the code buffer. * If it is not we will not be able to resolve it here. 
@@ -341,7 +333,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit) if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { TranslationBlock *tb = tcg_tb_lookup(host_pc); if (tb) { - cpu_restore_state_from_tb(cpu, tb, host_pc, will_exit); + cpu_restore_state_from_tb(cpu, tb, host_pc, true); return true; } } diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index 7d851f5907..9b7bfbf09a 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -56,16 +56,13 @@ bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data); * cpu_restore_state: * @cpu: the cpu context * @host_pc: the host pc within the translation - * @will_exit: true if the TB executed will be interrupted after some - cpu adjustments. Required for maintaining the correct - icount valus * @return: true if state was restored, false otherwise * * Attempt to restore the state for a fault occurring in translated * code. If @host_pc is not in translated code no state is * restored and the function returns false. */ -bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc, bool will_exit); +bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc); G_NORETURN void cpu_loop_exit_noexc(CPUState *cpu); G_NORETURN void cpu_loop_exit(CPUState *cpu); diff --git a/target/alpha/helper.c b/target/alpha/helper.c index a5a389b5a3..970c869771 100644 --- a/target/alpha/helper.c +++ b/target/alpha/helper.c @@ -532,7 +532,7 @@ G_NORETURN void dynamic_excp(CPUAlphaState *env, uintptr_t retaddr, cs->exception_index = excp; env->error_code = error; if (retaddr) { - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); /* Floating-point exceptions (our only users) point to the next PC. 
*/ env->pc += 4; } diff --git a/target/alpha/mem_helper.c b/target/alpha/mem_helper.c index 47283a0612..a39b52c5dd 100644 --- a/target/alpha/mem_helper.c +++ b/target/alpha/mem_helper.c @@ -28,7 +28,7 @@ static void do_unaligned_access(CPUAlphaState *env, vaddr addr, uintptr_t retadd uint64_t pc; uint32_t insn; - cpu_restore_state(env_cpu(env), retaddr, true); + cpu_restore_state(env_cpu(env), retaddr); pc = env->pc; insn = cpu_ldl_code(env, pc); diff --git a/target/arm/op_helper.c b/target/arm/op_helper.c index c5bde1cfcc..70672bcd9f 100644 --- a/target/arm/op_helper.c +++ b/target/arm/op_helper.c @@ -78,7 +78,7 @@ void raise_exception_ra(CPUARMState *env, uint32_t excp, uint32_t syndrome, * we must restore CPU state here before setting the syndrome * the caller passed us, and cannot use cpu_loop_exit_restore(). */ - cpu_restore_state(cs, ra, true); + cpu_restore_state(cs, ra); raise_exception(env, excp, syndrome, target_el); } diff --git a/target/arm/tlb_helper.c b/target/arm/tlb_helper.c index 69b0dc69df..0f4f4fc809 100644 --- a/target/arm/tlb_helper.c +++ b/target/arm/tlb_helper.c @@ -156,7 +156,7 @@ void arm_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, ARMMMUFaultInfo fi = {}; /* now we have a real cpu fault */ - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); fi.type = ARMFault_Alignment; arm_deliver_fault(cpu, vaddr, access_type, mmu_idx, &fi); @@ -196,7 +196,7 @@ void arm_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, ARMMMUFaultInfo fi = {}; /* now we have a real cpu fault */ - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); fi.ea = arm_extabort_type(response); fi.type = ARMFault_SyncExternal; @@ -252,7 +252,7 @@ bool arm_cpu_tlb_fill(CPUState *cs, vaddr address, int size, return false; } else { /* now we have a real cpu fault */ - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); arm_deliver_fault(cpu, address, access_type, mmu_idx, fi); } } @@ -271,7 +271,7 @@ void 
arm_cpu_record_sigsegv(CPUState *cs, vaddr addr, * We report both ESR and FAR to signal handlers. * For now, it's easiest to deliver the fault normally. */ - cpu_restore_state(cs, ra, true); + cpu_restore_state(cs, ra); arm_deliver_fault(cpu, addr, access_type, MMU_USER_IDX, &fi); } diff --git a/target/cris/helper.c b/target/cris/helper.c index 91e4aeb178..81a72699b5 100644 --- a/target/cris/helper.c +++ b/target/cris/helper.c @@ -87,7 +87,7 @@ bool cris_cpu_tlb_fill(CPUState *cs, vaddr address, int size, cs->exception_index = EXCP_BUSFAULT; env->fault_vector = res.bf_vec; if (retaddr) { - if (cpu_restore_state(cs, retaddr, true)) { + if (cpu_restore_state(cs, retaddr)) { /* Evaluate flags after retranslation. */ helper_top_evaluate_flags(env); } diff --git a/target/i386/tcg/sysemu/svm_helper.c b/target/i386/tcg/sysemu/svm_helper.c index 8e88567399..2d27731b60 100644 --- a/target/i386/tcg/sysemu/svm_helper.c +++ b/target/i386/tcg/sysemu/svm_helper.c @@ -704,7 +704,7 @@ void cpu_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1, { CPUState *cs = env_cpu(env); - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n", diff --git a/target/m68k/op_helper.c b/target/m68k/op_helper.c index 5da176d642..1ce850bbc5 100644 --- a/target/m68k/op_helper.c +++ b/target/m68k/op_helper.c @@ -460,7 +460,7 @@ void m68k_cpu_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, M68kCPU *cpu = M68K_CPU(cs); CPUM68KState *env = &cpu->env; - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); if (m68k_feature(env, M68K_FEATURE_M68040)) { env->mmu.mmusr = 0; @@ -558,7 +558,7 @@ raise_exception_format2(CPUM68KState *env, int tt, int ilen, uintptr_t raddr) cs->exception_index = tt; /* Recover PC and CC_OP for the beginning of the insn. 
*/ - cpu_restore_state(cs, raddr, true); + cpu_restore_state(cs, raddr); /* Flags are current in env->cc_*, or are undefined. */ env->cc_op = CC_OP_FLAGS; diff --git a/target/microblaze/helper.c b/target/microblaze/helper.c index a607fe68e5..98bdb82de8 100644 --- a/target/microblaze/helper.c +++ b/target/microblaze/helper.c @@ -277,7 +277,7 @@ void mb_cpu_do_unaligned_access(CPUState *cs, vaddr addr, uint32_t esr, iflags; /* Recover the pc and iflags from the corresponding insn_start. */ - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); iflags = cpu->env.iflags; qemu_log_mask(CPU_LOG_INT, diff --git a/target/nios2/op_helper.c b/target/nios2/op_helper.c index 2e30d0a908..0aaf33ffc2 100644 --- a/target/nios2/op_helper.c +++ b/target/nios2/op_helper.c @@ -40,7 +40,7 @@ void nios2_cpu_loop_exit_advance(CPUNios2State *env, uintptr_t retaddr) * Do this here, rather than in restore_state_to_opc(), * lest we affect QEMU internal exceptions, like EXCP_DEBUG. */ - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); env->pc += 4; cpu_loop_exit(cs); } diff --git a/target/openrisc/sys_helper.c b/target/openrisc/sys_helper.c index dde2fa1623..ec145960e3 100644 --- a/target/openrisc/sys_helper.c +++ b/target/openrisc/sys_helper.c @@ -45,7 +45,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) break; case TO_SPR(0, 16): /* NPC */ - cpu_restore_state(cs, GETPC(), true); + cpu_restore_state(cs, GETPC()); /* ??? Mirror or1ksim in not trashing delayed branch state when "jumping" to the current instruction. 
*/ if (env->pc != rb) { @@ -131,7 +131,7 @@ void HELPER(mtspr)(CPUOpenRISCState *env, target_ulong spr, target_ulong rb) case TO_SPR(8, 0): /* PMR */ env->pmr = rb; if (env->pmr & PMR_DME || env->pmr & PMR_SME) { - cpu_restore_state(cs, GETPC(), true); + cpu_restore_state(cs, GETPC()); env->pc += 4; cs->halted = 1; raise_exception(cpu, EXCP_HALTED); diff --git a/target/ppc/excp_helper.c b/target/ppc/excp_helper.c index 09a81561d4..a05a2ed595 100644 --- a/target/ppc/excp_helper.c +++ b/target/ppc/excp_helper.c @@ -3075,7 +3075,7 @@ void ppc_cpu_do_unaligned_access(CPUState *cs, vaddr vaddr, uint32_t insn; /* Restore state and reload the insn we executed, for filling in DSISR. */ - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); insn = cpu_ldl_code(env, env->nip); switch (env->mmu_model) { diff --git a/target/s390x/tcg/excp_helper.c b/target/s390x/tcg/excp_helper.c index 29ccf70df1..2cd6d062b9 100644 --- a/target/s390x/tcg/excp_helper.c +++ b/target/s390x/tcg/excp_helper.c @@ -39,7 +39,7 @@ G_NORETURN void tcg_s390_program_interrupt(CPUS390XState *env, { CPUState *cs = env_cpu(env); - cpu_restore_state(cs, ra, true); + cpu_restore_state(cs, ra); qemu_log_mask(CPU_LOG_INT, "program interrupt at %#" PRIx64 "\n", env->psw.addr); trigger_pgm_exception(env, code); diff --git a/target/tricore/op_helper.c b/target/tricore/op_helper.c index a79c838a92..532ae6b74c 100644 --- a/target/tricore/op_helper.c +++ b/target/tricore/op_helper.c @@ -31,7 +31,7 @@ void raise_exception_sync_internal(CPUTriCoreState *env, uint32_t class, int tin { CPUState *cs = env_cpu(env); /* in case we come from a helper-call we need to restore the PC */ - cpu_restore_state(cs, pc, true); + cpu_restore_state(cs, pc); /* Tin is loaded into d[15] */ env->gpr_d[15] = tin; diff --git a/target/xtensa/helper.c b/target/xtensa/helper.c index e0a9caab4b..2aa9777a8e 100644 --- a/target/xtensa/helper.c +++ b/target/xtensa/helper.c @@ -253,7 +253,7 @@ void 
xtensa_cpu_do_unaligned_access(CPUState *cs, assert(xtensa_option_enabled(env->config, XTENSA_OPTION_UNALIGNED_EXCEPTION)); - cpu_restore_state(CPU(cpu), retaddr, true); + cpu_restore_state(CPU(cpu), retaddr); HELPER(exception_cause_vaddr)(env, env->pc, LOAD_STORE_ALIGNMENT_CAUSE, addr); @@ -284,7 +284,7 @@ bool xtensa_cpu_tlb_fill(CPUState *cs, vaddr address, int size, } else if (probe) { return false; } else { - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); HELPER(exception_cause_vaddr)(env, env->pc, ret, address); } } @@ -297,7 +297,7 @@ void xtensa_cpu_do_transaction_failed(CPUState *cs, hwaddr physaddr, vaddr addr, XtensaCPU *cpu = XTENSA_CPU(cs); CPUXtensaState *env = &cpu->env; - cpu_restore_state(cs, retaddr, true); + cpu_restore_state(cs, retaddr); HELPER(exception_cause_vaddr)(env, env->pc, access_type == MMU_INST_FETCH ? INSTR_PIF_ADDR_ERROR_CAUSE : From cfa29dd50611a0ecea9888818692290148773c0d Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 23:12:56 +1000 Subject: [PATCH 10/13] accel/tcg: Remove reset_icount argument from cpu_restore_state_from_tb The value passed is always true. Reviewed-by: Claudio Fontana Signed-off-by: Richard Henderson --- accel/tcg/internal.h | 2 +- accel/tcg/tb-maint.c | 4 ++-- accel/tcg/translate-all.c | 15 +++++++-------- 3 files changed, 10 insertions(+), 11 deletions(-) diff --git a/accel/tcg/internal.h b/accel/tcg/internal.h index 9c06b320b7..cb13bade4f 100644 --- a/accel/tcg/internal.h +++ b/accel/tcg/internal.h @@ -107,7 +107,7 @@ TranslationBlock *tb_link_page(TranslationBlock *tb, tb_page_addr_t phys_pc, tb_page_addr_t phys_page2); bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc); void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, - uintptr_t host_pc, bool reset_icount); + uintptr_t host_pc); /* Return the current PC from CPU, which may be cached in TB. 
*/ static inline target_ulong log_pc(CPUState *cpu, const TranslationBlock *tb) diff --git a/accel/tcg/tb-maint.c b/accel/tcg/tb-maint.c index c8e921089d..0cdb35548c 100644 --- a/accel/tcg/tb-maint.c +++ b/accel/tcg/tb-maint.c @@ -536,7 +536,7 @@ tb_invalidate_phys_page_range__locked(struct page_collection *pages, * restore the CPU state. */ current_tb_modified = true; - cpu_restore_state_from_tb(cpu, current_tb, retaddr, true); + cpu_restore_state_from_tb(cpu, current_tb, retaddr); } #endif /* TARGET_HAS_PRECISE_SMC */ tb_phys_invalidate__locked(tb); @@ -685,7 +685,7 @@ bool tb_invalidate_phys_page_unwind(tb_page_addr_t addr, uintptr_t pc) * function to partially restore the CPU state. */ current_tb_modified = true; - cpu_restore_state_from_tb(cpu, current_tb, pc, true); + cpu_restore_state_from_tb(cpu, current_tb, pc); } #endif /* TARGET_HAS_PRECISE_SMC */ tb_phys_invalidate(tb, addr); diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 90997fed47..0089578f8f 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -282,12 +282,11 @@ static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc, } /* - * The cpu state corresponding to 'host_pc' is restored. - * When reset_icount is true, current TB will be interrupted and - * icount should be recalculated. + * The cpu state corresponding to 'host_pc' is restored in + * preparation for exiting the TB. 
*/ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, - uintptr_t host_pc, bool reset_icount) + uintptr_t host_pc) { uint64_t data[TARGET_INSN_START_WORDS]; #ifdef CONFIG_PROFILER @@ -300,7 +299,7 @@ void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb, return; } - if (reset_icount && (tb_cflags(tb) & CF_USE_ICOUNT)) { + if (tb_cflags(tb) & CF_USE_ICOUNT) { assert(icount_enabled()); /* * Reset the cycle counter to the start of the block and @@ -333,7 +332,7 @@ bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc) if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) { TranslationBlock *tb = tcg_tb_lookup(host_pc); if (tb) { - cpu_restore_state_from_tb(cpu, tb, host_pc, true); + cpu_restore_state_from_tb(cpu, tb, host_pc); return true; } } @@ -1032,7 +1031,7 @@ void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr) tb = tcg_tb_lookup(retaddr); if (tb) { /* We can use retranslation to find the PC. */ - cpu_restore_state_from_tb(cpu, tb, retaddr, true); + cpu_restore_state_from_tb(cpu, tb, retaddr); tb_phys_invalidate(tb, -1); } else { /* The exception probably happened in a helper. The CPU state should @@ -1068,7 +1067,7 @@ void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr) cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p", (void *)retaddr); } - cpu_restore_state_from_tb(cpu, tb, retaddr, true); + cpu_restore_state_from_tb(cpu, tb, retaddr); /* * Some guests must re-execute the branch when re-executing a delay From 631793308679cf0436cd7145a9ff318331c982c9 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 24 Oct 2022 16:16:30 +1000 Subject: [PATCH 11/13] target/i386: Expand eflags updates inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The helpers for reset_rf, cli, sti, clac, stac are completely trivial; implement them inline. Drop some nearby #if 0 code. 
Reviewed-by: Paolo Bonzini Reviewed-by: Philippe Mathieu-Daudé Signed-off-by: Richard Henderson --- target/i386/helper.h | 5 ----- target/i386/tcg/cc_helper.c | 41 ------------------------------------- target/i386/tcg/translate.c | 30 ++++++++++++++++++++++----- 3 files changed, 25 insertions(+), 51 deletions(-) diff --git a/target/i386/helper.h b/target/i386/helper.h index 88143b2a24..b7de5429ef 100644 --- a/target/i386/helper.h +++ b/target/i386/helper.h @@ -56,13 +56,8 @@ DEF_HELPER_2(syscall, void, env, int) DEF_HELPER_2(sysret, void, env, int) #endif DEF_HELPER_FLAGS_2(pause, TCG_CALL_NO_WG, noreturn, env, int) -DEF_HELPER_1(reset_rf, void, env) DEF_HELPER_FLAGS_3(raise_interrupt, TCG_CALL_NO_WG, noreturn, env, int, int) DEF_HELPER_FLAGS_2(raise_exception, TCG_CALL_NO_WG, noreturn, env, int) -DEF_HELPER_1(cli, void, env) -DEF_HELPER_1(sti, void, env) -DEF_HELPER_1(clac, void, env) -DEF_HELPER_1(stac, void, env) DEF_HELPER_3(boundw, void, env, tl, int) DEF_HELPER_3(boundl, void, env, tl, int) diff --git a/target/i386/tcg/cc_helper.c b/target/i386/tcg/cc_helper.c index cc7ea9e8b9..6227dbb30b 100644 --- a/target/i386/tcg/cc_helper.c +++ b/target/i386/tcg/cc_helper.c @@ -346,44 +346,3 @@ void helper_clts(CPUX86State *env) env->cr[0] &= ~CR0_TS_MASK; env->hflags &= ~HF_TS_MASK; } - -void helper_reset_rf(CPUX86State *env) -{ - env->eflags &= ~RF_MASK; -} - -void helper_cli(CPUX86State *env) -{ - env->eflags &= ~IF_MASK; -} - -void helper_sti(CPUX86State *env) -{ - env->eflags |= IF_MASK; -} - -void helper_clac(CPUX86State *env) -{ - env->eflags &= ~AC_MASK; -} - -void helper_stac(CPUX86State *env) -{ - env->eflags |= AC_MASK; -} - -#if 0 -/* vm86plus instructions */ -void helper_cli_vm(CPUX86State *env) -{ - env->eflags &= ~VIF_MASK; -} - -void helper_sti_vm(CPUX86State *env) -{ - env->eflags |= VIF_MASK; - if (env->eflags & VIP_MASK) { - raise_exception_ra(env, EXCP0D_GPF, GETPC()); - } -} -#endif diff --git a/target/i386/tcg/translate.c 
b/target/i386/tcg/translate.c index 546c427c23..0ee548ce56 100644 --- a/target/i386/tcg/translate.c +++ b/target/i386/tcg/translate.c @@ -2746,6 +2746,26 @@ static void gen_reset_hflag(DisasContext *s, uint32_t mask) } } +static void gen_set_eflags(DisasContext *s, target_ulong mask) +{ + TCGv t = tcg_temp_new(); + + tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, eflags)); + tcg_gen_ori_tl(t, t, mask); + tcg_gen_st_tl(t, cpu_env, offsetof(CPUX86State, eflags)); + tcg_temp_free(t); +} + +static void gen_reset_eflags(DisasContext *s, target_ulong mask) +{ + TCGv t = tcg_temp_new(); + + tcg_gen_ld_tl(t, cpu_env, offsetof(CPUX86State, eflags)); + tcg_gen_andi_tl(t, t, ~mask); + tcg_gen_st_tl(t, cpu_env, offsetof(CPUX86State, eflags)); + tcg_temp_free(t); +} + /* Clear BND registers during legacy branches. */ static void gen_bnd_jmp(DisasContext *s) { @@ -2776,7 +2796,7 @@ do_gen_eob_worker(DisasContext *s, bool inhibit, bool recheck_tf, bool jr) } if (s->base.tb->flags & HF_RF_MASK) { - gen_helper_reset_rf(cpu_env); + gen_reset_eflags(s, RF_MASK); } if (recheck_tf) { gen_helper_rechecking_single_step(cpu_env); @@ -5502,12 +5522,12 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) #endif case 0xfa: /* cli */ if (check_iopl(s)) { - gen_helper_cli(cpu_env); + gen_reset_eflags(s, IF_MASK); } break; case 0xfb: /* sti */ if (check_iopl(s)) { - gen_helper_sti(cpu_env); + gen_set_eflags(s, IF_MASK); /* interruptions are enabled only the first insn after sti */ gen_update_eip_next(s); gen_eob_inhibit_irq(s, true); @@ -5789,7 +5809,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) || CPL(s) != 0) { goto illegal_op; } - gen_helper_clac(cpu_env); + gen_reset_eflags(s, AC_MASK); s->base.is_jmp = DISAS_EOB_NEXT; break; @@ -5798,7 +5818,7 @@ static bool disas_insn(DisasContext *s, CPUState *cpu) || CPL(s) != 0) { goto illegal_op; } - gen_helper_stac(cpu_env); + gen_set_eflags(s, AC_MASK); s->base.is_jmp = DISAS_EOB_NEXT; break; From 
4e4fa6c12d97ee3ee87623c153009a5abd7b428e Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 31 Oct 2022 13:26:36 +1100 Subject: [PATCH 12/13] accel/tcg: Complete cpu initialization before registration Delay cpu_list_add until realize is complete, so that cross-cpu interaction does not happen with incomplete cpu state. For this, we must delay plugin initialization out of tcg_exec_realizefn, because no cpu_index has been assigned. Fixes a problem with cross-cpu jump cache flushing, when the jump cache has not yet been allocated. Fixes: a976a99a2975 ("include/hw/core: Create struct CPUJumpCache") Acked-by: Ilya Leoshkevich Reported-by: Ilya Leoshkevich Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 8 +++++--- accel/tcg/translate-all.c | 16 +++++++--------- cpu.c | 10 +++++++++- 3 files changed, 21 insertions(+), 13 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 82b06c1824..356fe348de 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -1052,23 +1052,25 @@ void tcg_exec_realizefn(CPUState *cpu, Error **errp) cc->tcg_ops->initialize(); tcg_target_initialized = true; } - tlb_init(cpu); - qemu_plugin_vcpu_init_hook(cpu); + cpu->tb_jmp_cache = g_new0(CPUJumpCache, 1); + tlb_init(cpu); #ifndef CONFIG_USER_ONLY tcg_iommu_init_notifier_list(cpu); #endif /* !CONFIG_USER_ONLY */ + /* qemu_plugin_vcpu_init_hook delayed until cpu_index assigned. 
*/ } /* undo the initializations in reverse order */ void tcg_exec_unrealizefn(CPUState *cpu) { + qemu_plugin_vcpu_exit_hook(cpu); #ifndef CONFIG_USER_ONLY tcg_iommu_free_notifier_list(cpu); #endif /* !CONFIG_USER_ONLY */ - qemu_plugin_vcpu_exit_hook(cpu); tlb_destroy(cpu); + g_free(cpu->tb_jmp_cache); } #ifndef CONFIG_USER_ONLY diff --git a/accel/tcg/translate-all.c b/accel/tcg/translate-all.c index 0089578f8f..921944a5ab 100644 --- a/accel/tcg/translate-all.c +++ b/accel/tcg/translate-all.c @@ -1580,15 +1580,13 @@ void tcg_flush_jmp_cache(CPUState *cpu) { CPUJumpCache *jc = cpu->tb_jmp_cache; - if (likely(jc)) { - for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { - qatomic_set(&jc->array[i].tb, NULL); - } - } else { - /* This should happen once during realize, and thus never race. */ - jc = g_new0(CPUJumpCache, 1); - jc = qatomic_xchg(&cpu->tb_jmp_cache, jc); - assert(jc == NULL); + /* During early initialization, the cache may not yet be allocated. */ + if (unlikely(jc == NULL)) { + return; + } + + for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) { + qatomic_set(&jc->array[i].tb, NULL); } } diff --git a/cpu.c b/cpu.c index 2a09b05205..4a7d865427 100644 --- a/cpu.c +++ b/cpu.c @@ -134,15 +134,23 @@ void cpu_exec_realizefn(CPUState *cpu, Error **errp) /* cache the cpu class for the hotpath */ cpu->cc = CPU_GET_CLASS(cpu); - cpu_list_add(cpu); if (!accel_cpu_realizefn(cpu, errp)) { return; } + /* NB: errp parameter is unused currently */ if (tcg_enabled()) { tcg_exec_realizefn(cpu, errp); } + /* Wait until cpu initialization complete before exposing cpu. */ + cpu_list_add(cpu); + + /* Plugin initialization must wait until cpu_index assigned. 
*/ + if (tcg_enabled()) { + qemu_plugin_vcpu_init_hook(cpu); + } + #ifdef CONFIG_USER_ONLY assert(qdev_get_vmsd(DEVICE(cpu)) == NULL || qdev_get_vmsd(DEVICE(cpu))->unmigratable); From 83d92559cdf0ce842e52e5bbf230f7f62a6206aa Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Fri, 28 Oct 2022 14:42:27 +0200 Subject: [PATCH 13/13] tests/tcg/multiarch: Add munmap-pthread.c Add a test to detect races between munmap() and creating new threads. Signed-off-by: Ilya Leoshkevich Message-Id: <20221028124227.2354792-3-iii@linux.ibm.com> [rth: add more return insns] Signed-off-by: Richard Henderson --- tests/tcg/multiarch/Makefile.target | 3 ++ tests/tcg/multiarch/munmap-pthread.c | 79 ++++++++++++++++++++++++++++ 2 files changed, 82 insertions(+) create mode 100644 tests/tcg/multiarch/munmap-pthread.c diff --git a/tests/tcg/multiarch/Makefile.target b/tests/tcg/multiarch/Makefile.target index 78104f9bbb..5f0fee1aad 100644 --- a/tests/tcg/multiarch/Makefile.target +++ b/tests/tcg/multiarch/Makefile.target @@ -36,6 +36,9 @@ threadcount: LDFLAGS+=-lpthread signals: LDFLAGS+=-lrt -lpthread +munmap-pthread: CFLAGS+=-pthread +munmap-pthread: LDFLAGS+=-pthread + # We define the runner for test-mmap after the individual # architectures have defined their supported pages sizes. If no # additional page sizes are defined we only run the default test. diff --git a/tests/tcg/multiarch/munmap-pthread.c b/tests/tcg/multiarch/munmap-pthread.c new file mode 100644 index 0000000000..d7143b00d5 --- /dev/null +++ b/tests/tcg/multiarch/munmap-pthread.c @@ -0,0 +1,79 @@ +/* Test that munmap() and thread creation do not race. 
*/ +#include +#include +#include +#include +#include +#include +#include + +static const char nop_func[] = { +#if defined(__aarch64__) + 0xc0, 0x03, 0x5f, 0xd6, /* ret */ +#elif defined(__alpha__) + 0x01, 0x80, 0xFA, 0x6B, /* ret */ +#elif defined(__arm__) + 0x1e, 0xff, 0x2f, 0xe1, /* bx lr */ +#elif defined(__riscv) + 0x67, 0x80, 0x00, 0x00, /* ret */ +#elif defined(__s390__) + 0x07, 0xfe, /* br %r14 */ +#elif defined(__i386__) || defined(__x86_64__) + 0xc3, /* ret */ +#endif +}; + +static void *thread_mmap_munmap(void *arg) +{ + volatile bool *run = arg; + char *p; + int ret; + + while (*run) { + p = mmap(NULL, getpagesize(), PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + assert(p != MAP_FAILED); + + /* Create a small translation block. */ + memcpy(p, nop_func, sizeof(nop_func)); + ((void(*)(void))p)(); + + ret = munmap(p, getpagesize()); + assert(ret == 0); + } + + return NULL; +} + +static void *thread_dummy(void *arg) +{ + return NULL; +} + +int main(void) +{ + pthread_t mmap_munmap, dummy; + volatile bool run = true; + int i, ret; + + /* Without a template, nothing to test. */ + if (sizeof(nop_func) == 0) { + return EXIT_SUCCESS; + } + + ret = pthread_create(&mmap_munmap, NULL, thread_mmap_munmap, (void *)&run); + assert(ret == 0); + + for (i = 0; i < 1000; i++) { + ret = pthread_create(&dummy, NULL, thread_dummy, NULL); + assert(ret == 0); + ret = pthread_join(dummy, NULL); + assert(ret == 0); + } + + run = false; + ret = pthread_join(mmap_munmap, NULL); + assert(ret == 0); + + return EXIT_SUCCESS; +}