From f07aaf4856ee642f9f566bc26d8547819871516f Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Mon, 10 Apr 2023 10:30:21 -0700 Subject: [PATCH] tcg/mips: Convert tcg_out_qemu_{ld,st}_slow_path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret, and tcg_out_st_helper_args. This allows our local tcg_out_arg_* infrastructure to be removed. We are no longer filling the call or return branch delay slots, nor are we tail-calling for the store, but this seems a small price to pay. Reviewed-by: Alex Bennée Signed-off-by: Richard Henderson --- tcg/mips/tcg-target.c.inc | 154 ++++++-------------------------------- 1 file changed, 22 insertions(+), 132 deletions(-) diff --git a/tcg/mips/tcg-target.c.inc b/tcg/mips/tcg-target.c.inc index 94708e6ea7..022960d79a 100644 --- a/tcg/mips/tcg-target.c.inc +++ b/tcg/mips/tcg-target.c.inc @@ -1115,79 +1115,15 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = { [MO_BEUQ] = helper_be_stq_mmu, }; -/* Helper routines for marshalling helper function arguments into - * the correct registers and stack. - * I is where we want to put this argument, and is updated and returned - * for the next call. ARG is the argument itself. - * - * We provide routines for arguments which are: immediate, 32 bit - * value in register, 16 and 8 bit values in register (which must be zero - * extended before use) and 64 bit value in a lo:hi register pair. - */ - -static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg) -{ - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg); - } else { - /* For N32 and N64, the initial offset is different. But there - we also have 8 argument register so we don't run out here. */ - tcg_debug_assert(TCG_TARGET_REG_BITS == 32); - tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i); - } - return i + 1; -} - -static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg) -{ - TCGReg tmp = TCG_TMP0; - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_ext8u(s, tmp, arg); - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg) -{ - TCGReg tmp = TCG_TMP0; - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff); - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg) -{ - TCGReg tmp = TCG_TMP0; - if (arg == 0) { - tmp = TCG_REG_ZERO; - } else { - if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) { - tmp = tcg_target_call_iarg_regs[i]; - } - tcg_out_movi(s, TCG_TYPE_REG, tmp, arg); - } - return tcg_out_call_iarg_reg(s, i, tmp); -} - -static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah) -{ - tcg_debug_assert(TCG_TARGET_REG_BITS == 32); - i = (i + 1) & ~1; - i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al)); - i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah)); - return i; -} +/* We have four temps, we might as well expose three of them. */ +static const TCGLdstHelperParam ldst_helper_param = { + .ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 } +}; static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr); - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - TCGReg v0; - int i; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_pc16(l->label_ptr[0], tgt_rx) @@ -1196,29 +1132,13 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return false; } - i = 1; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); - } - i = tcg_out_call_iarg_imm(s, i, oi); - i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr); + tcg_out_ld_helper_args(s, l, &ldst_helper_param); + tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false); /* delay slot */ - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_nop(s); - v0 = l->datalo_reg; - if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) { - /* We eliminated V0 from the possible output registers, so it - cannot be clobbered here. So we must move V1 first. */ - if (MIPS_BE) { - tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1); - v0 = l->datahi_reg; - } else { - tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1); - } - } + tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param); tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); if (!reloc_pc16(s->code_ptr - 1, l->raddr)) { @@ -1226,22 +1146,14 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l) } /* delay slot */ - if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) { - /* we always sign-extend 32-bit loads */ - tcg_out_ext32s(s, v0, TCG_REG_V0); - } else { - tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO); - } + tcg_out_nop(s); return true; } static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) { const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr); - MemOpIdx oi = l->oi; - MemOp opc = get_memop(oi); - MemOp s_bits = opc & MO_SIZE; - int i; + MemOp opc = get_memop(l->oi); /* resolve label address */ if (!reloc_pc16(l->label_ptr[0], tgt_rx) @@ -1250,41 +1162,19 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l) return false; } - i = 1; - if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) { - i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg); - } - switch (s_bits) { - case MO_8: - i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg); - break; - case MO_16: - i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg); - break; - case MO_32: - i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); - break; - case MO_64: - if (TCG_TARGET_REG_BITS == 32) { - i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg); - } else { - i = tcg_out_call_iarg_reg(s, i, l->datalo_reg); - } - break; - default: - g_assert_not_reached(); - } - i = tcg_out_call_iarg_imm(s, i, oi); + tcg_out_st_helper_args(s, l, &ldst_helper_param); - /* Tail call to the store helper. Thus force the return address - computation to take place in the return address register. */ - tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr); - i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA); - tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true); + tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false); /* delay slot */ - tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0); + tcg_out_nop(s); + + tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO); + if (!reloc_pc16(s->code_ptr - 1, l->raddr)) { + return false; + } + + /* delay slot */ + tcg_out_nop(s); return true; }