tcg/mips: Convert tcg_out_qemu_{ld,st}_slow_path

Use tcg_out_ld_helper_args, tcg_out_ld_helper_ret,
and tcg_out_st_helper_args.  This allows our local
tcg_out_arg_* infrastructure to be removed.

We are no longer filling the call or return branch
delay slots, nor are we tail-calling for the store,
but this seems a small price to pay.

Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
Richard Henderson 2023-04-10 10:30:21 -07:00
parent 338b9e0950
commit f07aaf4856

View File

@ -1115,79 +1115,15 @@ static void * const qemu_st_helpers[(MO_SIZE | MO_BSWAP) + 1] = {
[MO_BEUQ] = helper_be_stq_mmu,
};
/* Helper routines for marshalling helper function arguments into
* the correct registers and stack.
* I is where we want to put this argument, and is updated and returned
* for the next call. ARG is the argument itself.
*
* We provide routines for arguments which are: immediate, 32 bit
* value in register, 16 and 8 bit values in register (which must be zero
* extended before use) and 64 bit value in a lo:hi register pair.
*/
static int tcg_out_call_iarg_reg(TCGContext *s, int i, TCGReg arg)
{
if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
tcg_out_mov(s, TCG_TYPE_REG, tcg_target_call_iarg_regs[i], arg);
} else {
/* For N32 and N64, the initial offset is different. But there
we also have 8 argument register so we don't run out here. */
tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
tcg_out_st(s, TCG_TYPE_REG, arg, TCG_REG_SP, 4 * i);
}
return i + 1;
}
static int tcg_out_call_iarg_reg8(TCGContext *s, int i, TCGReg arg)
{
TCGReg tmp = TCG_TMP0;
if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
tmp = tcg_target_call_iarg_regs[i];
}
tcg_out_ext8u(s, tmp, arg);
return tcg_out_call_iarg_reg(s, i, tmp);
}
static int tcg_out_call_iarg_reg16(TCGContext *s, int i, TCGReg arg)
{
TCGReg tmp = TCG_TMP0;
if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
tmp = tcg_target_call_iarg_regs[i];
}
tcg_out_opc_imm(s, OPC_ANDI, tmp, arg, 0xffff);
return tcg_out_call_iarg_reg(s, i, tmp);
}
static int tcg_out_call_iarg_imm(TCGContext *s, int i, TCGArg arg)
{
TCGReg tmp = TCG_TMP0;
if (arg == 0) {
tmp = TCG_REG_ZERO;
} else {
if (i < ARRAY_SIZE(tcg_target_call_iarg_regs)) {
tmp = tcg_target_call_iarg_regs[i];
}
tcg_out_movi(s, TCG_TYPE_REG, tmp, arg);
}
return tcg_out_call_iarg_reg(s, i, tmp);
}
static int tcg_out_call_iarg_reg2(TCGContext *s, int i, TCGReg al, TCGReg ah)
{
tcg_debug_assert(TCG_TARGET_REG_BITS == 32);
i = (i + 1) & ~1;
i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? ah : al));
i = tcg_out_call_iarg_reg(s, i, (MIPS_BE ? al : ah));
return i;
}
/* We have four temps, we might as well expose three of them. */
static const TCGLdstHelperParam ldst_helper_param = {
.ntmp = 3, .tmp = { TCG_TMP0, TCG_TMP1, TCG_TMP2 }
};
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
MemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
TCGReg v0;
int i;
MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
@ -1196,29 +1132,13 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
return false;
}
i = 1;
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
} else {
i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
}
i = tcg_out_call_iarg_imm(s, i, oi);
i = tcg_out_call_iarg_imm(s, i, (intptr_t)l->raddr);
tcg_out_ld_helper_args(s, l, &ldst_helper_param);
tcg_out_call_int(s, qemu_ld_helpers[opc & (MO_BSWAP | MO_SSIZE)], false);
/* delay slot */
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
tcg_out_nop(s);
v0 = l->datalo_reg;
if (TCG_TARGET_REG_BITS == 32 && (opc & MO_SIZE) == MO_64) {
/* We eliminated V0 from the possible output registers, so it
cannot be clobbered here. So we must move V1 first. */
if (MIPS_BE) {
tcg_out_mov(s, TCG_TYPE_I32, v0, TCG_REG_V1);
v0 = l->datahi_reg;
} else {
tcg_out_mov(s, TCG_TYPE_I32, l->datahi_reg, TCG_REG_V1);
}
}
tcg_out_ld_helper_ret(s, l, true, &ldst_helper_param);
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
@ -1226,22 +1146,14 @@ static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
}
/* delay slot */
if (TCG_TARGET_REG_BITS == 64 && l->type == TCG_TYPE_I32) {
/* we always sign-extend 32-bit loads */
tcg_out_ext32s(s, v0, TCG_REG_V0);
} else {
tcg_out_opc_reg(s, OPC_OR, v0, TCG_REG_V0, TCG_REG_ZERO);
}
tcg_out_nop(s);
return true;
}
static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
{
const tcg_insn_unit *tgt_rx = tcg_splitwx_to_rx(s->code_ptr);
MemOpIdx oi = l->oi;
MemOp opc = get_memop(oi);
MemOp s_bits = opc & MO_SIZE;
int i;
MemOp opc = get_memop(l->oi);
/* resolve label address */
if (!reloc_pc16(l->label_ptr[0], tgt_rx)
@ -1250,41 +1162,19 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
return false;
}
i = 1;
if (TCG_TARGET_REG_BITS < TARGET_LONG_BITS) {
i = tcg_out_call_iarg_reg2(s, i, l->addrlo_reg, l->addrhi_reg);
} else {
i = tcg_out_call_iarg_reg(s, i, l->addrlo_reg);
}
switch (s_bits) {
case MO_8:
i = tcg_out_call_iarg_reg8(s, i, l->datalo_reg);
break;
case MO_16:
i = tcg_out_call_iarg_reg16(s, i, l->datalo_reg);
break;
case MO_32:
i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
break;
case MO_64:
if (TCG_TARGET_REG_BITS == 32) {
i = tcg_out_call_iarg_reg2(s, i, l->datalo_reg, l->datahi_reg);
} else {
i = tcg_out_call_iarg_reg(s, i, l->datalo_reg);
}
break;
default:
g_assert_not_reached();
}
i = tcg_out_call_iarg_imm(s, i, oi);
tcg_out_st_helper_args(s, l, &ldst_helper_param);
/* Tail call to the store helper. Thus force the return address
computation to take place in the return address register. */
tcg_out_movi(s, TCG_TYPE_PTR, TCG_REG_RA, (intptr_t)l->raddr);
i = tcg_out_call_iarg_reg(s, i, TCG_REG_RA);
tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], true);
tcg_out_call_int(s, qemu_st_helpers[opc & (MO_BSWAP | MO_SIZE)], false);
/* delay slot */
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0], TCG_AREG0);
tcg_out_nop(s);
tcg_out_opc_br(s, OPC_BEQ, TCG_REG_ZERO, TCG_REG_ZERO);
if (!reloc_pc16(s->code_ptr - 1, l->raddr)) {
return false;
}
/* delay slot */
tcg_out_nop(s);
return true;
}