tcg-hppa: Fix broken load/store helpers

The CONFIG_TCG_PASS_AREG0 code for calling ld/st helpers
was not respecting the ABI requirement for 64-bit values
being aligned in registers.

Mirror the ARM port in use of helper functions to marshal
arguments into the correct registers.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
Richard Henderson 2012-09-18 19:59:48 -07:00 committed by Aurelien Jarno
parent c08d9ee38d
commit e55f523d79
1 changed file with 73 additions and 61 deletions

View File

@ -976,10 +976,11 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, r1, offset);
}
/* Compute the value that ought to appear in the TLB for a hit, namely, the page
of the address. We include the low N bits of the address to catch unaligned
accesses and force them onto the slow path. Do this computation after having
issued the load from the TLB slot to give the load time to complete. */
/* Compute the value that ought to appear in the TLB for a hit, namely,
the page of the address. We include the low N bits of the address
to catch unaligned accesses and force them onto the slow path. Do
this computation after having issued the load from the TLB slot to
give the load time to complete. */
tcg_out_andi(s, r0, addrlo, TARGET_PAGE_MASK | ((1 << s_bits) - 1));
/* If not equal, jump to lab_miss. */
@ -992,6 +993,36 @@ static int tcg_out_tlb_read(TCGContext *s, int r0, int r1, int addrlo,
return ret;
}
/* Marshal one 32-bit call argument into position.  The first four
   arguments travel in registers; later ones spill to the outgoing
   stack frame.  V is either a register number or, when VCONST is
   true, an immediate value.  Returns the index of the next slot. */
static int tcg_out_arg_reg32(TCGContext *s, int argno, TCGArg v, bool vconst)
{
    if (argno >= 4) {
        /* Stack slot.  Materialize a non-zero constant into the scratch
           register first; a constant zero can be stored straight from
           R0, which is hardwired to zero on hppa.  */
        if (vconst && v != 0) {
            tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_R20, v);
            v = TCG_REG_R20;
        }
        tcg_out_st(s, TCG_TYPE_I32, v, TCG_REG_CALL_STACK,
                   TCG_TARGET_CALL_STACK_OFFSET - ((argno - 3) * 4));
    } else if (vconst) {
        tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
    } else {
        tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[argno], v);
    }
    return argno + 1;
}
/* Marshal a 64-bit call argument (low half VL, high half VH).
   The hppa ABI requires 64-bit values to occupy even-aligned
   register pairs / stack slots, so round ARGNO up before placing
   the two halves.  Returns the index of the next free slot. */
static int tcg_out_arg_reg64(TCGContext *s, int argno, TCGArg vl, TCGArg vh)
{
    /* Round up to an even slot for the required alignment.  */
    argno += argno & 1;

    argno = tcg_out_arg_reg32(s, argno, vl, false);
    return tcg_out_arg_reg32(s, argno, vh, false);
}
#endif
static void tcg_out_qemu_ld_direct(TCGContext *s, int datalo_reg, int datahi_reg,
@ -1072,39 +1103,36 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
/* Note that addrhi_reg is only used for 64-bit guests. */
int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
int mem_index = *args;
int lab1, lab2, argreg, offset;
int lab1, lab2, argno, offset;
lab1 = gen_new_label();
lab2 = gen_new_label();
offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_read);
offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
opc & 3, lab1, offset);
offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
addrhi_reg, opc & 3, lab1, offset);
/* TLB Hit. */
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
(offset ? TCG_REG_R1 : TCG_REG_R25),
offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg, TCG_REG_R20, opc);
tcg_out_qemu_ld_direct(s, datalo_reg, datahi_reg, addrlo_reg,
TCG_REG_R20, opc);
tcg_out_branch(s, lab2, 1);
/* TLB Miss. */
/* label1: */
tcg_out_label(s, lab1, s->code_ptr);
argreg = TCG_REG_R26;
tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
argno = 0;
argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
if (TARGET_LONG_BITS == 64) {
tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
} else {
argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
}
tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
argno = tcg_out_arg_reg32(s, argno, mem_index, true);
/* XXX/FIXME: suboptimal */
tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2],
tcg_target_call_iarg_regs[1]);
tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
tcg_target_call_iarg_regs[0]);
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
TCG_AREG0);
tcg_out_call(s, qemu_ld_helpers[opc & 3]);
switch (opc) {
@ -1140,8 +1168,8 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
#endif
}
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg, int datahi_reg,
int addr_reg, int opc)
static void tcg_out_qemu_st_direct(TCGContext *s, int datalo_reg,
int datahi_reg, int addr_reg, int opc)
{
#ifdef TARGET_WORDS_BIGENDIAN
const int bswap = 0;
@ -1194,17 +1222,18 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
/* Note that addrhi_reg is only used for 64-bit guests. */
int addrhi_reg = (TARGET_LONG_BITS == 64 ? *args++ : TCG_REG_R0);
int mem_index = *args;
int lab1, lab2, argreg, offset;
int lab1, lab2, argno, next, offset;
lab1 = gen_new_label();
lab2 = gen_new_label();
offset = offsetof(CPUArchState, tlb_table[mem_index][0].addr_write);
offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg, addrhi_reg,
opc, lab1, offset);
offset = tcg_out_tlb_read(s, TCG_REG_R26, TCG_REG_R25, addrlo_reg,
addrhi_reg, opc, lab1, offset);
/* TLB Hit. */
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20, (offset ? TCG_REG_R1 : TCG_REG_R25),
tcg_out_ld(s, TCG_TYPE_PTR, TCG_REG_R20,
(offset ? TCG_REG_R1 : TCG_REG_R25),
offsetof(CPUArchState, tlb_table[mem_index][0].addend) - offset);
/* There are no indexed stores, so we must do this addition explicitly.
@ -1217,63 +1246,46 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
/* label1: */
tcg_out_label(s, lab1, s->code_ptr);
argreg = TCG_REG_R26;
tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrlo_reg);
argno = 0;
argno = tcg_out_arg_reg32(s, argno, TCG_AREG0, false);
if (TARGET_LONG_BITS == 64) {
tcg_out_mov(s, TCG_TYPE_I32, argreg--, addrhi_reg);
argno = tcg_out_arg_reg64(s, argno, addrlo_reg, addrhi_reg);
} else {
argno = tcg_out_arg_reg32(s, argno, addrlo_reg, false);
}
next = (argno < 4 ? tcg_target_call_iarg_regs[argno] : TCG_REG_R20);
switch(opc) {
case 0:
tcg_out_andi(s, argreg--, datalo_reg, 0xff);
tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
tcg_out_andi(s, next, datalo_reg, 0xff);
argno = tcg_out_arg_reg32(s, argno, next, false);
break;
case 1:
tcg_out_andi(s, argreg--, datalo_reg, 0xffff);
tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
tcg_out_andi(s, next, datalo_reg, 0xffff);
argno = tcg_out_arg_reg32(s, argno, next, false);
break;
case 2:
tcg_out_mov(s, TCG_TYPE_I32, argreg--, datalo_reg);
tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
argno = tcg_out_arg_reg32(s, argno, datalo_reg, false);
break;
case 3:
/* Because of the alignment required by the 64-bit data argument,
we will always use R23/R24. Also, we will always run out of
argument registers for storing mem_index, so that will have
to go on the stack. */
if (mem_index == 0) {
argreg = TCG_REG_R0;
} else {
argreg = TCG_REG_R20;
tcg_out_movi(s, TCG_TYPE_I32, argreg, mem_index);
}
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R23, datahi_reg);
tcg_out_mov(s, TCG_TYPE_I32, TCG_REG_R24, datalo_reg);
tcg_out_st(s, TCG_TYPE_I32, argreg, TCG_REG_CALL_STACK,
TCG_TARGET_CALL_STACK_OFFSET - 4);
argno = tcg_out_arg_reg64(s, argno, datalo_reg, datahi_reg);
break;
default:
tcg_abort();
}
argno = tcg_out_arg_reg32(s, argno, mem_index, true);
/* XXX/FIXME: suboptimal */
tcg_out_mov(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3],
tcg_target_call_iarg_regs[2]);
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2],
tcg_target_call_iarg_regs[1]);
tcg_out_mov(s, TCG_TYPE_TL, tcg_target_call_iarg_regs[1],
tcg_target_call_iarg_regs[0]);
tcg_out_mov(s, TCG_TYPE_PTR, tcg_target_call_iarg_regs[0],
TCG_AREG0);
tcg_out_call(s, qemu_st_helpers[opc]);
/* label2: */
tcg_out_label(s, lab2, s->code_ptr);
#else
/* There are no indexed stores, so if GUEST_BASE is set we must do the add
explicitly. Careful to avoid R20, which is used for the bswaps to follow. */
/* There are no indexed stores, so if GUEST_BASE is set we must do
the add explicitly. Careful to avoid R20, which is used for the
bswaps to follow. */
if (GUEST_BASE != 0) {
tcg_out_arith(s, TCG_REG_R31, addrlo_reg, TCG_GUEST_BASE_REG, INSN_ADDL);
tcg_out_arith(s, TCG_REG_R31, addrlo_reg,
TCG_GUEST_BASE_REG, INSN_ADDL);
addrlo_reg = TCG_REG_R31;
}
tcg_out_qemu_st_direct(s, datalo_reg, datahi_reg, addrlo_reg, opc);