tcg-arm: Move load of tlb addend into tcg_out_tlb_read

This allows us to make more intelligent decisions about the relative
offsets of the tlb comparator and the addend, avoiding any need for
writeback addressing.

Signed-off-by: Richard Henderson <rth@twiddle.net>
Author: Richard Henderson <rth@twiddle.net>
Date:   2013-08-30 08:16:00 -07:00
Parent: f248873637
Commit: d0ebde2284

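For readers skimming the patch, the core trick is the offset arithmetic now done
inside tcg_out_tlb_read: once the comparator offset (cmp_off) and the addend
offset (add_off) are both known relative to the same TLB entry, a single ADD of
the high byte of cmp_off into the base register brings both fields within reach
of plain immediate-offset loads, so the old writeback form "ldr r0, [r2, #off]!"
is no longer needed. The standalone sketch below mirrors that arithmetic in
plain C; the limits MAX_LDR_IMM/MAX_LDRD_IMM, the helper fold_offsets(), and the
example offsets are invented for illustration and are not QEMU APIs.

    #include <assert.h>
    #include <stdbool.h>
    #include <stdio.h>

    /* Illustrative encoding limits: ARM "ldr rd, [rn, #imm]" takes a 12-bit
       immediate; the ldrd form used when use_armv6_instructions is set takes
       only 8 bits.  These macro names are invented for this sketch.  */
    #define MAX_LDR_IMM   0xfff
    #define MAX_LDRD_IMM  0xff

    /* Mirror of the patch's adjustment: when either offset is out of range,
       fold the high byte of cmp_off into the base register once (one ADD),
       then rebase both offsets.  Because the addend sits a few bytes after
       the comparator inside the same CPUTLBEntry, rebasing by cmp_off's
       high byte brings add_off into range as well.  */
    static bool fold_offsets(int *cmp_off, int *add_off, bool use_armv6)
    {
        bool need_base_add = false;

        if (*add_off > MAX_LDR_IMM || (use_armv6 && *cmp_off > MAX_LDRD_IMM)) {
            int high = *cmp_off & 0xff00;  /* emitted as: add r2, env, #high */
            *add_off -= high;
            *cmp_off &= 0xff;
            need_base_add = true;
        }

        /* Both loads now use plain immediate addressing -- no writeback.  */
        assert(*cmp_off <= (use_armv6 ? MAX_LDRD_IMM : MAX_LDR_IMM));
        assert(*add_off <= MAX_LDR_IMM);
        return need_base_add;
    }

    int main(void)
    {
        /* Example: a TLB table deep enough that cmp_off overflows ldrd's
           8-bit immediate (the values are made up for the demonstration).  */
        int cmp_off = 0x1234;          /* offset of addr_read/addr_write */
        int add_off = 0x123c;          /* offset of the addend field */

        if (fold_offsets(&cmp_off, &add_off, true)) {
            printf("add r2, env, #0x1200 emitted first\n");
        }
        printf("cmp_off=0x%x add_off=0x%x\n", cmp_off, add_off); /* 0x34 0x3c */
        return 0;
    }

In the patch itself, that single ADD is emitted by tcg_out_dat_imm() with the
operand (24 << 7) | (cmp_off >> 8), i.e. the high byte of cmp_off encoded as an
ARM rotated immediate.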

@@ -1172,42 +1172,39 @@ QEMU_BUILD_BUG_ON(CPU_TLB_BITS > 8);
 QEMU_BUILD_BUG_ON(offsetof(CPUArchState, tlb_table[NB_MMU_MODES - 1][1])
                   > 0xffff);
 
-/* Load and compare a TLB entry, leaving the flags set.  Leaves R2 pointing
-   to the tlb entry.  Clobbers R1 and TMP.  */
+/* Load and compare a TLB entry, leaving the flags set.  Leaves R1 containing
+   the addend of the tlb entry.  Clobbers R0, R2, TMP.  */
 
 static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
-                             int s_bits, int tlb_offset)
+                             int s_bits, int mem_index, bool is_load)
 {
     TCGReg base = TCG_AREG0;
+    int cmp_off =
+        (is_load
+         ? offsetof(CPUArchState, tlb_table[mem_index][0].addr_read)
+         : offsetof(CPUArchState, tlb_table[mem_index][0].addr_write));
+    int add_off = offsetof(CPUArchState, tlb_table[mem_index][0].addend);
 
     /* Should generate something like the following:
-     * pre-v7:
      *   shr    tmp, addr_reg, #TARGET_PAGE_BITS                 (1)
-     *   add    r2, env, #off & 0xff00
+     *   add    r2, env, #high
      *   and    r0, tmp, #(CPU_TLB_SIZE - 1)                     (2)
      *   add    r2, r2, r0, lsl #CPU_TLB_ENTRY_BITS              (3)
-     *   ldr    r0, [r2, #off & 0xff]!                           (4)
+     *   ldr    r0, [r2, #cmp]                                   (4)
      *   tst    addr_reg, #s_mask
      *   cmpeq  r0, tmp, lsl #TARGET_PAGE_BITS                   (5)
-     *
-     * v7 (not implemented yet):
-     *   ubfx   r2, addr_reg, #TARGET_PAGE_BITS, #CPU_TLB_BITS   (1)
-     *   movw   tmp, #~TARGET_PAGE_MASK & ~s_mask
-     *   movw   r0, #off
-     *   add    r2, env, r2, lsl #CPU_TLB_ENTRY_BITS             (2)
-     *   bic    tmp, addr_reg, tmp
-     *   ldr    r0, [r2, r0]!                                    (3)
-     *   cmp    r0, tmp                                          (4)
+     *   ldr    r1, [r2, #add]
      */
     tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP,
                     0, addrlo, SHIFT_IMM_LSR(TARGET_PAGE_BITS));
 
     /* We checked that the offset is contained within 16 bits above.  */
-    if (tlb_offset > 0xff) {
+    if (add_off > 0xfff || (use_armv6_instructions && cmp_off > 0xff)) {
         tcg_out_dat_imm(s, COND_AL, ARITH_ADD, TCG_REG_R2, base,
-                        (24 << 7) | (tlb_offset >> 8));
-        tlb_offset &= 0xff;
+                        (24 << 7) | (cmp_off >> 8));
         base = TCG_REG_R2;
+        add_off -= cmp_off & 0xff00;
+        cmp_off &= 0xff;
     }
 
     tcg_out_dat_imm(s, COND_AL, ARITH_AND,
@@ -1219,14 +1216,11 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
        but due to how the pointer needs setting up, ldm isn't useful.
        Base arm5 doesn't have ldrd, but armv5te does.  */
     if (use_armv6_instructions && TARGET_LONG_BITS == 64) {
-        tcg_out_memop_8(s, COND_AL, INSN_LDRD_IMM, TCG_REG_R0,
-                        TCG_REG_R2, tlb_offset, 1, 1);
+        tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
     } else {
-        tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R0,
-                         TCG_REG_R2, tlb_offset, 1, 1);
+        tcg_out_ld32_12(s, COND_AL, TCG_REG_R0, TCG_REG_R2, cmp_off);
         if (TARGET_LONG_BITS == 64) {
-            tcg_out_memop_12(s, COND_AL, INSN_LDR_IMM, TCG_REG_R1,
-                             TCG_REG_R2, 4, 1, 0);
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, cmp_off + 4);
         }
     }
@@ -1243,6 +1237,9 @@ static void tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
         tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0,
                         TCG_REG_R1, addrhi, SHIFT_IMM_LSL(0));
     }
+
+    /* Load the tlb addend.  */
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2, add_off);
 }
 
 /* Record the context of a call to the out of line helper code for the slow
@@ -1386,18 +1383,13 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args, int opc)
     mem_index = *args;
     s_bits = opc & 3;
 
-    tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
-                     offsetof(CPUArchState, tlb_table[mem_index][0].addr_read));
+    tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, mem_index, 1);
 
     /* This a conditional BL only to load a pointer within this opcode into LR
        for the slow path.  We will not be using the value for a tail call.  */
     label_ptr = s->code_ptr;
     tcg_out_bl_noaddr(s, COND_NE);
 
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
-                    offsetof(CPUTLBEntry, addend)
-                    - offsetof(CPUTLBEntry, addr_read));
-
     switch (opc) {
     case 0:
         tcg_out_ld8_r(s, COND_AL, data_reg, addr_reg, TCG_REG_R1);
@@ -1533,13 +1525,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args, int opc)
     mem_index = *args;
     s_bits = opc & 3;
 
-    tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits,
-                     offsetof(CPUArchState,
-                              tlb_table[mem_index][0].addr_write));
-
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R2,
-                    offsetof(CPUTLBEntry, addend)
-                    - offsetof(CPUTLBEntry, addr_write));
+    tcg_out_tlb_read(s, addr_reg, addr_reg2, s_bits, mem_index, 0);
 
     switch (opc) {
     case 0: