tcg/i386: remove suboptimal register shifting
Now that CONFIG_TCG_PASS_AREG0 has been removed, it's easier to generate optimal code for the load/store functions. First swap the two registers used in tcg_out_tlb_load() so that the address ends up in the second register instead of the first one. Adjust tcg_out_qemu_ld() and tcg_out_qemu_st() to respectively call tcg_out_qemu_ld_direct() and tcg_out_qemu_st_direct() with the correct registers. Then replace the register shifting by direct load of the arguments. Reviewed-by: Richard Henderson <rth@twiddle.net> Signed-off-by: Aurelien Jarno <aurelien@aurel32.net>
This commit is contained in:
parent
50cd721482
commit
166792f7bb
@ -1016,12 +1016,12 @@ static const void *qemu_st_helpers[4] = {
|
||||
LABEL_PTRS is filled with 1 (32-bit addresses) or 2 (64-bit addresses)
|
||||
positions of the displacements of forward jumps to the TLB miss case.
|
||||
|
||||
First argument register is loaded with the low part of the address.
|
||||
Second argument register is loaded with the low part of the address.
|
||||
In the TLB hit case, it has been adjusted as indicated by the TLB
|
||||
and so is a host address. In the TLB miss case, it continues to
|
||||
hold a guest address.
|
||||
|
||||
Second argument register is clobbered. */
|
||||
First argument register is clobbered. */
|
||||
|
||||
static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
|
||||
int mem_index, int s_bits,
|
||||
@ -1039,25 +1039,25 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
|
||||
rexw = P_REXW;
|
||||
}
|
||||
|
||||
tcg_out_mov(s, type, r1, addrlo);
|
||||
tcg_out_mov(s, type, r0, addrlo);
|
||||
tcg_out_mov(s, type, r1, addrlo);
|
||||
|
||||
tcg_out_shifti(s, SHIFT_SHR + rexw, r1,
|
||||
tcg_out_shifti(s, SHIFT_SHR + rexw, r0,
|
||||
TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS);
|
||||
|
||||
tgen_arithi(s, ARITH_AND + rexw, r0,
|
||||
TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
|
||||
tgen_arithi(s, ARITH_AND + rexw, r1,
|
||||
TARGET_PAGE_MASK | ((1 << s_bits) - 1), 0);
|
||||
tgen_arithi(s, ARITH_AND + rexw, r0,
|
||||
(CPU_TLB_SIZE - 1) << CPU_TLB_ENTRY_BITS, 0);
|
||||
|
||||
tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r1, TCG_AREG0, r1, 0,
|
||||
tcg_out_modrm_sib_offset(s, OPC_LEA + P_REXW, r0, TCG_AREG0, r0, 0,
|
||||
offsetof(CPUArchState, tlb_table[mem_index][0])
|
||||
+ which);
|
||||
|
||||
/* cmp 0(r1), r0 */
|
||||
tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r0, r1, 0);
|
||||
/* cmp 0(r0), r1 */
|
||||
tcg_out_modrm_offset(s, OPC_CMP_GvEv + rexw, r1, r0, 0);
|
||||
|
||||
tcg_out_mov(s, type, r0, addrlo);
|
||||
tcg_out_mov(s, type, r1, addrlo);
|
||||
|
||||
/* jne label1 */
|
||||
tcg_out8(s, OPC_JCC_short + JCC_JNE);
|
||||
@ -1065,8 +1065,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
|
||||
s->code_ptr++;
|
||||
|
||||
if (TARGET_LONG_BITS > TCG_TARGET_REG_BITS) {
|
||||
/* cmp 4(r1), addrhi */
|
||||
tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r1, 4);
|
||||
/* cmp 4(r0), addrhi */
|
||||
tcg_out_modrm_offset(s, OPC_CMP_GvEv, args[addrlo_idx+1], r0, 4);
|
||||
|
||||
/* jne label1 */
|
||||
tcg_out8(s, OPC_JCC_short + JCC_JNE);
|
||||
@ -1076,8 +1076,8 @@ static inline void tcg_out_tlb_load(TCGContext *s, int addrlo_idx,
|
||||
|
||||
/* TLB Hit. */
|
||||
|
||||
/* add addend(r1), r0 */
|
||||
tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r0, r1,
|
||||
/* add addend(r0), r1 */
|
||||
tcg_out_modrm_offset(s, OPC_ADD_GvEv + P_REXW, r1, r0,
|
||||
offsetof(CPUTLBEntry, addend) - which);
|
||||
}
|
||||
#endif
|
||||
@ -1169,9 +1169,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
|
||||
int addrlo_idx;
|
||||
#if defined(CONFIG_SOFTMMU)
|
||||
int mem_index, s_bits;
|
||||
#if TCG_TARGET_REG_BITS == 64
|
||||
int arg_idx;
|
||||
#else
|
||||
#if TCG_TARGET_REG_BITS == 32
|
||||
int stack_adjust;
|
||||
#endif
|
||||
uint8_t *label_ptr[3];
|
||||
@ -1192,7 +1190,7 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
|
||||
label_ptr, offsetof(CPUTLBEntry, addr_read));
|
||||
|
||||
/* TLB Hit. */
|
||||
tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
|
||||
tcg_out_qemu_ld_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, opc);
|
||||
|
||||
/* jmp label2 */
|
||||
tcg_out8(s, OPC_JMP_short);
|
||||
@ -1220,15 +1218,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
|
||||
tcg_out_push(s, TCG_AREG0);
|
||||
stack_adjust += 4;
|
||||
#else
|
||||
/* The first argument is already loaded with addrlo. */
|
||||
arg_idx = 1;
|
||||
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[arg_idx],
|
||||
mem_index);
|
||||
/* XXX/FIXME: suboptimal */
|
||||
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
|
||||
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
|
||||
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
|
||||
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
|
||||
/* The second argument is already loaded with addrlo. */
|
||||
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[2], mem_index);
|
||||
#endif
|
||||
|
||||
tcg_out_calli(s, (tcg_target_long)qemu_ld_helpers[s_bits]);
|
||||
@ -1294,9 +1286,9 @@ static void tcg_out_qemu_ld(TCGContext *s, const TCGArg *args,
|
||||
use the ADDR32 prefix. For now, do nothing. */
|
||||
|
||||
if (offset != GUEST_BASE) {
|
||||
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
|
||||
tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
|
||||
base = TCG_REG_L0;
|
||||
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
|
||||
tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
|
||||
base = TCG_REG_L1;
|
||||
offset = 0;
|
||||
}
|
||||
}
|
||||
@ -1317,8 +1309,8 @@ static void tcg_out_qemu_st_direct(TCGContext *s, int datalo, int datahi,
|
||||
/* ??? Ideally we wouldn't need a scratch register. For user-only,
|
||||
we could perform the bswap twice to restore the original value
|
||||
instead of moving to the scratch. But as it is, the L constraint
|
||||
means that TCG_REG_L1 is definitely free here. */
|
||||
const int scratch = TCG_REG_L1;
|
||||
means that TCG_REG_L0 is definitely free here. */
|
||||
const int scratch = TCG_REG_L0;
|
||||
|
||||
switch (sizeop) {
|
||||
case 0:
|
||||
@ -1391,7 +1383,7 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
|
||||
label_ptr, offsetof(CPUTLBEntry, addr_write));
|
||||
|
||||
/* TLB Hit. */
|
||||
tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L0, 0, opc);
|
||||
tcg_out_qemu_st_direct(s, data_reg, data_reg2, TCG_REG_L1, 0, opc);
|
||||
|
||||
/* jmp label2 */
|
||||
tcg_out8(s, OPC_JMP_short);
|
||||
@ -1425,15 +1417,12 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
|
||||
tcg_out_push(s, TCG_AREG0);
|
||||
stack_adjust += 4;
|
||||
#else
|
||||
tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
|
||||
TCG_REG_L1, data_reg);
|
||||
tcg_out_movi(s, TCG_TYPE_I32, TCG_REG_L2, mem_index);
|
||||
stack_adjust = 0;
|
||||
/* XXX/FIXME: suboptimal */
|
||||
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[3], TCG_REG_L2);
|
||||
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[2], TCG_REG_L1);
|
||||
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[1], TCG_REG_L0);
|
||||
tcg_out_mov(s, TCG_TYPE_I64, tcg_target_call_iarg_regs[0], TCG_AREG0);
|
||||
/* The second argument is already loaded with addrlo. */
|
||||
tcg_out_mov(s, (opc == 3 ? TCG_TYPE_I64 : TCG_TYPE_I32),
|
||||
tcg_target_call_iarg_regs[2], data_reg);
|
||||
tcg_out_movi(s, TCG_TYPE_I32, tcg_target_call_iarg_regs[3], mem_index);
|
||||
stack_adjust = 0;
|
||||
#endif
|
||||
|
||||
tcg_out_calli(s, (tcg_target_long)qemu_st_helpers[s_bits]);
|
||||
@ -1460,9 +1449,9 @@ static void tcg_out_qemu_st(TCGContext *s, const TCGArg *args,
|
||||
use the ADDR32 prefix. For now, do nothing. */
|
||||
|
||||
if (offset != GUEST_BASE) {
|
||||
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L0, GUEST_BASE);
|
||||
tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L0, base);
|
||||
base = TCG_REG_L0;
|
||||
tcg_out_movi(s, TCG_TYPE_I64, TCG_REG_L1, GUEST_BASE);
|
||||
tgen_arithr(s, ARITH_ADD + P_REXW, TCG_REG_L1, base);
|
||||
base = TCG_REG_L1;
|
||||
offset = 0;
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user