tcg/arm: Introduce prepare_host_addr
Merge tcg_out_tlb_load, add_qemu_ldst_label, and some code that lived in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that returns HostAddress and TCGLabelQemuLdst structures. Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
This commit is contained in:
parent
1e612dd66a
commit
7131d3cf72
@ -1434,125 +1434,6 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
|
||||
}
|
||||
}
|
||||
|
||||
#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
|
||||
|
||||
/* We expect to use a 9-bit sign-magnitude negative offset from ENV. */
|
||||
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
|
||||
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
|
||||
|
||||
/* These offsets are built into the LDRD below. */
|
||||
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
|
||||
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
|
||||
|
||||
/* Load and compare a TLB entry, leaving the flags set. Returns the register
|
||||
containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
|
||||
|
||||
static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
|
||||
MemOp opc, int mem_index, bool is_load)
|
||||
{
|
||||
int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
|
||||
: offsetof(CPUTLBEntry, addr_write));
|
||||
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
|
||||
unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
|
||||
unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
|
||||
TCGReg t_addr;
|
||||
|
||||
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
|
||||
tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
|
||||
|
||||
/* Extract the tlb index from the address into R0. */
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
|
||||
SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
|
||||
|
||||
/*
|
||||
* Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
|
||||
* Load the tlb comparator into R2/R3 and the fast path addend into R1.
|
||||
*/
|
||||
if (cmp_off == 0) {
|
||||
if (TARGET_LONG_BITS == 64) {
|
||||
tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
|
||||
} else {
|
||||
tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
|
||||
}
|
||||
} else {
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
|
||||
TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
|
||||
if (TARGET_LONG_BITS == 64) {
|
||||
tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
|
||||
} else {
|
||||
tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
|
||||
}
|
||||
}
|
||||
|
||||
/* Load the tlb addend. */
|
||||
tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
|
||||
offsetof(CPUTLBEntry, addend));
|
||||
|
||||
/*
|
||||
* Check alignment, check comparators.
|
||||
* Do this in 2-4 insns. Use MOVW for v7, if possible,
|
||||
* to reduce the number of sequential conditional instructions.
|
||||
* Almost all guests have at least 4k pages, which means that we need
|
||||
* to clear at least 9 bits even for an 8-byte memory, which means it
|
||||
* isn't worth checking for an immediate operand for BIC.
|
||||
*
|
||||
* For unaligned accesses, test the page of the last unit of alignment.
|
||||
* This leaves the least significant alignment bits unchanged, and of
|
||||
* course must be zero.
|
||||
*/
|
||||
t_addr = addrlo;
|
||||
if (a_mask < s_mask) {
|
||||
t_addr = TCG_REG_R0;
|
||||
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
|
||||
addrlo, s_mask - a_mask);
|
||||
}
|
||||
if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
|
||||
tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
|
||||
t_addr, TCG_REG_TMP, 0);
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
|
||||
} else {
|
||||
if (a_mask) {
|
||||
tcg_debug_assert(a_mask <= 0xff);
|
||||
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
|
||||
}
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
|
||||
SHIFT_IMM_LSR(TARGET_PAGE_BITS));
|
||||
tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
|
||||
0, TCG_REG_R2, TCG_REG_TMP,
|
||||
SHIFT_IMM_LSL(TARGET_PAGE_BITS));
|
||||
}
|
||||
|
||||
if (TARGET_LONG_BITS == 64) {
|
||||
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
|
||||
}
|
||||
|
||||
return TCG_REG_R1;
|
||||
}
|
||||
|
||||
/* Record the context of a call to the out of line helper code for the slow
|
||||
path for a load or store, so that we can later generate the correct
|
||||
helper code. */
|
||||
static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
|
||||
MemOpIdx oi, TCGType type,
|
||||
TCGReg datalo, TCGReg datahi,
|
||||
TCGReg addrlo, TCGReg addrhi,
|
||||
tcg_insn_unit *raddr,
|
||||
tcg_insn_unit *label_ptr)
|
||||
{
|
||||
TCGLabelQemuLdst *label = new_ldst_label(s);
|
||||
|
||||
label->is_ld = is_ld;
|
||||
label->oi = oi;
|
||||
label->type = type;
|
||||
label->datalo_reg = datalo;
|
||||
label->datahi_reg = datahi;
|
||||
label->addrlo_reg = addrlo;
|
||||
label->addrhi_reg = addrhi;
|
||||
label->raddr = tcg_splitwx_to_rx(raddr);
|
||||
label->label_ptr[0] = label_ptr;
|
||||
}
|
||||
|
||||
static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
|
||||
{
|
||||
TCGReg argreg;
|
||||
@ -1636,29 +1517,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
|
||||
return true;
|
||||
}
|
||||
#else
|
||||
|
||||
/*
 * Emit an alignment test on ADDRLO for an access requiring A_BITS of
 * alignment, followed by a conditional BL (taken on NE) whose target is
 * left as 0 and whose location is recorded in a new slow-path label.
 * The label also records is_ld, the address registers, and the code
 * position right after the BL as the return address.
 */
static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
                                   TCGReg addrhi, unsigned a_bits)
{
    TCGLabelQemuLdst *lb;
    unsigned mask = (1 << a_bits) - 1;

    lb = new_ldst_label(s);
    lb->addrlo_reg = addrlo;
    lb->addrhi_reg = addrhi;
    lb->is_ld = is_ld;

    /*
     * The mask is passed as the immediate of TST; a_bits is expected to
     * max out at 7, and 8 could easily be supported.
     */
    tcg_debug_assert(mask <= 0xff);

    /* tst addr, #mask */
    tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, mask);

    /* blne slow_path -- remember where the branch lives before emitting it. */
    lb->label_ptr[0] = s->code_ptr;
    tcg_out_bl_imm(s, COND_NE, 0);

    /* The return address is whatever follows the BL. */
    lb->raddr = tcg_splitwx_to_rx(s->code_ptr);
}
|
||||
|
||||
static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
|
||||
{
|
||||
if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
|
||||
@ -1703,6 +1561,134 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
|
||||
}
|
||||
#endif /* SOFTMMU */
|
||||
|
||||
static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
|
||||
TCGReg addrlo, TCGReg addrhi,
|
||||
MemOpIdx oi, bool is_ld)
|
||||
{
|
||||
TCGLabelQemuLdst *ldst = NULL;
|
||||
MemOp opc = get_memop(oi);
|
||||
MemOp a_bits = get_alignment_bits(opc);
|
||||
unsigned a_mask = (1 << a_bits) - 1;
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
int mem_index = get_mmuidx(oi);
|
||||
int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
|
||||
: offsetof(CPUTLBEntry, addr_write);
|
||||
int fast_off = TLB_MASK_TABLE_OFS(mem_index);
|
||||
unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
|
||||
TCGReg t_addr;
|
||||
|
||||
ldst = new_ldst_label(s);
|
||||
ldst->is_ld = is_ld;
|
||||
ldst->oi = oi;
|
||||
ldst->addrlo_reg = addrlo;
|
||||
ldst->addrhi_reg = addrhi;
|
||||
|
||||
/* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
|
||||
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
|
||||
QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
|
||||
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
|
||||
QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
|
||||
tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
|
||||
|
||||
/* Extract the tlb index from the address into R0. */
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
|
||||
SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
|
||||
|
||||
/*
|
||||
* Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
|
||||
* Load the tlb comparator into R2/R3 and the fast path addend into R1.
|
||||
*/
|
||||
if (cmp_off == 0) {
|
||||
if (TARGET_LONG_BITS == 64) {
|
||||
tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
|
||||
} else {
|
||||
tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
|
||||
}
|
||||
} else {
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
|
||||
TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
|
||||
if (TARGET_LONG_BITS == 64) {
|
||||
tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
|
||||
} else {
|
||||
tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
|
||||
}
|
||||
}
|
||||
|
||||
/* Load the tlb addend. */
|
||||
tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
|
||||
offsetof(CPUTLBEntry, addend));
|
||||
|
||||
/*
|
||||
* Check alignment, check comparators.
|
||||
* Do this in 2-4 insns. Use MOVW for v7, if possible,
|
||||
* to reduce the number of sequential conditional instructions.
|
||||
* Almost all guests have at least 4k pages, which means that we need
|
||||
* to clear at least 9 bits even for an 8-byte memory, which means it
|
||||
* isn't worth checking for an immediate operand for BIC.
|
||||
*
|
||||
* For unaligned accesses, test the page of the last unit of alignment.
|
||||
* This leaves the least significant alignment bits unchanged, and of
|
||||
* course must be zero.
|
||||
*/
|
||||
t_addr = addrlo;
|
||||
if (a_mask < s_mask) {
|
||||
t_addr = TCG_REG_R0;
|
||||
tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
|
||||
addrlo, s_mask - a_mask);
|
||||
}
|
||||
if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
|
||||
tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
|
||||
t_addr, TCG_REG_TMP, 0);
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
|
||||
} else {
|
||||
if (a_mask) {
|
||||
tcg_debug_assert(a_mask <= 0xff);
|
||||
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
|
||||
}
|
||||
tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
|
||||
SHIFT_IMM_LSR(TARGET_PAGE_BITS));
|
||||
tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
|
||||
0, TCG_REG_R2, TCG_REG_TMP,
|
||||
SHIFT_IMM_LSL(TARGET_PAGE_BITS));
|
||||
}
|
||||
|
||||
if (TARGET_LONG_BITS == 64) {
|
||||
tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
|
||||
}
|
||||
|
||||
*h = (HostAddress){
|
||||
.cond = COND_AL,
|
||||
.base = addrlo,
|
||||
.index = TCG_REG_R1,
|
||||
.index_scratch = true,
|
||||
};
|
||||
#else
|
||||
if (a_mask) {
|
||||
ldst = new_ldst_label(s);
|
||||
ldst->is_ld = is_ld;
|
||||
ldst->oi = oi;
|
||||
ldst->addrlo_reg = addrlo;
|
||||
ldst->addrhi_reg = addrhi;
|
||||
|
||||
/* We are expecting a_bits to max out at 7 */
|
||||
tcg_debug_assert(a_mask <= 0xff);
|
||||
/* tst addr, #mask */
|
||||
tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
|
||||
}
|
||||
|
||||
*h = (HostAddress){
|
||||
.cond = COND_AL,
|
||||
.base = addrlo,
|
||||
.index = guest_base ? TCG_REG_GUEST_BASE : -1,
|
||||
.index_scratch = false,
|
||||
};
|
||||
#endif
|
||||
|
||||
return ldst;
|
||||
}
|
||||
|
||||
static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
|
||||
TCGReg datahi, HostAddress h)
|
||||
{
|
||||
@ -1799,37 +1785,28 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
|
||||
MemOpIdx oi, TCGType data_type)
|
||||
{
|
||||
MemOp opc = get_memop(oi);
|
||||
TCGLabelQemuLdst *ldst;
|
||||
HostAddress h;
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
h.cond = COND_AL;
|
||||
h.base = addrlo;
|
||||
h.index_scratch = true;
|
||||
h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
|
||||
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
|
||||
if (ldst) {
|
||||
ldst->type = data_type;
|
||||
ldst->datalo_reg = datalo;
|
||||
ldst->datahi_reg = datahi;
|
||||
|
||||
/*
|
||||
* This is a conditional BL only to load a pointer within this opcode into
|
||||
* LR for the slow path. We will not be using the value for a tail call.
|
||||
*/
|
||||
tcg_insn_unit *label_ptr = s->code_ptr;
|
||||
tcg_out_bl_imm(s, COND_NE, 0);
|
||||
/*
|
||||
* This is a conditional BL only to load a pointer within this
|
||||
* opcode into LR for the slow path. We will not be using
|
||||
* the value for a tail call.
|
||||
*/
|
||||
ldst->label_ptr[0] = s->code_ptr;
|
||||
tcg_out_bl_imm(s, COND_NE, 0);
|
||||
|
||||
tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
|
||||
|
||||
add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
|
||||
addrlo, addrhi, s->code_ptr, label_ptr);
|
||||
#else
|
||||
unsigned a_bits = get_alignment_bits(opc);
|
||||
if (a_bits) {
|
||||
tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
|
||||
tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
|
||||
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
|
||||
} else {
|
||||
tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
|
||||
}
|
||||
|
||||
h.cond = COND_AL;
|
||||
h.base = addrlo;
|
||||
h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
|
||||
h.index_scratch = false;
|
||||
tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
|
||||
#endif
|
||||
}
|
||||
|
||||
static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
|
||||
@ -1891,35 +1868,25 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
|
||||
MemOpIdx oi, TCGType data_type)
|
||||
{
|
||||
MemOp opc = get_memop(oi);
|
||||
TCGLabelQemuLdst *ldst;
|
||||
HostAddress h;
|
||||
|
||||
#ifdef CONFIG_SOFTMMU
|
||||
h.cond = COND_EQ;
|
||||
h.base = addrlo;
|
||||
h.index_scratch = true;
|
||||
h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
|
||||
tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
|
||||
ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
|
||||
if (ldst) {
|
||||
ldst->type = data_type;
|
||||
ldst->datalo_reg = datalo;
|
||||
ldst->datahi_reg = datahi;
|
||||
|
||||
/* The conditional call must come last, as we're going to return here. */
|
||||
tcg_insn_unit *label_ptr = s->code_ptr;
|
||||
tcg_out_bl_imm(s, COND_NE, 0);
|
||||
|
||||
add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
|
||||
addrlo, addrhi, s->code_ptr, label_ptr);
|
||||
#else
|
||||
unsigned a_bits = get_alignment_bits(opc);
|
||||
|
||||
h.cond = COND_AL;
|
||||
if (a_bits) {
|
||||
tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
|
||||
h.cond = COND_EQ;
|
||||
}
|
||||
tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
|
||||
|
||||
h.base = addrlo;
|
||||
h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
|
||||
h.index_scratch = false;
|
||||
tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
|
||||
#endif
|
||||
/* The conditional call is last, as we're going to return here. */
|
||||
ldst->label_ptr[0] = s->code_ptr;
|
||||
tcg_out_bl_imm(s, COND_NE, 0);
|
||||
ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
|
||||
} else {
|
||||
tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_out_epilogue(TCGContext *s);
|
||||
|
Loading…
Reference in New Issue
Block a user