From 7131d3cf72af0ecd54d325cbe2d88db144e52fed Mon Sep 17 00:00:00 2001
From: Richard Henderson
Date: Sun, 23 Apr 2023 15:54:41 +0100
Subject: [PATCH] tcg/arm: Introduce prepare_host_addr
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Merge tcg_out_tlb_load, add_qemu_ldst_label, and some code that lived
in both tcg_out_qemu_ld and tcg_out_qemu_st into one function that
returns HostAddress and TCGLabelQemuLdst structures.

Reviewed-by: Alex Bennée
Signed-off-by: Richard Henderson
---
 tcg/arm/tcg-target.c.inc | 351 ++++++++++++++++++---------------------
 1 file changed, 159 insertions(+), 192 deletions(-)

diff --git a/tcg/arm/tcg-target.c.inc b/tcg/arm/tcg-target.c.inc
index b6b4ffc546..c744512778 100644
--- a/tcg/arm/tcg-target.c.inc
+++ b/tcg/arm/tcg-target.c.inc
@@ -1434,125 +1434,6 @@ static TCGReg tcg_out_arg_reg64(TCGContext *s, TCGReg argreg,
     }
 }
 
-#define TLB_SHIFT (CPU_TLB_ENTRY_BITS + CPU_TLB_BITS)
-
-/* We expect to use an 9-bit sign-magnitude negative offset from ENV. */
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
-QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
-
-/* These offsets are built into the LDRD below. */
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
-QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
-
-/* Load and compare a TLB entry, leaving the flags set. Returns the register
-   containing the addend of the tlb entry. Clobbers R0, R1, R2, TMP. */
-
-static TCGReg tcg_out_tlb_read(TCGContext *s, TCGReg addrlo, TCGReg addrhi,
-                               MemOp opc, int mem_index, bool is_load)
-{
-    int cmp_off = (is_load ? offsetof(CPUTLBEntry, addr_read)
-                   : offsetof(CPUTLBEntry, addr_write));
-    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
-    unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
-    unsigned a_mask = (1 << get_alignment_bits(opc)) - 1;
-    TCGReg t_addr;
-
-    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
-    tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
-
-    /* Extract the tlb index from the address into R0. */
-    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
-                    SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
-
-    /*
-     * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
-     * Load the tlb comparator into R2/R3 and the fast path addend into R1.
-     */
-    if (cmp_off == 0) {
-        if (TARGET_LONG_BITS == 64) {
-            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
-        } else {
-            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
-        }
-    } else {
-        tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
-                        TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
-        if (TARGET_LONG_BITS == 64) {
-            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
-        } else {
-            tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
-        }
-    }
-
-    /* Load the tlb addend. */
-    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
-                    offsetof(CPUTLBEntry, addend));
-
-    /*
-     * Check alignment, check comparators.
-     * Do this in 2-4 insns. Use MOVW for v7, if possible,
-     * to reduce the number of sequential conditional instructions.
-     * Almost all guests have at least 4k pages, which means that we need
-     * to clear at least 9 bits even for an 8-byte memory, which means it
-     * isn't worth checking for an immediate operand for BIC.
-     *
-     * For unaligned accesses, test the page of the last unit of alignment.
-     * This leaves the least significant alignment bits unchanged, and of
-     * course must be zero.
-     */
-    t_addr = addrlo;
-    if (a_mask < s_mask) {
-        t_addr = TCG_REG_R0;
-        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
-                        addrlo, s_mask - a_mask);
-    }
-    if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
-        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
-        tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
-                        t_addr, TCG_REG_TMP, 0);
-        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
-    } else {
-        if (a_mask) {
-            tcg_debug_assert(a_mask <= 0xff);
-            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
-        }
-        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
-                        SHIFT_IMM_LSR(TARGET_PAGE_BITS));
-        tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
-                        0, TCG_REG_R2, TCG_REG_TMP,
-                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
-    }
-
-    if (TARGET_LONG_BITS == 64) {
-        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
-    }
-
-    return TCG_REG_R1;
-}
-
-/* Record the context of a call to the out of line helper code for the slow
-   path for a load or store, so that we can later generate the correct
-   helper code. */
-static void add_qemu_ldst_label(TCGContext *s, bool is_ld,
-                                MemOpIdx oi, TCGType type,
-                                TCGReg datalo, TCGReg datahi,
-                                TCGReg addrlo, TCGReg addrhi,
-                                tcg_insn_unit *raddr,
-                                tcg_insn_unit *label_ptr)
-{
-    TCGLabelQemuLdst *label = new_ldst_label(s);
-
-    label->is_ld = is_ld;
-    label->oi = oi;
-    label->type = type;
-    label->datalo_reg = datalo;
-    label->datahi_reg = datahi;
-    label->addrlo_reg = addrlo;
-    label->addrhi_reg = addrhi;
-    label->raddr = tcg_splitwx_to_rx(raddr);
-    label->label_ptr[0] = label_ptr;
-}
-
 static bool tcg_out_qemu_ld_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
 {
     TCGReg argreg;
@@ -1636,29 +1517,6 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *lb)
     return true;
 }
 #else
-
-static void tcg_out_test_alignment(TCGContext *s, bool is_ld, TCGReg addrlo,
-                                   TCGReg addrhi, unsigned a_bits)
-{
-    unsigned a_mask = (1 << a_bits) - 1;
-    TCGLabelQemuLdst *label = new_ldst_label(s);
-
-    label->is_ld = is_ld;
-    label->addrlo_reg = addrlo;
-    label->addrhi_reg = addrhi;
-
-    /* We are expecting a_bits to max out at 7, and can easily support 8. */
-    tcg_debug_assert(a_mask <= 0xff);
-    /* tst addr, #mask */
-    tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
-
-    /* blne slow_path */
-    label->label_ptr[0] = s->code_ptr;
-    tcg_out_bl_imm(s, COND_NE, 0);
-
-    label->raddr = tcg_splitwx_to_rx(s->code_ptr);
-}
-
 static bool tcg_out_fail_alignment(TCGContext *s, TCGLabelQemuLdst *l)
 {
     if (!reloc_pc24(l->label_ptr[0], tcg_splitwx_to_rx(s->code_ptr))) {
@@ -1703,6 +1561,134 @@ static bool tcg_out_qemu_st_slow_path(TCGContext *s, TCGLabelQemuLdst *l)
 }
 #endif /* SOFTMMU */
 
+static TCGLabelQemuLdst *prepare_host_addr(TCGContext *s, HostAddress *h,
+                                           TCGReg addrlo, TCGReg addrhi,
+                                           MemOpIdx oi, bool is_ld)
+{
+    TCGLabelQemuLdst *ldst = NULL;
+    MemOp opc = get_memop(oi);
+    MemOp a_bits = get_alignment_bits(opc);
+    unsigned a_mask = (1 << a_bits) - 1;
+
+#ifdef CONFIG_SOFTMMU
+    int mem_index = get_mmuidx(oi);
+    int cmp_off = is_ld ? offsetof(CPUTLBEntry, addr_read)
+                        : offsetof(CPUTLBEntry, addr_write);
+    int fast_off = TLB_MASK_TABLE_OFS(mem_index);
+    unsigned s_mask = (1 << (opc & MO_SIZE)) - 1;
+    TCGReg t_addr;
+
+    ldst = new_ldst_label(s);
+    ldst->is_ld = is_ld;
+    ldst->oi = oi;
+    ldst->addrlo_reg = addrlo;
+    ldst->addrhi_reg = addrhi;
+
+    /* Load env_tlb(env)->f[mmu_idx].{mask,table} into {r0,r1}. */
+    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) > 0);
+    QEMU_BUILD_BUG_ON(TLB_MASK_TABLE_OFS(0) < -256);
+    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, mask) != 0);
+    QEMU_BUILD_BUG_ON(offsetof(CPUTLBDescFast, table) != 4);
+    tcg_out_ldrd_8(s, COND_AL, TCG_REG_R0, TCG_AREG0, fast_off);
+
+    /* Extract the tlb index from the address into R0. */
+    tcg_out_dat_reg(s, COND_AL, ARITH_AND, TCG_REG_R0, TCG_REG_R0, addrlo,
+                    SHIFT_IMM_LSR(TARGET_PAGE_BITS - CPU_TLB_ENTRY_BITS));
+
+    /*
+     * Add the tlb_table pointer, creating the CPUTLBEntry address in R1.
+     * Load the tlb comparator into R2/R3 and the fast path addend into R1.
+     */
+    if (cmp_off == 0) {
+        if (TARGET_LONG_BITS == 64) {
+            tcg_out_ldrd_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+        } else {
+            tcg_out_ld32_rwb(s, COND_AL, TCG_REG_R2, TCG_REG_R1, TCG_REG_R0);
+        }
+    } else {
+        tcg_out_dat_reg(s, COND_AL, ARITH_ADD,
+                        TCG_REG_R1, TCG_REG_R1, TCG_REG_R0, 0);
+        if (TARGET_LONG_BITS == 64) {
+            tcg_out_ldrd_8(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+        } else {
+            tcg_out_ld32_12(s, COND_AL, TCG_REG_R2, TCG_REG_R1, cmp_off);
+        }
+    }
+
+    /* Load the tlb addend. */
+    tcg_out_ld32_12(s, COND_AL, TCG_REG_R1, TCG_REG_R1,
+                    offsetof(CPUTLBEntry, addend));
+
+    /*
+     * Check alignment, check comparators.
+     * Do this in 2-4 insns. Use MOVW for v7, if possible,
+     * to reduce the number of sequential conditional instructions.
+     * Almost all guests have at least 4k pages, which means that we need
+     * to clear at least 9 bits even for an 8-byte memory, which means it
+     * isn't worth checking for an immediate operand for BIC.
+     *
+     * For unaligned accesses, test the page of the last unit of alignment.
+     * This leaves the least significant alignment bits unchanged, and of
+     * course must be zero.
+     */
+    t_addr = addrlo;
+    if (a_mask < s_mask) {
+        t_addr = TCG_REG_R0;
+        tcg_out_dat_imm(s, COND_AL, ARITH_ADD, t_addr,
+                        addrlo, s_mask - a_mask);
+    }
+    if (use_armv7_instructions && TARGET_PAGE_BITS <= 16) {
+        tcg_out_movi32(s, COND_AL, TCG_REG_TMP, ~(TARGET_PAGE_MASK | a_mask));
+        tcg_out_dat_reg(s, COND_AL, ARITH_BIC, TCG_REG_TMP,
+                        t_addr, TCG_REG_TMP, 0);
+        tcg_out_dat_reg(s, COND_AL, ARITH_CMP, 0, TCG_REG_R2, TCG_REG_TMP, 0);
+    } else {
+        if (a_mask) {
+            tcg_debug_assert(a_mask <= 0xff);
+            tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
+        }
+        tcg_out_dat_reg(s, COND_AL, ARITH_MOV, TCG_REG_TMP, 0, t_addr,
+                        SHIFT_IMM_LSR(TARGET_PAGE_BITS));
+        tcg_out_dat_reg(s, (a_mask ? COND_EQ : COND_AL), ARITH_CMP,
+                        0, TCG_REG_R2, TCG_REG_TMP,
+                        SHIFT_IMM_LSL(TARGET_PAGE_BITS));
+    }
+
+    if (TARGET_LONG_BITS == 64) {
+        tcg_out_dat_reg(s, COND_EQ, ARITH_CMP, 0, TCG_REG_R3, addrhi, 0);
+    }
+
+    *h = (HostAddress){
+        .cond = COND_AL,
+        .base = addrlo,
+        .index = TCG_REG_R1,
+        .index_scratch = true,
+    };
+#else
+    if (a_mask) {
+        ldst = new_ldst_label(s);
+        ldst->is_ld = is_ld;
+        ldst->oi = oi;
+        ldst->addrlo_reg = addrlo;
+        ldst->addrhi_reg = addrhi;
+
+        /* We are expecting a_bits to max out at 7 */
+        tcg_debug_assert(a_mask <= 0xff);
+        /* tst addr, #mask */
+        tcg_out_dat_imm(s, COND_AL, ARITH_TST, 0, addrlo, a_mask);
+    }
+
+    *h = (HostAddress){
+        .cond = COND_AL,
+        .base = addrlo,
+        .index = guest_base ? TCG_REG_GUEST_BASE : -1,
+        .index_scratch = false,
+    };
+#endif
+
+    return ldst;
+}
+
 static void tcg_out_qemu_ld_direct(TCGContext *s, MemOp opc, TCGReg datalo,
                                    TCGReg datahi, HostAddress h)
 {
@@ -1799,37 +1785,28 @@ static void tcg_out_qemu_ld(TCGContext *s, TCGReg datalo, TCGReg datahi,
                             MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
+    TCGLabelQemuLdst *ldst;
     HostAddress h;
 
-#ifdef CONFIG_SOFTMMU
-    h.cond = COND_AL;
-    h.base = addrlo;
-    h.index_scratch = true;
-    h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 1);
+    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, true);
+    if (ldst) {
+        ldst->type = data_type;
+        ldst->datalo_reg = datalo;
+        ldst->datahi_reg = datahi;
 
-    /*
-     * This a conditional BL only to load a pointer within this opcode into
-     * LR for the slow path. We will not be using the value for a tail call.
-     */
-    tcg_insn_unit *label_ptr = s->code_ptr;
-    tcg_out_bl_imm(s, COND_NE, 0);
+        /*
+         * This a conditional BL only to load a pointer within this
+         * opcode into LR for the slow path. We will not be using
+         * the value for a tail call.
+         */
+        ldst->label_ptr[0] = s->code_ptr;
+        tcg_out_bl_imm(s, COND_NE, 0);
 
-    tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
-
-    add_qemu_ldst_label(s, true, oi, data_type, datalo, datahi,
-                        addrlo, addrhi, s->code_ptr, label_ptr);
-#else
-    unsigned a_bits = get_alignment_bits(opc);
-    if (a_bits) {
-        tcg_out_test_alignment(s, true, addrlo, addrhi, a_bits);
+        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
+        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+    } else {
+        tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
     }
-
-    h.cond = COND_AL;
-    h.base = addrlo;
-    h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
-    h.index_scratch = false;
-    tcg_out_qemu_ld_direct(s, opc, datalo, datahi, h);
-#endif
 }
 
 static void tcg_out_qemu_st_direct(TCGContext *s, MemOp opc, TCGReg datalo,
@@ -1891,35 +1868,25 @@ static void tcg_out_qemu_st(TCGContext *s, TCGReg datalo, TCGReg datahi,
                             MemOpIdx oi, TCGType data_type)
 {
     MemOp opc = get_memop(oi);
+    TCGLabelQemuLdst *ldst;
    HostAddress h;
 
-#ifdef CONFIG_SOFTMMU
-    h.cond = COND_EQ;
-    h.base = addrlo;
-    h.index_scratch = true;
-    h.index = tcg_out_tlb_read(s, addrlo, addrhi, opc, get_mmuidx(oi), 0);
-    tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
+    ldst = prepare_host_addr(s, &h, addrlo, addrhi, oi, false);
+    if (ldst) {
+        ldst->type = data_type;
+        ldst->datalo_reg = datalo;
+        ldst->datahi_reg = datahi;
 
-    /* The conditional call must come last, as we're going to return here. */
-    tcg_insn_unit *label_ptr = s->code_ptr;
-    tcg_out_bl_imm(s, COND_NE, 0);
-
-    add_qemu_ldst_label(s, false, oi, data_type, datalo, datahi,
-                        addrlo, addrhi, s->code_ptr, label_ptr);
-#else
-    unsigned a_bits = get_alignment_bits(opc);
-
-    h.cond = COND_AL;
-    if (a_bits) {
-        tcg_out_test_alignment(s, false, addrlo, addrhi, a_bits);
         h.cond = COND_EQ;
-    }
+        tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
 
-    h.base = addrlo;
-    h.index = guest_base ? TCG_REG_GUEST_BASE : -1;
-    h.index_scratch = false;
-    tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
-#endif
+        /* The conditional call is last, as we're going to return here. */
+        ldst->label_ptr[0] = s->code_ptr;
+        tcg_out_bl_imm(s, COND_NE, 0);
+        ldst->raddr = tcg_splitwx_to_rx(s->code_ptr);
+    } else {
+        tcg_out_qemu_st_direct(s, opc, datalo, datahi, h);
+    }
 }
 
 static void tcg_out_epilogue(TCGContext *s);
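
Editor's note: as a quick orientation, the sketch below models the caller
pattern that prepare_host_addr() establishes for tcg_out_qemu_ld and
tcg_out_qemu_st. It is not QEMU code: the structures, helper names, and
register numbers are simplified stand-ins, and only the shape of the
fast-path/slow-path split is shown.

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>

/* Simplified stand-ins for HostAddress and TCGLabelQemuLdst. */
typedef struct {
    int base;            /* base register for the host access */
    int index;           /* index register, or -1 for none */
    bool index_scratch;  /* index register may be clobbered */
} FakeHostAddress;

typedef struct {
    bool is_ld;
    const char *label_ptr;   /* where the conditional slow-path call sits */
    const char *raddr;       /* return address for the slow path */
} FakeLdstLabel;

/*
 * Stand-in for prepare_host_addr(): fill in the host address and,
 * when a slow path is needed (softmmu TLB check or alignment test),
 * return a label for the caller to complete.
 */
static FakeLdstLabel *prepare_host_addr(FakeHostAddress *h, int addr_reg,
                                        bool is_ld, bool needs_slow_path)
{
    static FakeLdstLabel label;

    *h = (FakeHostAddress){ .base = addr_reg, .index = -1 };
    if (!needs_slow_path) {
        return NULL;                    /* direct access only */
    }
    label.is_ld = is_ld;
    return &label;                      /* caller records branch and raddr */
}

/* Caller pattern shared by the load and store emitters after the patch. */
static void emit_qemu_ld(int data_reg, int addr_reg, bool needs_slow_path)
{
    FakeHostAddress h;
    FakeLdstLabel *ldst = prepare_host_addr(&h, addr_reg, true,
                                            needs_slow_path);

    if (ldst) {
        ldst->label_ptr = "blne <slow path>";   /* conditional call */
        printf("checked load: r%d <- [r%d]\n", data_reg, h.base);
        ldst->raddr = "<return address>";
    } else {
        printf("direct load:  r%d <- [r%d]\n", data_reg, h.base);
    }
}

int main(void)
{
    emit_qemu_ld(0, 2, true);    /* path that needs a slow-path label */
    emit_qemu_ld(0, 2, false);   /* path with no label needed */
    return 0;
}

The point of the refactoring is visible in the sketch: the softmmu and
user-only cases now share one entry point, and the callers only fill in the
data registers and the patch point when a slow path actually exists.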