tcg/aarch64: Use ADRP+ADD to compute target address

We use ADRP+ADD to compute the target address for goto_tb. This patch
also introduces the NOP instruction, which is used to keep the ADRP+ADD
instruction pair 8-byte aligned so that a single atomic write can be
used to patch the destination offsets.

CC: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Pranith Kumar <bobby.prani@gmail.com>
Message-Id: <20170630143614.31059-2-bobby.prani@gmail.com>
Signed-off-by: Richard Henderson <rth@twiddle.net>
This commit is contained in:
Pranith Kumar 2017-06-30 10:36:13 -04:00 committed by Richard Henderson
parent 23b7aa1d2a
commit b68686bd4b
2 changed files with 31 additions and 7 deletions

View File

@@ -504,7 +504,7 @@ static inline PageDesc *page_find(tb_page_addr_t index)
#elif defined(__powerpc__) #elif defined(__powerpc__)
# define MAX_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024) # define MAX_CODE_GEN_BUFFER_SIZE (32u * 1024 * 1024)
#elif defined(__aarch64__) #elif defined(__aarch64__)
# define MAX_CODE_GEN_BUFFER_SIZE (128ul * 1024 * 1024) # define MAX_CODE_GEN_BUFFER_SIZE (2ul * 1024 * 1024 * 1024)
#elif defined(__s390x__) #elif defined(__s390x__)
/* We have a +- 4GB range on the branches; leave some slop. */ /* We have a +- 4GB range on the branches; leave some slop. */
# define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024) # define MAX_CODE_GEN_BUFFER_SIZE (3ul * 1024 * 1024 * 1024)

View File

@@ -372,6 +372,7 @@ typedef enum {
I3510_EON = 0x4a200000, I3510_EON = 0x4a200000,
I3510_ANDS = 0x6a000000, I3510_ANDS = 0x6a000000,
NOP = 0xd503201f,
/* System instructions. */ /* System instructions. */
DMB_ISH = 0xd50338bf, DMB_ISH = 0xd50338bf,
DMB_LD = 0x00000100, DMB_LD = 0x00000100,
@@ -865,11 +866,27 @@ static inline void tcg_out_call(TCGContext *s, tcg_insn_unit *target)
void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr) void aarch64_tb_set_jmp_target(uintptr_t jmp_addr, uintptr_t addr)
{ {
tcg_insn_unit *code_ptr = (tcg_insn_unit *)jmp_addr; tcg_insn_unit i1, i2;
tcg_insn_unit *target = (tcg_insn_unit *)addr; TCGType rt = TCG_TYPE_I64;
TCGReg rd = TCG_REG_TMP;
uint64_t pair;
reloc_pc26_atomic(code_ptr, target); ptrdiff_t offset = addr - jmp_addr;
flush_icache_range(jmp_addr, jmp_addr + 4);
if (offset == sextract64(offset, 0, 26)) {
i1 = I3206_B | ((offset >> 2) & 0x3ffffff);
i2 = NOP;
} else {
offset = (addr >> 12) - (jmp_addr >> 12);
/* patch ADRP */
i1 = I3406_ADRP | (offset & 3) << 29 | (offset & 0x1ffffc) << (5 - 2) | rd;
/* patch ADDI */
i2 = I3401_ADDI | rt << 31 | (addr & 0xfff) << 10 | rd << 5 | rd;
}
pair = (uint64_t)i2 << 32 | i1;
atomic_set((uint64_t *)jmp_addr, pair);
flush_icache_range(jmp_addr, jmp_addr + 8);
} }
static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l) static inline void tcg_out_goto_label(TCGContext *s, TCGLabel *l)
@@ -1388,10 +1405,17 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
#endif #endif
/* consistency for USE_DIRECT_JUMP */ /* consistency for USE_DIRECT_JUMP */
tcg_debug_assert(s->tb_jmp_insn_offset != NULL); tcg_debug_assert(s->tb_jmp_insn_offset != NULL);
/* Ensure that ADRP+ADD are 8-byte aligned so that an atomic
write can be used to patch the target address. */
if ((uintptr_t)s->code_ptr & 7) {
tcg_out32(s, NOP);
}
s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s); s->tb_jmp_insn_offset[a0] = tcg_current_code_size(s);
/* actual branch destination will be patched by /* actual branch destination will be patched by
aarch64_tb_set_jmp_target later, beware retranslation. */ aarch64_tb_set_jmp_target later. */
tcg_out_goto_noaddr(s); tcg_out_insn(s, 3406, ADRP, TCG_REG_TMP, 0);
tcg_out_insn(s, 3401, ADDI, TCG_TYPE_I64, TCG_REG_TMP, TCG_REG_TMP, 0);
tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s); s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
break; break;