From eba40358b49cb09f4e24764db7951e30987aa416 Mon Sep 17 00:00:00 2001 From: Richard Henderson Date: Thu, 29 Oct 2020 13:18:12 -0700 Subject: [PATCH] tcg: Return the TB pointer from the rx region from exit_tb This produces a small pc-relative displacement within the generated code to the TB structure that preceeds it. Reviewed-by: Joelle van Dyne Signed-off-by: Richard Henderson --- accel/tcg/cpu-exec.c | 35 +++++++++++++++++++++-------------- tcg/tcg-op.c | 13 ++++++++++++- 2 files changed, 33 insertions(+), 15 deletions(-) diff --git a/accel/tcg/cpu-exec.c b/accel/tcg/cpu-exec.c index 29294f2804..e0df9b6a1d 100644 --- a/accel/tcg/cpu-exec.c +++ b/accel/tcg/cpu-exec.c @@ -154,13 +154,12 @@ static void init_delay_params(SyncClocks *sc, const CPUState *cpu) * TCG is not considered a security-sensitive part of QEMU so this does not * affect the impact of CFI in environment with high security requirements */ -QEMU_DISABLE_CFI -static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) +static inline TranslationBlock * QEMU_DISABLE_CFI +cpu_tb_exec(CPUState *cpu, TranslationBlock *itb, int *tb_exit) { CPUArchState *env = cpu->env_ptr; uintptr_t ret; TranslationBlock *last_tb; - int tb_exit; const void *tb_ptr = itb->tc.ptr; qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc, @@ -188,11 +187,20 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) ret = tcg_qemu_tb_exec(env, tb_ptr); cpu->can_do_io = 1; - last_tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK); - tb_exit = ret & TB_EXIT_MASK; - trace_exec_tb_exit(last_tb, tb_exit); + /* + * TODO: Delay swapping back to the read-write region of the TB + * until we actually need to modify the TB. The read-only copy, + * coming from the rx region, shares the same host TLB entry as + * the code that executed the exit_tb opcode that arrived here. + * If we insist on touching both the RX and the RW pages, we + * double the host TLB pressure. + */ + last_tb = tcg_splitwx_to_rw((void *)(ret & ~TB_EXIT_MASK)); + *tb_exit = ret & TB_EXIT_MASK; - if (tb_exit > TB_EXIT_IDX1) { + trace_exec_tb_exit(last_tb, *tb_exit); + + if (*tb_exit > TB_EXIT_IDX1) { /* We didn't start executing this TB (eg because the instruction * counter hit zero); we must restore the guest PC to the address * of the start of the TB. @@ -210,7 +218,7 @@ static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb) cc->set_pc(cpu, last_tb->pc); } } - return ret; + return last_tb; } #ifndef CONFIG_USER_ONLY @@ -221,6 +229,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, { TranslationBlock *tb; uint32_t cflags = curr_cflags() | CF_NOCACHE; + int tb_exit; if (ignore_icount) { cflags &= ~CF_USE_ICOUNT; @@ -238,7 +247,7 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles, /* execute the generated code */ trace_exec_tb_nocache(tb, tb->pc); - cpu_tb_exec(cpu, tb); + cpu_tb_exec(cpu, tb, &tb_exit); mmap_lock(); tb_phys_invalidate(tb, -1); @@ -272,6 +281,7 @@ void cpu_exec_step_atomic(CPUState *cpu) uint32_t flags; uint32_t cflags = 1; uint32_t cf_mask = cflags & CF_HASH_MASK; + int tb_exit; if (sigsetjmp(cpu->jmp_env, 0) == 0) { start_exclusive(); @@ -288,7 +298,7 @@ void cpu_exec_step_atomic(CPUState *cpu) cpu_exec_enter(cpu); /* execute the generated code */ trace_exec_tb(tb, pc); - cpu_tb_exec(cpu, tb); + cpu_tb_exec(cpu, tb, &tb_exit); cpu_exec_exit(cpu); } else { /* @@ -684,13 +694,10 @@ static inline bool cpu_handle_interrupt(CPUState *cpu, static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb, TranslationBlock **last_tb, int *tb_exit) { - uintptr_t ret; int32_t insns_left; trace_exec_tb(tb, tb->pc); - ret = cpu_tb_exec(cpu, tb); - tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK); - *tb_exit = ret & TB_EXIT_MASK; + tb = cpu_tb_exec(cpu, tb, tb_exit); if (*tb_exit != TB_EXIT_REQUESTED) { *last_tb = tb; return; diff --git a/tcg/tcg-op.c b/tcg/tcg-op.c index 19fa8e4691..0374b5d56d 100644 --- a/tcg/tcg-op.c +++ b/tcg/tcg-op.c @@ -2666,7 +2666,18 @@ void tcg_gen_extr32_i64(TCGv_i64 lo, TCGv_i64 hi, TCGv_i64 arg) void tcg_gen_exit_tb(const TranslationBlock *tb, unsigned idx) { - uintptr_t val = (uintptr_t)tb + idx; + /* + * Let the jit code return the read-only version of the + * TranslationBlock, so that we minimize the pc-relative + * distance of the address of the exit_tb code to TB. + * This will improve utilization of pc-relative address loads. + * + * TODO: Move this to translator_loop, so that all const + * TranslationBlock pointers refer to read-only memory. + * This requires coordination with targets that do not use + * the translator_loop. + */ + uintptr_t val = (uintptr_t)tcg_splitwx_to_rx((void *)tb) + idx; if (tb == NULL) { tcg_debug_assert(idx == 0);