b7e4afbd9f
This can replace four other variables that are references into the TranslationBlock structure. Reviewed-by: Alex Bennée <alex.bennee@linaro.org> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
794 lines
26 KiB
C
794 lines
26 KiB
C
/*
 *  Host code generation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */
|
|
|
|
#include "qemu/osdep.h"
|
|
|
|
#define NO_CPU_IO_DEFS
|
|
#include "trace.h"
|
|
#include "disas/disas.h"
|
|
#include "exec/exec-all.h"
|
|
#include "tcg/tcg.h"
|
|
#if defined(CONFIG_USER_ONLY)
|
|
#include "qemu.h"
|
|
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
|
|
#include <sys/param.h>
|
|
#if __FreeBSD_version >= 700104
|
|
#define HAVE_KINFO_GETVMMAP
|
|
#define sigqueue sigqueue_freebsd /* avoid redefinition */
|
|
#include <sys/proc.h>
|
|
#include <machine/profile.h>
|
|
#define _KERNEL
|
|
#include <sys/user.h>
|
|
#undef _KERNEL
|
|
#undef sigqueue
|
|
#include <libutil.h>
|
|
#endif
|
|
#endif
|
|
#else
|
|
#include "exec/ram_addr.h"
|
|
#endif
|
|
|
|
#include "exec/cputlb.h"
|
|
#include "exec/translate-all.h"
|
|
#include "exec/translator.h"
|
|
#include "qemu/bitmap.h"
|
|
#include "qemu/qemu-print.h"
|
|
#include "qemu/timer.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qemu/cacheinfo.h"
|
|
#include "exec/log.h"
|
|
#include "sysemu/cpus.h"
|
|
#include "sysemu/cpu-timers.h"
|
|
#include "sysemu/tcg.h"
|
|
#include "qapi/error.h"
|
|
#include "hw/core/tcg-cpu-ops.h"
|
|
#include "tb-jmp-cache.h"
|
|
#include "tb-hash.h"
|
|
#include "tb-context.h"
|
|
#include "internal.h"
|
|
#include "perf.h"
|
|
|
|
/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
|
|
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
|
|
sizeof_field(TranslationBlock, trace_vcpu_dstate)
|
|
* BITS_PER_BYTE);
|
|
|
|
TBContext tb_ctx;
|
|
|
|
/* Encode VAL as a signed leb128 sequence at P.
|
|
Return P incremented past the encoded value. */
|
|
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
|
|
{
|
|
int more, byte;
|
|
|
|
do {
|
|
byte = val & 0x7f;
|
|
val >>= 7;
|
|
more = !((val == 0 && (byte & 0x40) == 0)
|
|
|| (val == -1 && (byte & 0x40) != 0));
|
|
if (more) {
|
|
byte |= 0x80;
|
|
}
|
|
*p++ = byte;
|
|
} while (more);
|
|
|
|
return p;
|
|
}
|
|
|
|
/* Decode a signed leb128 sequence at *PP; increment *PP past the
|
|
decoded value. Return the decoded value. */
|
|
static target_long decode_sleb128(const uint8_t **pp)
|
|
{
|
|
const uint8_t *p = *pp;
|
|
target_long val = 0;
|
|
int byte, shift = 0;
|
|
|
|
do {
|
|
byte = *p++;
|
|
val |= (target_ulong)(byte & 0x7f) << shift;
|
|
shift += 7;
|
|
} while (byte & 0x80);
|
|
if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
|
|
val |= -(target_ulong)1 << shift;
|
|
}
|
|
|
|
*pp = p;
|
|
return val;
|
|
}
|
|
|
|
/* Encode the data collected about the instructions while compiling TB.
|
|
Place the data at BLOCK, and return the number of bytes consumed.
|
|
|
|
The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
|
|
which come from the target's insn_start data, followed by a uintptr_t
|
|
which comes from the host pc of the end of the code implementing the insn.
|
|
|
|
Each line of the table is encoded as sleb128 deltas from the previous
|
|
line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
|
|
That is, the first column is seeded with the guest pc, the last column
|
|
with the host pc, and the middle columns with zeros. */
|
|
|
|
static int encode_search(TranslationBlock *tb, uint8_t *block)
|
|
{
|
|
uint8_t *highwater = tcg_ctx->code_gen_highwater;
|
|
uint8_t *p = block;
|
|
int i, j, n;
|
|
|
|
for (i = 0, n = tb->icount; i < n; ++i) {
|
|
target_ulong prev;
|
|
|
|
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
|
|
if (i == 0) {
|
|
prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
|
|
} else {
|
|
prev = tcg_ctx->gen_insn_data[i - 1][j];
|
|
}
|
|
p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
|
|
}
|
|
prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
|
|
p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);
|
|
|
|
/* Test for (pending) buffer overflow. The assumption is that any
|
|
one row beginning below the high water mark cannot overrun
|
|
the buffer completely. Thus we can test for overflow after
|
|
encoding a row without having to check during encoding. */
|
|
if (unlikely(p > highwater)) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return p - block;
|
|
}
|
|
|
|
/*
 * Decode the search data stored after the generated code of TB and
 * rebuild the insn_start words of the guest instruction containing
 * HOST_PC (after subtracting GETPC_ADJ).
 *
 * DATA must have room for TARGET_INSN_START_WORDS entries; it is
 * overwritten with the reconstructed values.
 *
 * Returns the number of instructions from the matching one through the
 * end of the TB, or -1 if the adjusted host pc lies before the TB's code
 * or beyond the end of its last instruction.
 */
static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
                                   uint64_t *data)
{
    const uintptr_t code_start = (uintptr_t)tb->tc.ptr;
    const uint8_t *search = tb->tc.ptr + tb->tc.size;
    const int total = tb->icount;
    const uintptr_t target = host_pc - GETPC_ADJ;
    uintptr_t insn_end = code_start;

    if (target < code_start) {
        return -1;
    }

    /* Seed the first row: guest pc in column 0 (if not pc-relative), else 0. */
    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
    if (!TARGET_TB_PCREL) {
        data[0] = tb_pc(tb);
    }

    /*
     * Accumulate the deltas row by row until we reach the instruction
     * whose end lies beyond the adjusted host pc.
     */
    for (int remaining = total; remaining > 0; --remaining) {
        for (int w = 0; w < TARGET_INSN_START_WORDS; ++w) {
            data[w] += decode_sleb128(&search);
        }
        insn_end += decode_sleb128(&search);
        if (insn_end > target) {
            return remaining;
        }
    }
    return -1;
}
|
|
|
|
/*
|
|
* The cpu state corresponding to 'host_pc' is restored in
|
|
* preparation for exiting the TB.
|
|
*/
|
|
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
|
|
uintptr_t host_pc)
|
|
{
|
|
uint64_t data[TARGET_INSN_START_WORDS];
|
|
#ifdef CONFIG_PROFILER
|
|
TCGProfile *prof = &tcg_ctx->prof;
|
|
int64_t ti = profile_getclock();
|
|
#endif
|
|
int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
|
|
|
|
if (insns_left < 0) {
|
|
return;
|
|
}
|
|
|
|
if (tb_cflags(tb) & CF_USE_ICOUNT) {
|
|
assert(icount_enabled());
|
|
/*
|
|
* Reset the cycle counter to the start of the block and
|
|
* shift if to the number of actually executed instructions.
|
|
*/
|
|
cpu_neg(cpu)->icount_decr.u16.low += insns_left;
|
|
}
|
|
|
|
cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
|
|
|
|
#ifdef CONFIG_PROFILER
|
|
qatomic_set(&prof->restore_time,
|
|
prof->restore_time + profile_getclock() - ti);
|
|
qatomic_set(&prof->restore_count, prof->restore_count + 1);
|
|
#endif
|
|
}
|
|
|
|
/*
 * Look up the TB containing HOST_PC and, if one is found, restore the
 * cpu state for that point.
 *
 * Returns true if a TB was found (and state restoration attempted),
 * false otherwise.
 */
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
{
    TranslationBlock *tb;

    /*
     * The host_pc has to be in the rx region of the code buffer.
     * If it is not we will not be able to resolve it here.
     * The two cases where host_pc will not be correct are:
     *
     *  - fault during translation (instruction fetch)
     *  - fault from helper (not using GETPC() macro)
     *
     * Either way we need to return early as we can't resolve it here.
     */
    if (!in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        return false;
    }

    tb = tcg_tb_lookup(host_pc);
    if (!tb) {
        return false;
    }

    cpu_restore_state_from_tb(cpu, tb, host_pc);
    return true;
}
|
|
|
|
/*
 * Fill DATA with the insn_start words of the guest instruction that
 * contains HOST_PC, without modifying any cpu state.
 *
 * DATA must have room for TARGET_INSN_START_WORDS entries.
 *
 * Returns true on success; false if HOST_PC is outside the code buffer,
 * belongs to no TB, or cannot be resolved within its TB.
 */
bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
{
    TranslationBlock *tb;

    if (!in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        return false;
    }

    tb = tcg_tb_lookup(host_pc);
    return tb && cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
}
|
|
|
|
/*
 * Run the page-related initialisation steps: page size first, then the
 * page table configuration.
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();
}
|
|
|
|
/*
|
|
* Isolate the portion of code gen which can setjmp/longjmp.
|
|
* Return the size of the generated code, or negative on error.
|
|
*/
|
|
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
|
|
target_ulong pc, void *host_pc,
|
|
int *max_insns, int64_t *ti)
|
|
{
|
|
int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
|
|
if (unlikely(ret != 0)) {
|
|
return ret;
|
|
}
|
|
|
|
tcg_func_start(tcg_ctx);
|
|
|
|
tcg_ctx->cpu = env_cpu(env);
|
|
gen_intermediate_code(env_cpu(env), tb, *max_insns, pc, host_pc);
|
|
assert(tb->size != 0);
|
|
tcg_ctx->cpu = NULL;
|
|
*max_insns = tb->icount;
|
|
|
|
#ifdef CONFIG_PROFILER
|
|
qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
|
|
qatomic_set(&tcg_ctx->prof.interm_time,
|
|
tcg_ctx->prof.interm_time + profile_getclock() - *ti);
|
|
*ti = profile_getclock();
|
|
#endif
|
|
|
|
return tcg_gen_code(tcg_ctx, tb, pc);
|
|
}
|
|
|
|
/* Called with mmap_lock held for user mode emulation. */
|
|
TranslationBlock *tb_gen_code(CPUState *cpu,
|
|
target_ulong pc, target_ulong cs_base,
|
|
uint32_t flags, int cflags)
|
|
{
|
|
CPUArchState *env = cpu->env_ptr;
|
|
TranslationBlock *tb, *existing_tb;
|
|
tb_page_addr_t phys_pc;
|
|
tcg_insn_unit *gen_code_buf;
|
|
int gen_code_size, search_size, max_insns;
|
|
#ifdef CONFIG_PROFILER
|
|
TCGProfile *prof = &tcg_ctx->prof;
|
|
#endif
|
|
int64_t ti;
|
|
void *host_pc;
|
|
|
|
assert_memory_lock();
|
|
qemu_thread_jit_write();
|
|
|
|
phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
|
|
|
|
if (phys_pc == -1) {
|
|
/* Generate a one-shot TB with 1 insn in it */
|
|
cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
|
|
}
|
|
|
|
max_insns = cflags & CF_COUNT_MASK;
|
|
if (max_insns == 0) {
|
|
max_insns = TCG_MAX_INSNS;
|
|
}
|
|
QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
|
|
|
|
buffer_overflow:
|
|
tb = tcg_tb_alloc(tcg_ctx);
|
|
if (unlikely(!tb)) {
|
|
/* flush must be done */
|
|
tb_flush(cpu);
|
|
mmap_unlock();
|
|
/* Make the execution loop process the flush as soon as possible. */
|
|
cpu->exception_index = EXCP_INTERRUPT;
|
|
cpu_loop_exit(cpu);
|
|
}
|
|
|
|
gen_code_buf = tcg_ctx->code_gen_ptr;
|
|
tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
|
|
#if !TARGET_TB_PCREL
|
|
tb->pc = pc;
|
|
#endif
|
|
tb->cs_base = cs_base;
|
|
tb->flags = flags;
|
|
tb->cflags = cflags;
|
|
tb->trace_vcpu_dstate = *cpu->trace_dstate;
|
|
tb_set_page_addr0(tb, phys_pc);
|
|
tb_set_page_addr1(tb, -1);
|
|
tcg_ctx->gen_tb = tb;
|
|
tb_overflow:
|
|
|
|
#ifdef CONFIG_PROFILER
|
|
/* includes aborted translations because of exceptions */
|
|
qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
|
|
ti = profile_getclock();
|
|
#endif
|
|
|
|
trace_translate_block(tb, pc, tb->tc.ptr);
|
|
|
|
gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
|
|
if (unlikely(gen_code_size < 0)) {
|
|
switch (gen_code_size) {
|
|
case -1:
|
|
/*
|
|
* Overflow of code_gen_buffer, or the current slice of it.
|
|
*
|
|
* TODO: We don't need to re-do gen_intermediate_code, nor
|
|
* should we re-do the tcg optimization currently hidden
|
|
* inside tcg_gen_code. All that should be required is to
|
|
* flush the TBs, allocate a new TB, re-initialize it per
|
|
* above, and re-do the actual code generation.
|
|
*/
|
|
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
|
|
"Restarting code generation for "
|
|
"code_gen_buffer overflow\n");
|
|
goto buffer_overflow;
|
|
|
|
case -2:
|
|
/*
|
|
* The code generated for the TranslationBlock is too large.
|
|
* The maximum size allowed by the unwind info is 64k.
|
|
* There may be stricter constraints from relocations
|
|
* in the tcg backend.
|
|
*
|
|
* Try again with half as many insns as we attempted this time.
|
|
* If a single insn overflows, there's a bug somewhere...
|
|
*/
|
|
assert(max_insns > 1);
|
|
max_insns /= 2;
|
|
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
|
|
"Restarting code generation with "
|
|
"smaller translation block (max %d insns)\n",
|
|
max_insns);
|
|
goto tb_overflow;
|
|
|
|
default:
|
|
g_assert_not_reached();
|
|
}
|
|
}
|
|
search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
|
|
if (unlikely(search_size < 0)) {
|
|
goto buffer_overflow;
|
|
}
|
|
tb->tc.size = gen_code_size;
|
|
|
|
/*
|
|
* For TARGET_TB_PCREL, attribute all executions of the generated
|
|
* code to its first mapping.
|
|
*/
|
|
perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
|
|
|
|
#ifdef CONFIG_PROFILER
|
|
qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
|
|
qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
|
|
qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
|
|
qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
|
|
#endif
|
|
|
|
#ifdef DEBUG_DISAS
|
|
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
|
|
qemu_log_in_addr_range(pc)) {
|
|
FILE *logfile = qemu_log_trylock();
|
|
if (logfile) {
|
|
int code_size, data_size;
|
|
const tcg_target_ulong *rx_data_gen_ptr;
|
|
size_t chunk_start;
|
|
int insn = 0;
|
|
|
|
if (tcg_ctx->data_gen_ptr) {
|
|
rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
|
|
code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
|
|
data_size = gen_code_size - code_size;
|
|
} else {
|
|
rx_data_gen_ptr = 0;
|
|
code_size = gen_code_size;
|
|
data_size = 0;
|
|
}
|
|
|
|
/* Dump header and the first instruction */
|
|
fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
|
|
fprintf(logfile,
|
|
" -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
|
|
tcg_ctx->gen_insn_data[insn][0]);
|
|
chunk_start = tcg_ctx->gen_insn_end_off[insn];
|
|
disas(logfile, tb->tc.ptr, chunk_start);
|
|
|
|
/*
|
|
* Dump each instruction chunk, wrapping up empty chunks into
|
|
* the next instruction. The whole array is offset so the
|
|
* first entry is the beginning of the 2nd instruction.
|
|
*/
|
|
while (insn < tb->icount) {
|
|
size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
|
|
if (chunk_end > chunk_start) {
|
|
fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
|
|
tcg_ctx->gen_insn_data[insn][0]);
|
|
disas(logfile, tb->tc.ptr + chunk_start,
|
|
chunk_end - chunk_start);
|
|
chunk_start = chunk_end;
|
|
}
|
|
insn++;
|
|
}
|
|
|
|
if (chunk_start < code_size) {
|
|
fprintf(logfile, " -- tb slow paths + alignment\n");
|
|
disas(logfile, tb->tc.ptr + chunk_start,
|
|
code_size - chunk_start);
|
|
}
|
|
|
|
/* Finally dump any data we may have after the block */
|
|
if (data_size) {
|
|
int i;
|
|
fprintf(logfile, " data: [size=%d]\n", data_size);
|
|
for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
|
|
if (sizeof(tcg_target_ulong) == 8) {
|
|
fprintf(logfile,
|
|
"0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
|
|
(uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
|
|
} else if (sizeof(tcg_target_ulong) == 4) {
|
|
fprintf(logfile,
|
|
"0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
|
|
(uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
|
|
} else {
|
|
qemu_build_not_reached();
|
|
}
|
|
}
|
|
}
|
|
fprintf(logfile, "\n");
|
|
qemu_log_unlock(logfile);
|
|
}
|
|
}
|
|
#endif
|
|
|
|
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
|
|
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
|
|
CODE_GEN_ALIGN));
|
|
|
|
/* init jump list */
|
|
qemu_spin_init(&tb->jmp_lock);
|
|
tb->jmp_list_head = (uintptr_t)NULL;
|
|
tb->jmp_list_next[0] = (uintptr_t)NULL;
|
|
tb->jmp_list_next[1] = (uintptr_t)NULL;
|
|
tb->jmp_dest[0] = (uintptr_t)NULL;
|
|
tb->jmp_dest[1] = (uintptr_t)NULL;
|
|
|
|
/* init original jump addresses which have been set during tcg_gen_code() */
|
|
if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
|
|
tb_reset_jump(tb, 0);
|
|
}
|
|
if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
|
|
tb_reset_jump(tb, 1);
|
|
}
|
|
|
|
/*
|
|
* If the TB is not associated with a physical RAM page then it must be
|
|
* a temporary one-insn TB, and we have nothing left to do. Return early
|
|
* before attempting to link to other TBs or add to the lookup table.
|
|
*/
|
|
if (tb_page_addr0(tb) == -1) {
|
|
return tb;
|
|
}
|
|
|
|
/*
|
|
* Insert TB into the corresponding region tree before publishing it
|
|
* through QHT. Otherwise rewinding happened in the TB might fail to
|
|
* lookup itself using host PC.
|
|
*/
|
|
tcg_tb_insert(tb);
|
|
|
|
/*
|
|
* No explicit memory barrier is required -- tb_link_page() makes the
|
|
* TB visible in a consistent state.
|
|
*/
|
|
existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
|
|
/* if the TB already exists, discard what we just translated */
|
|
if (unlikely(existing_tb != tb)) {
|
|
uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
|
|
|
|
orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
|
|
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
|
|
tcg_tb_remove(tb);
|
|
return existing_tb;
|
|
}
|
|
return tb;
|
|
}
|
|
|
|
/* user-mode: call with mmap_lock held */
|
|
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
|
|
{
|
|
TranslationBlock *tb;
|
|
|
|
assert_memory_lock();
|
|
|
|
tb = tcg_tb_lookup(retaddr);
|
|
if (tb) {
|
|
/* We can use retranslation to find the PC. */
|
|
cpu_restore_state_from_tb(cpu, tb, retaddr);
|
|
tb_phys_invalidate(tb, -1);
|
|
} else {
|
|
/* The exception probably happened in a helper. The CPU state should
|
|
have been saved before calling it. Fetch the PC from there. */
|
|
CPUArchState *env = cpu->env_ptr;
|
|
target_ulong pc, cs_base;
|
|
tb_page_addr_t addr;
|
|
uint32_t flags;
|
|
|
|
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
|
|
addr = get_page_addr_code(env, pc);
|
|
if (addr != -1) {
|
|
tb_invalidate_phys_range(addr, addr + 1);
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifndef CONFIG_USER_ONLY
|
|
/*
|
|
* In deterministic execution mode, instructions doing device I/Os
|
|
* must be at the end of the TB.
|
|
*
|
|
* Called by softmmu_template.h, with iothread mutex not held.
|
|
*/
|
|
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
|
|
{
|
|
TranslationBlock *tb;
|
|
CPUClass *cc;
|
|
uint32_t n;
|
|
|
|
tb = tcg_tb_lookup(retaddr);
|
|
if (!tb) {
|
|
cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
|
|
(void *)retaddr);
|
|
}
|
|
cpu_restore_state_from_tb(cpu, tb, retaddr);
|
|
|
|
/*
|
|
* Some guests must re-execute the branch when re-executing a delay
|
|
* slot instruction. When this is the case, adjust icount and N
|
|
* to account for the re-execution of the branch.
|
|
*/
|
|
n = 1;
|
|
cc = CPU_GET_CLASS(cpu);
|
|
if (cc->tcg_ops->io_recompile_replay_branch &&
|
|
cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
|
|
cpu_neg(cpu)->icount_decr.u16.low++;
|
|
n = 2;
|
|
}
|
|
|
|
/*
|
|
* Exit the loop and potentially generate a new TB executing the
|
|
* just the I/O insns. We also limit instrumentation to memory
|
|
* operations only (which execute after completion) so we don't
|
|
* double instrument the instruction.
|
|
*/
|
|
cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
|
|
|
|
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
|
|
target_ulong pc = log_pc(cpu, tb);
|
|
if (qemu_log_in_addr_range(pc)) {
|
|
qemu_log("cpu_io_recompile: rewound execution of TB to "
|
|
TARGET_FMT_lx "\n", pc);
|
|
}
|
|
}
|
|
|
|
cpu_loop_exit_noexc(cpu);
|
|
}
|
|
|
|
/*
 * Append a summary of the hash table statistics HST to BUF: head bucket
 * usage, then histograms of bucket occupancy and of chain length.
 * Nothing is appended when the table has no head buckets.
 */
static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
    uint32_t opts;
    size_t bins;
    char *text;

    if (!hst.head_buckets) {
        return;
    }

    g_string_append_printf(buf, "TB hash buckets %zu/%zu "
                           "(%0.2f%% head buckets used)\n",
                           hst.used_head_buckets, hst.head_buckets,
                           (double)hst.used_head_buckets /
                           hst.head_buckets * 100);

    /* Occupancy histogram: always 10 bins. */
    opts = QDIST_PR_BORDER | QDIST_PR_LABELS |
           QDIST_PR_100X | QDIST_PR_PERCENT;
    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
        opts |= QDIST_PR_NODECIMAL;
    }
    text = qdist_pr(&hst.occupancy, 10, opts);
    g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.occupancy) * 100, text);
    g_free(text);

    /* Chain-length histogram: 10 bins for wide ranges, otherwise 0. */
    opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
    if (bins > 10) {
        bins = 10;
    } else {
        bins = 0;
        opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
    }
    text = qdist_pr(&hst.chain, bins, opts);
    g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.chain), text);
    g_free(text);
}
|
|
|
|
/* Totals accumulated over all TBs by tb_tree_stats_iter(). */
struct tb_tree_stats {
    size_t nb_tbs;             /* number of TBs visited */
    size_t host_size;          /* sum of tb->tc.size */
    size_t target_size;        /* sum of tb->size */
    size_t max_target_size;    /* largest tb->size seen */
    size_t direct_jmp_count;   /* TBs with a valid jmp_reset_offset[0] */
    size_t direct_jmp2_count;  /* ... that also have a valid [1] */
    size_t cross_page;         /* TBs whose page_addr1 is not -1 */
};
|
|
|
|
/*
 * tcg_tb_foreach() callback: fold one TB (VALUE) into the
 * struct tb_tree_stats passed as DATA.  KEY is unused.
 *
 * Always returns false.
 */
static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
    const TranslationBlock *tb = value;
    struct tb_tree_stats *stats = data;

    stats->nb_tbs++;
    stats->host_size += tb->tc.size;
    stats->target_size += tb->size;
    if (tb->size > stats->max_target_size) {
        stats->max_target_size = tb->size;
    }

    if (tb_page_addr1(tb) != -1) {
        stats->cross_page++;
    }

    /* The second jump is only counted when the first one is valid. */
    if (tb->jmp_reset_offset[0] == TB_JMP_OFFSET_INVALID) {
        return false;
    }
    stats->direct_jmp_count++;
    if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
        stats->direct_jmp2_count++;
    }
    return false;
}
|
|
|
|
/*
 * Append a human-readable report on the translation buffer to BUF:
 * TB counts and sizes, hash table statistics, flush/invalidate counters,
 * TLB flush counters, and finally whatever tcg_dump_info() adds.
 */
void dump_exec_info(GString *buf)
{
    struct tb_tree_stats tst = {};
    struct qht_stats hst;
    size_t nb_tbs, flush_full, flush_part, flush_elide;
    size_t avg_target, avg_host, cross_pct, jmp_pct, jmp2_pct;
    double expansion;

    tcg_tb_foreach(tb_tree_stats_iter, &tst);
    nb_tbs = tst.nb_tbs;

    /* Derived figures; every division is guarded against a zero divisor. */
    avg_target = nb_tbs ? tst.target_size / nb_tbs : 0;
    avg_host = nb_tbs ? tst.host_size / nb_tbs : 0;
    /* XXX: avoid using doubles ? */
    expansion = tst.target_size ?
                (double)tst.host_size / tst.target_size : 0;
    cross_pct = nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0;
    jmp_pct = nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0;
    jmp2_pct = nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0;

    g_string_append_printf(buf, "Translation buffer state:\n");
    /*
     * Report total code size including the padding and TB structs;
     * otherwise users might think "-accel tcg,tb-size" is not honoured.
     * For avg host size we use the precise numbers from tb_tree_stats though.
     */
    g_string_append_printf(buf, "gen code size %zu/%zu\n",
                           tcg_code_size(), tcg_code_capacity());
    g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
    g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
                           avg_target, tst.max_target_size);
    g_string_append_printf(buf, "TB avg host size %zu bytes "
                           "(expansion ratio: %0.1f)\n",
                           avg_host, expansion);
    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
                           tst.cross_page, cross_pct);
    g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
                           "(2 jumps=%zu %zu%%)\n",
                           tst.direct_jmp_count, jmp_pct,
                           tst.direct_jmp2_count, jmp2_pct);

    qht_statistics_init(&tb_ctx.htable, &hst);
    print_qht_statistics(hst, buf);
    qht_statistics_destroy(&hst);

    g_string_append_printf(buf, "\nStatistics:\n");
    g_string_append_printf(buf, "TB flush count %u\n",
                           qatomic_read(&tb_ctx.tb_flush_count));
    g_string_append_printf(buf, "TB invalidate count %u\n",
                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));

    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
    g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
    g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
    tcg_dump_info(buf);
}
|
|
|
|
#else /* CONFIG_USER_ONLY */
|
|
|
|
/*
 * CONFIG_USER_ONLY variant: record the interrupt bits in MASK on CPU.
 *
 * The iothread mutex must be held by the caller (asserted).
 */
void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());

    cpu->interrupt_request |= mask;

    /*
     * NOTE(review): setting the high half of icount_decr to -1 presumably
     * makes running translated code notice the request -- confirm.
     */
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}
|
|
|
|
#endif /* CONFIG_USER_ONLY */
|
|
|
|
/*
|
|
* Called by generic code at e.g. cpu reset after cpu creation,
|
|
* therefore we must be prepared to allocate the jump cache.
|
|
*/
|
|
void tcg_flush_jmp_cache(CPUState *cpu)
|
|
{
|
|
CPUJumpCache *jc = cpu->tb_jmp_cache;
|
|
|
|
/* During early initialization, the cache may not yet be allocated. */
|
|
if (unlikely(jc == NULL)) {
|
|
return;
|
|
}
|
|
|
|
for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
|
|
qatomic_set(&jc->array[i].tb, NULL);
|
|
}
|
|
}
|
|
|
|
/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
|
|
void tcg_flush_softmmu_tlb(CPUState *cs)
|
|
{
|
|
#ifdef CONFIG_SOFTMMU
|
|
tlb_flush(cs);
|
|
#endif
|
|
}
|