e60a7d0d4d
In commit f0a08b0913 ("accel/tcg/cpu-exec.c: Widen pc to vaddr") we changed the type of the PC from target_ulong to vaddr. In doing so we inadvertently dropped the zero-padding on the PC in trace lines (the second item inside the [] in these lines). They used to look like this on AArch64, for instance:
  Trace 0: 0x7f2260000100 [00000000/0000000040000000/00000061/ff200000]
and now they look like this:
  Trace 0: 0x7f4f50000100 [00000000/40000000/00000061/ff200000]
and if the PC happens to be somewhere low like 0x5000 then the field is shown as /5000/. This is because TARGET_FMT_lx is a "%08x" or "%016x" specifier, depending on TARGET_LONG_SIZE, whereas VADDR_PRIx is just PRIx64 with no width specifier. Restore the zero-padding by adding an 016 width specifier to this tracing and a couple of others that were similarly recently changed to use VADDR_PRIx without a width specifier. We can't unfortunately restore the "32-bit guests are padded to 8 hex digits and 64-bit guests to 16 hex digits" behaviour so easily.
Fixes: f0a08b0913 ("accel/tcg/cpu-exec.c: Widen pc to vaddr")
Signed-off-by: Peter Maydell <peter.maydell@linaro.org> Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Reviewed-by: Anton Johansson <anjo@rev.ng> Message-id: 20230711165434.4123674-1-peter.maydell@linaro.org
811 lines
26 KiB
C
811 lines
26 KiB
C
/*
|
|
* Host code generation
|
|
*
|
|
* Copyright (c) 2003 Fabrice Bellard
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2.1 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
|
|
#include "trace.h"
|
|
#include "disas/disas.h"
|
|
#include "exec/exec-all.h"
|
|
#include "tcg/tcg.h"
|
|
#if defined(CONFIG_USER_ONLY)
|
|
#include "qemu.h"
|
|
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
|
|
#include <sys/param.h>
|
|
#if __FreeBSD_version >= 700104
|
|
#define HAVE_KINFO_GETVMMAP
|
|
#define sigqueue sigqueue_freebsd /* avoid redefinition */
|
|
#include <sys/proc.h>
|
|
#include <machine/profile.h>
|
|
#define _KERNEL
|
|
#include <sys/user.h>
|
|
#undef _KERNEL
|
|
#undef sigqueue
|
|
#include <libutil.h>
|
|
#endif
|
|
#endif
|
|
#else
|
|
#include "exec/ram_addr.h"
|
|
#endif
|
|
|
|
#include "exec/cputlb.h"
|
|
#include "exec/translate-all.h"
|
|
#include "exec/translator.h"
|
|
#include "exec/tb-flush.h"
|
|
#include "qemu/bitmap.h"
|
|
#include "qemu/qemu-print.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qemu/cacheinfo.h"
|
|
#include "qemu/timer.h"
|
|
#include "exec/log.h"
|
|
#include "sysemu/cpus.h"
|
|
#include "sysemu/cpu-timers.h"
|
|
#include "sysemu/tcg.h"
|
|
#include "qapi/error.h"
|
|
#include "hw/core/tcg-cpu-ops.h"
|
|
#include "tb-jmp-cache.h"
|
|
#include "tb-hash.h"
|
|
#include "tb-context.h"
|
|
#include "internal.h"
|
|
#include "perf.h"
|
|
#include "tcg/insn-start-words.h"
|
|
|
|
/*
 * Global TB bookkeeping state.  Within this file, dump_exec_info() reads
 * its htable, tb_flush_count and tb_phys_invalidate_count members.
 */
TBContext tb_ctx;
|
|
|
|
/*
 * Store VAL at P using the signed LEB128 encoding.
 * Returns the address one past the last byte written.
 */
static uint8_t *encode_sleb128(uint8_t *p, int64_t val)
{
    for (;;) {
        int group = val & 0x7f;
        int sign = group & 0x40;

        val >>= 7;

        /*
         * Stop once everything left in VAL is merely the sign extension
         * of the group just extracted.
         */
        if (sign ? val == -1 : val == 0) {
            *p++ = group;
            return p;
        }

        /* More groups follow: flag this one with the continuation bit. */
        *p++ = group | 0x80;
    }
}
|
|
|
|
/*
 * Decode a signed leb128 sequence at *PP; increment *PP past the
 * decoded value.  Return the decoded value.
 *
 * The input must be a well-formed encoding of an int64_t such as
 * encode_sleb128() produces (at most 10 bytes); no bounds checking is
 * performed on the input buffer.
 */
static int64_t decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    uint64_t val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        /* Accumulate unsigned so that shifting into bit 63 is well defined. */
        val |= (uint64_t)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);

    /*
     * Sign-extend from the sign bit of the final group.  The bound is the
     * width of the 64-bit result, not the guest register width: the
     * encoder emits full int64_t values, so a negative value that needs
     * five or more bytes must still be extended to 64 bits.  Once SHIFT
     * reaches 64 every result bit has already been supplied.
     */
    if (shift < 64 && (byte & 0x40)) {
        val |= ~(uint64_t)0 << shift;
    }

    *pp = p;
    return (int64_t)val;
}
|
|
|
|
/* Encode the data collected about the instructions while compiling TB.
|
|
Place the data at BLOCK, and return the number of bytes consumed.
|
|
|
|
The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
|
|
which come from the target's insn_start data, followed by a uintptr_t
|
|
which comes from the host pc of the end of the code implementing the insn.
|
|
|
|
Each line of the table is encoded as sleb128 deltas from the previous
|
|
line. The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
|
|
That is, the first column is seeded with the guest pc, the last column
|
|
with the host pc, and the middle columns with zeros. */
|
|
|
|
static int encode_search(TranslationBlock *tb, uint8_t *block)
|
|
{
|
|
uint8_t *highwater = tcg_ctx->code_gen_highwater;
|
|
uint64_t *insn_data = tcg_ctx->gen_insn_data;
|
|
uint16_t *insn_end_off = tcg_ctx->gen_insn_end_off;
|
|
uint8_t *p = block;
|
|
int i, j, n;
|
|
|
|
for (i = 0, n = tb->icount; i < n; ++i) {
|
|
uint64_t prev, curr;
|
|
|
|
for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
|
|
if (i == 0) {
|
|
prev = (!(tb_cflags(tb) & CF_PCREL) && j == 0 ? tb->pc : 0);
|
|
} else {
|
|
prev = insn_data[(i - 1) * TARGET_INSN_START_WORDS + j];
|
|
}
|
|
curr = insn_data[i * TARGET_INSN_START_WORDS + j];
|
|
p = encode_sleb128(p, curr - prev);
|
|
}
|
|
prev = (i == 0 ? 0 : insn_end_off[i - 1]);
|
|
curr = insn_end_off[i];
|
|
p = encode_sleb128(p, curr - prev);
|
|
|
|
/* Test for (pending) buffer overflow. The assumption is that any
|
|
one row beginning below the high water mark cannot overrun
|
|
the buffer completely. Thus we can test for overflow after
|
|
encoding a row without having to check during encoding. */
|
|
if (unlikely(p > highwater)) {
|
|
return -1;
|
|
}
|
|
}
|
|
|
|
return p - block;
|
|
}
|
|
|
|
/*
 * Replay the search table stored directly after TB's generated code to
 * find the guest instruction whose host code covers HOST_PC.
 *
 * On success DATA[0 .. TARGET_INSN_START_WORDS-1] holds the reconstructed
 * values for that instruction, and the return value is the number of
 * instructions in the TB counted from that instruction (inclusive) to
 * the end of the TB.
 *
 * Returns -1 if HOST_PC, after subtracting GETPC_ADJ, lies before the
 * start of the TB's code or at/after the end of its last instruction.
 */
static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
                                   uint64_t *data)
{
    const uintptr_t code_start = (uintptr_t)tb->tc.ptr;
    const uint8_t *cursor = tb->tc.ptr + tb->tc.size;
    const int total = tb->icount;
    uintptr_t insn_end = code_start;
    int idx, word;

    host_pc -= GETPC_ADJ;
    if (host_pc < code_start) {
        return -1;
    }

    /* Seed the row the same way the encoder did: { tb->pc or 0, 0, ... }. */
    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
    if (!(tb_cflags(tb) & CF_PCREL)) {
        data[0] = tb->pc;
    }

    /*
     * Accumulate the deltas row by row, stopping at the first
     * instruction whose end lies beyond host_pc.
     */
    for (idx = 0; idx < total; ++idx) {
        for (word = 0; word < TARGET_INSN_START_WORDS; ++word) {
            data[word] += decode_sleb128(&cursor);
        }
        insn_end += decode_sleb128(&cursor);
        if (insn_end > host_pc) {
            return total - idx;
        }
    }

    return -1;
}
|
|
|
|
/*
|
|
* The cpu state corresponding to 'host_pc' is restored in
|
|
* preparation for exiting the TB.
|
|
*/
|
|
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
|
|
uintptr_t host_pc)
|
|
{
|
|
uint64_t data[TARGET_INSN_START_WORDS];
|
|
int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);
|
|
|
|
if (insns_left < 0) {
|
|
return;
|
|
}
|
|
|
|
if (tb_cflags(tb) & CF_USE_ICOUNT) {
|
|
assert(icount_enabled());
|
|
/*
|
|
* Reset the cycle counter to the start of the block and
|
|
* shift if to the number of actually executed instructions.
|
|
*/
|
|
cpu_neg(cpu)->icount_decr.u16.low += insns_left;
|
|
}
|
|
|
|
cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);
|
|
}
|
|
|
|
/*
 * Look up the TB containing 'host_pc' and restore the cpu state for it.
 * Returns true if a TB was found and cpu_restore_state_from_tb() was
 * invoked on it, false otherwise.
 */
bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
{
    TranslationBlock *tb;

    /*
     * host_pc must lie in the rx region of the code buffer, otherwise it
     * cannot be resolved here.  The two cases where host_pc will not be
     * correct are:
     *
     *  - fault during translation (instruction fetch)
     *  - fault from helper (not using GETPC() macro)
     *
     * In both cases we have to give up early.
     */
    if (!in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        return false;
    }

    tb = tcg_tb_lookup(host_pc);
    if (tb == NULL) {
        return false;
    }

    cpu_restore_state_from_tb(cpu, tb, host_pc);
    return true;
}
|
|
|
|
/*
 * Reconstruct into DATA the per-instruction words for the instruction at
 * 'host_pc', without modifying any cpu state in this function.
 * Returns true on success; false if 'host_pc' is outside the code buffer,
 * no TB contains it, or it cannot be resolved within that TB.
 */
bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
{
    TranslationBlock *tb;

    if (!in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        return false;
    }

    tb = tcg_tb_lookup(host_pc);
    if (tb == NULL) {
        return false;
    }

    return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
}
|
|
|
|
/*
 * Page-related initialization: runs page_size_init() and then
 * page_table_config_init(), in that order.
 */
void page_init(void)
{
    page_size_init();
    page_table_config_init();
}
|
|
|
|
/*
|
|
* Isolate the portion of code gen which can setjmp/longjmp.
|
|
* Return the size of the generated code, or negative on error.
|
|
*/
|
|
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
|
|
vaddr pc, void *host_pc,
|
|
int *max_insns, int64_t *ti)
|
|
{
|
|
int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
|
|
if (unlikely(ret != 0)) {
|
|
return ret;
|
|
}
|
|
|
|
tcg_func_start(tcg_ctx);
|
|
|
|
tcg_ctx->cpu = env_cpu(env);
|
|
gen_intermediate_code(env_cpu(env), tb, max_insns, pc, host_pc);
|
|
assert(tb->size != 0);
|
|
tcg_ctx->cpu = NULL;
|
|
*max_insns = tb->icount;
|
|
|
|
return tcg_gen_code(tcg_ctx, tb, pc);
|
|
}
|
|
|
|
/* Called with mmap_lock held for user mode emulation. */
|
|
TranslationBlock *tb_gen_code(CPUState *cpu,
|
|
vaddr pc, uint64_t cs_base,
|
|
uint32_t flags, int cflags)
|
|
{
|
|
CPUArchState *env = cpu->env_ptr;
|
|
TranslationBlock *tb, *existing_tb;
|
|
tb_page_addr_t phys_pc, phys_p2;
|
|
tcg_insn_unit *gen_code_buf;
|
|
int gen_code_size, search_size, max_insns;
|
|
int64_t ti;
|
|
void *host_pc;
|
|
|
|
assert_memory_lock();
|
|
qemu_thread_jit_write();
|
|
|
|
phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);
|
|
|
|
if (phys_pc == -1) {
|
|
/* Generate a one-shot TB with 1 insn in it */
|
|
cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
|
|
}
|
|
|
|
max_insns = cflags & CF_COUNT_MASK;
|
|
if (max_insns == 0) {
|
|
max_insns = TCG_MAX_INSNS;
|
|
}
|
|
QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);
|
|
|
|
buffer_overflow:
|
|
assert_no_pages_locked();
|
|
tb = tcg_tb_alloc(tcg_ctx);
|
|
if (unlikely(!tb)) {
|
|
/* flush must be done */
|
|
tb_flush(cpu);
|
|
mmap_unlock();
|
|
/* Make the execution loop process the flush as soon as possible. */
|
|
cpu->exception_index = EXCP_INTERRUPT;
|
|
cpu_loop_exit(cpu);
|
|
}
|
|
|
|
gen_code_buf = tcg_ctx->code_gen_ptr;
|
|
tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
|
|
if (!(cflags & CF_PCREL)) {
|
|
tb->pc = pc;
|
|
}
|
|
tb->cs_base = cs_base;
|
|
tb->flags = flags;
|
|
tb->cflags = cflags;
|
|
tb_set_page_addr0(tb, phys_pc);
|
|
tb_set_page_addr1(tb, -1);
|
|
if (phys_pc != -1) {
|
|
tb_lock_page0(phys_pc);
|
|
}
|
|
|
|
tcg_ctx->gen_tb = tb;
|
|
tcg_ctx->addr_type = TARGET_LONG_BITS == 32 ? TCG_TYPE_I32 : TCG_TYPE_I64;
|
|
#ifdef CONFIG_SOFTMMU
|
|
tcg_ctx->page_bits = TARGET_PAGE_BITS;
|
|
tcg_ctx->page_mask = TARGET_PAGE_MASK;
|
|
tcg_ctx->tlb_dyn_max_bits = CPU_TLB_DYN_MAX_BITS;
|
|
tcg_ctx->tlb_fast_offset =
|
|
(int)offsetof(ArchCPU, neg.tlb.f) - (int)offsetof(ArchCPU, env);
|
|
#endif
|
|
tcg_ctx->insn_start_words = TARGET_INSN_START_WORDS;
|
|
#ifdef TCG_GUEST_DEFAULT_MO
|
|
tcg_ctx->guest_mo = TCG_GUEST_DEFAULT_MO;
|
|
#else
|
|
tcg_ctx->guest_mo = TCG_MO_ALL;
|
|
#endif
|
|
|
|
restart_translate:
|
|
trace_translate_block(tb, pc, tb->tc.ptr);
|
|
|
|
gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
|
|
if (unlikely(gen_code_size < 0)) {
|
|
switch (gen_code_size) {
|
|
case -1:
|
|
/*
|
|
* Overflow of code_gen_buffer, or the current slice of it.
|
|
*
|
|
* TODO: We don't need to re-do gen_intermediate_code, nor
|
|
* should we re-do the tcg optimization currently hidden
|
|
* inside tcg_gen_code. All that should be required is to
|
|
* flush the TBs, allocate a new TB, re-initialize it per
|
|
* above, and re-do the actual code generation.
|
|
*/
|
|
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
|
|
"Restarting code generation for "
|
|
"code_gen_buffer overflow\n");
|
|
tb_unlock_pages(tb);
|
|
goto buffer_overflow;
|
|
|
|
case -2:
|
|
/*
|
|
* The code generated for the TranslationBlock is too large.
|
|
* The maximum size allowed by the unwind info is 64k.
|
|
* There may be stricter constraints from relocations
|
|
* in the tcg backend.
|
|
*
|
|
* Try again with half as many insns as we attempted this time.
|
|
* If a single insn overflows, there's a bug somewhere...
|
|
*/
|
|
assert(max_insns > 1);
|
|
max_insns /= 2;
|
|
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
|
|
"Restarting code generation with "
|
|
"smaller translation block (max %d insns)\n",
|
|
max_insns);
|
|
|
|
/*
|
|
* The half-sized TB may not cross pages.
|
|
* TODO: Fix all targets that cross pages except with
|
|
* the first insn, at which point this can't be reached.
|
|
*/
|
|
phys_p2 = tb_page_addr1(tb);
|
|
if (unlikely(phys_p2 != -1)) {
|
|
tb_unlock_page1(phys_pc, phys_p2);
|
|
tb_set_page_addr1(tb, -1);
|
|
}
|
|
goto restart_translate;
|
|
|
|
case -3:
|
|
/*
|
|
* We had a page lock ordering problem. In order to avoid
|
|
* deadlock we had to drop the lock on page0, which means
|
|
* that everything we translated so far is compromised.
|
|
* Restart with locks held on both pages.
|
|
*/
|
|
qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
|
|
"Restarting code generation with re-locked pages");
|
|
goto restart_translate;
|
|
|
|
default:
|
|
g_assert_not_reached();
|
|
}
|
|
}
|
|
tcg_ctx->gen_tb = NULL;
|
|
|
|
search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
|
|
if (unlikely(search_size < 0)) {
|
|
tb_unlock_pages(tb);
|
|
goto buffer_overflow;
|
|
}
|
|
tb->tc.size = gen_code_size;
|
|
|
|
/*
|
|
* For CF_PCREL, attribute all executions of the generated code
|
|
* to its first mapping.
|
|
*/
|
|
perf_report_code(pc, tb, tcg_splitwx_to_rx(gen_code_buf));
|
|
|
|
if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
|
|
qemu_log_in_addr_range(pc)) {
|
|
FILE *logfile = qemu_log_trylock();
|
|
if (logfile) {
|
|
int code_size, data_size;
|
|
const tcg_target_ulong *rx_data_gen_ptr;
|
|
size_t chunk_start;
|
|
int insn = 0;
|
|
|
|
if (tcg_ctx->data_gen_ptr) {
|
|
rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
|
|
code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
|
|
data_size = gen_code_size - code_size;
|
|
} else {
|
|
rx_data_gen_ptr = 0;
|
|
code_size = gen_code_size;
|
|
data_size = 0;
|
|
}
|
|
|
|
/* Dump header and the first instruction */
|
|
fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
|
|
fprintf(logfile,
|
|
" -- guest addr 0x%016" PRIx64 " + tb prologue\n",
|
|
tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
|
|
chunk_start = tcg_ctx->gen_insn_end_off[insn];
|
|
disas(logfile, tb->tc.ptr, chunk_start);
|
|
|
|
/*
|
|
* Dump each instruction chunk, wrapping up empty chunks into
|
|
* the next instruction. The whole array is offset so the
|
|
* first entry is the beginning of the 2nd instruction.
|
|
*/
|
|
while (insn < tb->icount) {
|
|
size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
|
|
if (chunk_end > chunk_start) {
|
|
fprintf(logfile, " -- guest addr 0x%016" PRIx64 "\n",
|
|
tcg_ctx->gen_insn_data[insn * TARGET_INSN_START_WORDS]);
|
|
disas(logfile, tb->tc.ptr + chunk_start,
|
|
chunk_end - chunk_start);
|
|
chunk_start = chunk_end;
|
|
}
|
|
insn++;
|
|
}
|
|
|
|
if (chunk_start < code_size) {
|
|
fprintf(logfile, " -- tb slow paths + alignment\n");
|
|
disas(logfile, tb->tc.ptr + chunk_start,
|
|
code_size - chunk_start);
|
|
}
|
|
|
|
/* Finally dump any data we may have after the block */
|
|
if (data_size) {
|
|
int i;
|
|
fprintf(logfile, " data: [size=%d]\n", data_size);
|
|
for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
|
|
if (sizeof(tcg_target_ulong) == 8) {
|
|
fprintf(logfile,
|
|
"0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
|
|
(uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
|
|
} else if (sizeof(tcg_target_ulong) == 4) {
|
|
fprintf(logfile,
|
|
"0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
|
|
(uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
|
|
} else {
|
|
qemu_build_not_reached();
|
|
}
|
|
}
|
|
}
|
|
fprintf(logfile, "\n");
|
|
qemu_log_unlock(logfile);
|
|
}
|
|
}
|
|
|
|
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
|
|
ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
|
|
CODE_GEN_ALIGN));
|
|
|
|
/* init jump list */
|
|
qemu_spin_init(&tb->jmp_lock);
|
|
tb->jmp_list_head = (uintptr_t)NULL;
|
|
tb->jmp_list_next[0] = (uintptr_t)NULL;
|
|
tb->jmp_list_next[1] = (uintptr_t)NULL;
|
|
tb->jmp_dest[0] = (uintptr_t)NULL;
|
|
tb->jmp_dest[1] = (uintptr_t)NULL;
|
|
|
|
/* init original jump addresses which have been set during tcg_gen_code() */
|
|
if (tb->jmp_reset_offset[0] != TB_JMP_OFFSET_INVALID) {
|
|
tb_reset_jump(tb, 0);
|
|
}
|
|
if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
|
|
tb_reset_jump(tb, 1);
|
|
}
|
|
|
|
/*
|
|
* If the TB is not associated with a physical RAM page then it must be
|
|
* a temporary one-insn TB, and we have nothing left to do. Return early
|
|
* before attempting to link to other TBs or add to the lookup table.
|
|
*/
|
|
if (tb_page_addr0(tb) == -1) {
|
|
assert_no_pages_locked();
|
|
return tb;
|
|
}
|
|
|
|
/*
|
|
* Insert TB into the corresponding region tree before publishing it
|
|
* through QHT. Otherwise rewinding happened in the TB might fail to
|
|
* lookup itself using host PC.
|
|
*/
|
|
tcg_tb_insert(tb);
|
|
|
|
/*
|
|
* No explicit memory barrier is required -- tb_link_page() makes the
|
|
* TB visible in a consistent state.
|
|
*/
|
|
existing_tb = tb_link_page(tb);
|
|
assert_no_pages_locked();
|
|
|
|
/* if the TB already exists, discard what we just translated */
|
|
if (unlikely(existing_tb != tb)) {
|
|
uintptr_t orig_aligned = (uintptr_t)gen_code_buf;
|
|
|
|
orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
|
|
qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
|
|
tcg_tb_remove(tb);
|
|
return existing_tb;
|
|
}
|
|
return tb;
|
|
}
|
|
|
|
/* user-mode: call with mmap_lock held */
|
|
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
|
|
{
|
|
TranslationBlock *tb;
|
|
|
|
assert_memory_lock();
|
|
|
|
tb = tcg_tb_lookup(retaddr);
|
|
if (tb) {
|
|
/* We can use retranslation to find the PC. */
|
|
cpu_restore_state_from_tb(cpu, tb, retaddr);
|
|
tb_phys_invalidate(tb, -1);
|
|
} else {
|
|
/* The exception probably happened in a helper. The CPU state should
|
|
have been saved before calling it. Fetch the PC from there. */
|
|
CPUArchState *env = cpu->env_ptr;
|
|
vaddr pc;
|
|
uint64_t cs_base;
|
|
tb_page_addr_t addr;
|
|
uint32_t flags;
|
|
|
|
cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
|
|
addr = get_page_addr_code(env, pc);
|
|
if (addr != -1) {
|
|
tb_invalidate_phys_range(addr, addr);
|
|
}
|
|
}
|
|
}
|
|
|
|
#ifndef CONFIG_USER_ONLY
|
|
/*
|
|
* In deterministic execution mode, instructions doing device I/Os
|
|
* must be at the end of the TB.
|
|
*
|
|
* Called by softmmu_template.h, with iothread mutex not held.
|
|
*/
|
|
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
|
|
{
|
|
TranslationBlock *tb;
|
|
CPUClass *cc;
|
|
uint32_t n;
|
|
|
|
tb = tcg_tb_lookup(retaddr);
|
|
if (!tb) {
|
|
cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
|
|
(void *)retaddr);
|
|
}
|
|
cpu_restore_state_from_tb(cpu, tb, retaddr);
|
|
|
|
/*
|
|
* Some guests must re-execute the branch when re-executing a delay
|
|
* slot instruction. When this is the case, adjust icount and N
|
|
* to account for the re-execution of the branch.
|
|
*/
|
|
n = 1;
|
|
cc = CPU_GET_CLASS(cpu);
|
|
if (cc->tcg_ops->io_recompile_replay_branch &&
|
|
cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
|
|
cpu_neg(cpu)->icount_decr.u16.low++;
|
|
n = 2;
|
|
}
|
|
|
|
/*
|
|
* Exit the loop and potentially generate a new TB executing the
|
|
* just the I/O insns. We also limit instrumentation to memory
|
|
* operations only (which execute after completion) so we don't
|
|
* double instrument the instruction.
|
|
*/
|
|
cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;
|
|
|
|
if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
|
|
vaddr pc = log_pc(cpu, tb);
|
|
if (qemu_log_in_addr_range(pc)) {
|
|
qemu_log("cpu_io_recompile: rewound execution of TB to %016"
|
|
VADDR_PRIx "\n", pc);
|
|
}
|
|
}
|
|
|
|
cpu_loop_exit_noexc(cpu);
|
|
}
|
|
|
|
/*
 * Append a textual summary of the hash table statistics HST to BUF:
 * head bucket usage, then occupancy and chain-length averages, each
 * with a histogram.  Appends nothing when HST has no head buckets.
 */
static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
    uint32_t opts;
    size_t bins;
    char *text;

    if (hst.head_buckets == 0) {
        return;
    }

    g_string_append_printf(buf, "TB hash buckets %zu/%zu "
                           "(%0.2f%% head buckets used)\n",
                           hst.used_head_buckets, hst.head_buckets,
                           (double)hst.used_head_buckets /
                           hst.head_buckets * 100);

    /* Occupancy histogram, always rendered with 10 bins. */
    opts = QDIST_PR_BORDER | QDIST_PR_LABELS
           | QDIST_PR_100X | QDIST_PR_PERCENT;
    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
        opts |= QDIST_PR_NODECIMAL;
    }
    text = qdist_pr(&hst.occupancy, 10, opts);
    g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.occupancy) * 100, text);
    g_free(text);

    /*
     * Chain length histogram: use 10 bins when the x range exceeds 10,
     * otherwise pass 0 bins and request no decimals and no bin ranges.
     */
    opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
    if (bins > 10) {
        bins = 10;
    } else {
        bins = 0;
        opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
    }
    text = qdist_pr(&hst.chain, bins, opts);
    g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.chain), text);
    g_free(text);
}
|
|
|
|
/* Totals accumulated over all TBs by tb_tree_stats_iter(). */
struct tb_tree_stats {
    size_t nb_tbs;             /* number of TBs visited */
    size_t host_size;          /* sum of tb->tc.size */
    size_t target_size;        /* sum of tb->size */
    size_t max_target_size;    /* largest tb->size seen */
    size_t direct_jmp_count;   /* TBs with a valid jmp_reset_offset[0] */
    size_t direct_jmp2_count;  /* ... and a valid jmp_reset_offset[1] too */
    size_t cross_page;         /* TBs whose second page address is set */
};
|
|
|
|
/*
 * Per-TB callback passed to tcg_tb_foreach(): fold the TB given in VALUE
 * into the struct tb_tree_stats given in DATA.  KEY is unused.
 * Always returns false (presumably so the traversal keeps going --
 * confirm against tcg_tb_foreach()).
 */
static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
    struct tb_tree_stats *acc = data;
    const TranslationBlock *tb = value;

    acc->nb_tbs++;
    acc->host_size += tb->tc.size;
    acc->target_size += tb->size;
    if (acc->max_target_size < tb->size) {
        acc->max_target_size = tb->size;
    }

    if (tb_page_addr1(tb) != -1) {
        acc->cross_page++;
    }

    /* The second direct jump is only counted when the first one exists. */
    if (tb->jmp_reset_offset[0] == TB_JMP_OFFSET_INVALID) {
        return false;
    }
    acc->direct_jmp_count++;
    if (tb->jmp_reset_offset[1] != TB_JMP_OFFSET_INVALID) {
        acc->direct_jmp2_count++;
    }
    return false;
}
|
|
|
|
/*
 * Append a human-readable report to BUF: translation buffer usage,
 * per-TB averages, TB hash table statistics, flush/invalidate counters,
 * TLB flush counters, and finally the output of tcg_dump_info().
 */
void dump_exec_info(GString *buf)
{
    struct tb_tree_stats tst = {};
    struct qht_stats hst;
    size_t nb_tbs;
    size_t avg_target, avg_host, pct_cross, pct_jmp, pct_jmp2;
    size_t flush_full, flush_part, flush_elide;
    double expansion;

    tcg_tb_foreach(tb_tree_stats_iter, &tst);
    nb_tbs = tst.nb_tbs;

    /* Every per-TB figure is reported as 0 when there are no TBs. */
    if (nb_tbs != 0) {
        avg_target = tst.target_size / nb_tbs;
        avg_host = tst.host_size / nb_tbs;
        pct_cross = (tst.cross_page * 100) / nb_tbs;
        pct_jmp = (tst.direct_jmp_count * 100) / nb_tbs;
        pct_jmp2 = (tst.direct_jmp2_count * 100) / nb_tbs;
    } else {
        avg_target = 0;
        avg_host = 0;
        pct_cross = 0;
        pct_jmp = 0;
        pct_jmp2 = 0;
    }

    /* XXX: avoid using doubles ? */
    if (tst.target_size != 0) {
        expansion = (double)tst.host_size / tst.target_size;
    } else {
        expansion = 0;
    }

    g_string_append_printf(buf, "Translation buffer state:\n");
    /*
     * Report total code size including the padding and TB structs;
     * otherwise users might think "-accel tcg,tb-size" is not honoured.
     * For avg host size we use the precise numbers from tb_tree_stats though.
     */
    g_string_append_printf(buf, "gen code size %zu/%zu\n",
                           tcg_code_size(), tcg_code_capacity());
    g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
    g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
                           avg_target, tst.max_target_size);
    g_string_append_printf(buf, "TB avg host size %zu bytes "
                           "(expansion ratio: %0.1f)\n",
                           avg_host, expansion);
    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
                           tst.cross_page, pct_cross);
    g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
                           "(2 jumps=%zu %zu%%)\n",
                           tst.direct_jmp_count, pct_jmp,
                           tst.direct_jmp2_count, pct_jmp2);

    qht_statistics_init(&tb_ctx.htable, &hst);
    print_qht_statistics(hst, buf);
    qht_statistics_destroy(&hst);

    g_string_append_printf(buf, "\nStatistics:\n");
    g_string_append_printf(buf, "TB flush count %u\n",
                           qatomic_read(&tb_ctx.tb_flush_count));
    g_string_append_printf(buf, "TB invalidate count %u\n",
                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));

    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
    g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
    g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
    tcg_dump_info(buf);
}
|
|
|
|
#else /* CONFIG_USER_ONLY */
|
|
|
|
/*
 * Record the interrupt bits in MASK as pending on CPU.
 * The iothread mutex must be held; this is asserted.
 */
void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());

    cpu->interrupt_request = cpu->interrupt_request | mask;

    /*
     * NOTE(review): storing -1 in the high half of icount_decr presumably
     * makes running translated code notice the request -- confirm.
     */
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}
|
|
|
|
#endif /* CONFIG_USER_ONLY */
|
|
|
|
/*
|
|
* Called by generic code at e.g. cpu reset after cpu creation,
|
|
* therefore we must be prepared to allocate the jump cache.
|
|
*/
|
|
void tcg_flush_jmp_cache(CPUState *cpu)
|
|
{
|
|
CPUJumpCache *jc = cpu->tb_jmp_cache;
|
|
|
|
/* During early initialization, the cache may not yet be allocated. */
|
|
if (unlikely(jc == NULL)) {
|
|
return;
|
|
}
|
|
|
|
for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
|
|
qatomic_set(&jc->array[i].tb, NULL);
|
|
}
|
|
}
|
|
|
|
/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
|
|
void tcg_flush_softmmu_tlb(CPUState *cs)
|
|
{
|
|
#ifdef CONFIG_SOFTMMU
|
|
tlb_flush(cs);
|
|
#endif
|
|
}
|