50d25c8aec
Make bsd-user match linux-user in not marking host pages as reserved.
This isn't especially effective anyway, as it doesn't take into account
any heap memory that qemu may allocate after startup.

Reviewed-by: Warner Losh <imp@bsdimp.com>
Tested-by: Warner Losh <imp@bsdimp.com>
Reviewed-by: Alex Bennée <alex.bennee@linaro.org>
Tested-by: Alex Bennée <alex.bennee@linaro.org>
Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
1533 lines
45 KiB
C
/*
 *  Host code generation
 *
 *  Copyright (c) 2003 Fabrice Bellard
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, see <http://www.gnu.org/licenses/>.
 */

#include "qemu/osdep.h"

#define NO_CPU_IO_DEFS
#include "trace.h"
#include "disas/disas.h"
#include "exec/exec-all.h"
#include "tcg/tcg.h"
#if defined(CONFIG_USER_ONLY)
#include "qemu.h"
#if defined(__FreeBSD__) || defined(__FreeBSD_kernel__)
#include <sys/param.h>
#if __FreeBSD_version >= 700104
#define HAVE_KINFO_GETVMMAP
#define sigqueue sigqueue_freebsd  /* avoid redefinition */
#include <sys/proc.h>
#include <machine/profile.h>
#define _KERNEL
#include <sys/user.h>
#undef _KERNEL
#undef sigqueue
#include <libutil.h>
#endif
#endif
#else
#include "exec/ram_addr.h"
#endif

#include "exec/cputlb.h"
#include "exec/translate-all.h"
#include "exec/translator.h"
#include "qemu/bitmap.h"
#include "qemu/qemu-print.h"
#include "qemu/timer.h"
#include "qemu/main-loop.h"
#include "qemu/cacheinfo.h"
#include "exec/log.h"
#include "sysemu/cpus.h"
#include "sysemu/cpu-timers.h"
#include "sysemu/tcg.h"
#include "qapi/error.h"
#include "hw/core/tcg-cpu-ops.h"
#include "tb-jmp-cache.h"
#include "tb-hash.h"
#include "tb-context.h"
#include "internal.h"

/* make various TB consistency checks */

/**
 * struct page_entry - page descriptor entry
 * @pd:     pointer to the &struct PageDesc of the page this entry represents
 * @index:  page index of the page
 * @locked: whether the page is locked
 *
 * This struct helps us keep track of the locked state of a page, without
 * bloating &struct PageDesc.
 *
 * A page lock protects accesses to all fields of &struct PageDesc.
 *
 * See also: &struct page_collection.
 */
struct page_entry {
    PageDesc *pd;
    tb_page_addr_t index;
    bool locked;
};

/**
 * struct page_collection - tracks a set of pages (i.e. &struct page_entry's)
 * @tree:   Binary search tree (BST) of the pages, with key == page index
 * @max:    Pointer to the page in @tree with the highest page index
 *
 * To avoid deadlock we lock pages in ascending order of page index.
 * When operating on a set of pages, we need to keep track of them so that
 * we can lock them in order and also unlock them later. For this we collect
 * pages (i.e. &struct page_entry's) in a binary search @tree. Given that the
 * @tree implementation we use does not provide an O(1) operation to obtain the
 * highest-ranked element, we use @max to keep track of the inserted page
 * with the highest index. This is valuable because if a page is not in
 * the tree and its index is higher than @max's, then we can lock it
 * without breaking the locking order rule.
 *
 * Note on naming: 'struct page_set' would be shorter, but we already have a few
 * page_set_*() helpers, so page_collection is used instead to avoid confusion.
 *
 * See also: page_collection_lock().
 */
struct page_collection {
    GTree *tree;
    struct page_entry *max;
};

/*
 * In system mode we want L1_MAP to be based on ram offsets,
 * while in user mode we want it to be based on virtual addresses.
 *
 * TODO: For user mode, see the caveat re host vs guest virtual
 * address spaces near GUEST_ADDR_MAX.
 */
#if !defined(CONFIG_USER_ONLY)
#if HOST_LONG_BITS < TARGET_PHYS_ADDR_SPACE_BITS
# define L1_MAP_ADDR_SPACE_BITS  HOST_LONG_BITS
#else
# define L1_MAP_ADDR_SPACE_BITS  TARGET_PHYS_ADDR_SPACE_BITS
#endif
#else
# define L1_MAP_ADDR_SPACE_BITS  MIN(HOST_LONG_BITS, TARGET_ABI_BITS)
#endif

/* Make sure all possible CPU event bits fit in tb->trace_vcpu_dstate */
QEMU_BUILD_BUG_ON(CPU_TRACE_DSTATE_MAX_EVENTS >
                  sizeof_field(TranslationBlock, trace_vcpu_dstate)
                  * BITS_PER_BYTE);

/*
 * L1 Mapping properties
 */
int v_l1_size;
int v_l1_shift;
int v_l2_levels;

void *l1_map[V_L1_MAX_SIZE];

TBContext tb_ctx;

static void page_table_config_init(void)
{
    uint32_t v_l1_bits;

    assert(TARGET_PAGE_BITS);
    /* The bits remaining after N lower levels of page tables. */
    v_l1_bits = (L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS) % V_L2_BITS;
    if (v_l1_bits < V_L1_MIN_BITS) {
        v_l1_bits += V_L2_BITS;
    }

    v_l1_size = 1 << v_l1_bits;
    v_l1_shift = L1_MAP_ADDR_SPACE_BITS - TARGET_PAGE_BITS - v_l1_bits;
    v_l2_levels = v_l1_shift / V_L2_BITS - 1;

    assert(v_l1_bits <= V_L1_MAX_BITS);
    assert(v_l1_shift % V_L2_BITS == 0);
    assert(v_l2_levels >= 0);
}

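/*
 * Worked example (illustrative only; the real constants live in internal.h):
 * assuming V_L2_BITS == 10, V_L1_MIN_BITS == 4, TARGET_PAGE_BITS == 12 and a
 * 48-bit L1_MAP_ADDR_SPACE_BITS, the remainder (48 - 12) % 10 == 6 is already
 * large enough, so v_l1_size == 64, v_l1_shift == 30 and v_l2_levels == 2:
 * a 64-entry top-level table followed by two intermediate levels before the
 * PageDesc leaves.
 */
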
/* Encode VAL as a signed leb128 sequence at P.
   Return P incremented past the encoded value.  */
static uint8_t *encode_sleb128(uint8_t *p, target_long val)
{
    int more, byte;

    do {
        byte = val & 0x7f;
        val >>= 7;
        more = !((val == 0 && (byte & 0x40) == 0)
                 || (val == -1 && (byte & 0x40) != 0));
        if (more) {
            byte |= 0x80;
        }
        *p++ = byte;
    } while (more);

    return p;
}

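/*
 * For example (values shown purely for illustration): encoding 300 emits the
 * two bytes 0xac 0x02 (0x2c with the continuation bit, then 0x02), while
 * encoding -2 emits the single byte 0x7e, because the sign bit 0x40 already
 * agrees with the remaining value of -1.  decode_sleb128() below reverses
 * the transformation.
 */
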
/* Decode a signed leb128 sequence at *PP; increment *PP past the
   decoded value.  Return the decoded value.  */
static target_long decode_sleb128(const uint8_t **pp)
{
    const uint8_t *p = *pp;
    target_long val = 0;
    int byte, shift = 0;

    do {
        byte = *p++;
        val |= (target_ulong)(byte & 0x7f) << shift;
        shift += 7;
    } while (byte & 0x80);
    if (shift < TARGET_LONG_BITS && (byte & 0x40)) {
        val |= -(target_ulong)1 << shift;
    }

    *pp = p;
    return val;
}

/* Encode the data collected about the instructions while compiling TB.
   Place the data at BLOCK, and return the number of bytes consumed.

   The logical table consists of TARGET_INSN_START_WORDS target_ulong's,
   which come from the target's insn_start data, followed by a uintptr_t
   which comes from the host pc of the end of the code implementing the insn.

   Each line of the table is encoded as sleb128 deltas from the previous
   line.  The seed for the first line is { tb->pc, 0..., tb->tc.ptr }.
   That is, the first column is seeded with the guest pc, the last column
   with the host pc, and the middle columns with zeros.  */

static int encode_search(TranslationBlock *tb, uint8_t *block)
{
    uint8_t *highwater = tcg_ctx->code_gen_highwater;
    uint8_t *p = block;
    int i, j, n;

    for (i = 0, n = tb->icount; i < n; ++i) {
        target_ulong prev;

        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            if (i == 0) {
                prev = (!TARGET_TB_PCREL && j == 0 ? tb_pc(tb) : 0);
            } else {
                prev = tcg_ctx->gen_insn_data[i - 1][j];
            }
            p = encode_sleb128(p, tcg_ctx->gen_insn_data[i][j] - prev);
        }
        prev = (i == 0 ? 0 : tcg_ctx->gen_insn_end_off[i - 1]);
        p = encode_sleb128(p, tcg_ctx->gen_insn_end_off[i] - prev);

        /* Test for (pending) buffer overflow.  The assumption is that any
           one row beginning below the high water mark cannot overrun
           the buffer completely.  Thus we can test for overflow after
           encoding a row without having to check during encoding.  */
        if (unlikely(p > highwater)) {
            return -1;
        }
    }

    return p - block;
}

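/*
 * Illustration (assuming !TARGET_TB_PCREL; the numbers are made up): for a TB
 * holding two guest insns at pc and pc + 4 whose host code ends at offsets
 * 0x20 and 0x38, the logical rows are { pc, ..., 0x20 } and { pc + 4, ..., 0x38 },
 * and what encode_search() actually stores is the sleb128 delta stream
 * { 0, ..., 0x20 } followed by { 4, ..., 0x18 }.
 */
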
static int cpu_unwind_data_from_tb(TranslationBlock *tb, uintptr_t host_pc,
                                   uint64_t *data)
{
    uintptr_t iter_pc = (uintptr_t)tb->tc.ptr;
    const uint8_t *p = tb->tc.ptr + tb->tc.size;
    int i, j, num_insns = tb->icount;

    host_pc -= GETPC_ADJ;

    if (host_pc < iter_pc) {
        return -1;
    }

    memset(data, 0, sizeof(uint64_t) * TARGET_INSN_START_WORDS);
    if (!TARGET_TB_PCREL) {
        data[0] = tb_pc(tb);
    }

    /*
     * Reconstruct the stored insn data while looking for the point
     * at which the end of the insn exceeds host_pc.
     */
    for (i = 0; i < num_insns; ++i) {
        for (j = 0; j < TARGET_INSN_START_WORDS; ++j) {
            data[j] += decode_sleb128(&p);
        }
        iter_pc += decode_sleb128(&p);
        if (iter_pc > host_pc) {
            return num_insns - i;
        }
    }
    return -1;
}

/*
 * The cpu state corresponding to 'host_pc' is restored in
 * preparation for exiting the TB.
 */
void cpu_restore_state_from_tb(CPUState *cpu, TranslationBlock *tb,
                               uintptr_t host_pc)
{
    uint64_t data[TARGET_INSN_START_WORDS];
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
    int64_t ti = profile_getclock();
#endif
    int insns_left = cpu_unwind_data_from_tb(tb, host_pc, data);

    if (insns_left < 0) {
        return;
    }

    if (tb_cflags(tb) & CF_USE_ICOUNT) {
        assert(icount_enabled());
        /*
         * Reset the cycle counter to the start of the block and
         * shift it to the number of actually executed instructions.
         */
        cpu_neg(cpu)->icount_decr.u16.low += insns_left;
    }

    cpu->cc->tcg_ops->restore_state_to_opc(cpu, tb, data);

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->restore_time,
                prof->restore_time + profile_getclock() - ti);
    qatomic_set(&prof->restore_count, prof->restore_count + 1);
#endif
}

bool cpu_restore_state(CPUState *cpu, uintptr_t host_pc)
{
    /*
     * The host_pc has to be in the rx region of the code buffer.
     * If it is not we will not be able to resolve it here.
     * The two cases where host_pc will not be correct are:
     *
     *  - fault during translation (instruction fetch)
     *  - fault from helper (not using GETPC() macro)
     *
     * Either way we need to return early as we can't resolve it here.
     */
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            cpu_restore_state_from_tb(cpu, tb, host_pc);
            return true;
        }
    }
    return false;
}

bool cpu_unwind_state_data(CPUState *cpu, uintptr_t host_pc, uint64_t *data)
{
    if (in_code_gen_buffer((const void *)(host_pc - tcg_splitwx_diff))) {
        TranslationBlock *tb = tcg_tb_lookup(host_pc);
        if (tb) {
            return cpu_unwind_data_from_tb(tb, host_pc, data) >= 0;
        }
    }
    return false;
}

void page_init(void)
{
    page_size_init();
    page_table_config_init();
}

PageDesc *page_find_alloc(tb_page_addr_t index, bool alloc)
{
    PageDesc *pd;
    void **lp;
    int i;

    /* Level 1.  Always allocated.  */
    lp = l1_map + ((index >> v_l1_shift) & (v_l1_size - 1));

    /* Level 2..N-1.  */
    for (i = v_l2_levels; i > 0; i--) {
        void **p = qatomic_rcu_read(lp);

        if (p == NULL) {
            void *existing;

            if (!alloc) {
                return NULL;
            }
            p = g_new0(void *, V_L2_SIZE);
            existing = qatomic_cmpxchg(lp, NULL, p);
            if (unlikely(existing)) {
                g_free(p);
                p = existing;
            }
        }

        lp = p + ((index >> (i * V_L2_BITS)) & (V_L2_SIZE - 1));
    }

    pd = qatomic_rcu_read(lp);
    if (pd == NULL) {
        void *existing;

        if (!alloc) {
            return NULL;
        }
        pd = g_new0(PageDesc, V_L2_SIZE);
#ifndef CONFIG_USER_ONLY
        {
            int i;

            for (i = 0; i < V_L2_SIZE; i++) {
                qemu_spin_init(&pd[i].lock);
            }
        }
#endif
        existing = qatomic_cmpxchg(lp, NULL, pd);
        if (unlikely(existing)) {
#ifndef CONFIG_USER_ONLY
            {
                int i;

                for (i = 0; i < V_L2_SIZE; i++) {
                    qemu_spin_destroy(&pd[i].lock);
                }
            }
#endif
            g_free(pd);
            pd = existing;
        }
    }

    return pd + (index & (V_L2_SIZE - 1));
}

/* In user-mode page locks aren't used; mmap_lock is enough */
#ifdef CONFIG_USER_ONLY
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    return NULL;
}

void page_collection_unlock(struct page_collection *set)
{ }
#else /* !CONFIG_USER_ONLY */

#ifdef CONFIG_DEBUG_TCG

static __thread GHashTable *ht_pages_locked_debug;

static void ht_pages_locked_debug_init(void)
{
    if (ht_pages_locked_debug) {
        return;
    }
    ht_pages_locked_debug = g_hash_table_new(NULL, NULL);
}

static bool page_is_locked(const PageDesc *pd)
{
    PageDesc *found;

    ht_pages_locked_debug_init();
    found = g_hash_table_lookup(ht_pages_locked_debug, pd);
    return !!found;
}

static void page_lock__debug(PageDesc *pd)
{
    ht_pages_locked_debug_init();
    g_assert(!page_is_locked(pd));
    g_hash_table_insert(ht_pages_locked_debug, pd, pd);
}

static void page_unlock__debug(const PageDesc *pd)
{
    bool removed;

    ht_pages_locked_debug_init();
    g_assert(page_is_locked(pd));
    removed = g_hash_table_remove(ht_pages_locked_debug, pd);
    g_assert(removed);
}

void do_assert_page_locked(const PageDesc *pd, const char *file, int line)
{
    if (unlikely(!page_is_locked(pd))) {
        error_report("assert_page_lock: PageDesc %p not locked @ %s:%d",
                     pd, file, line);
        abort();
    }
}

void assert_no_pages_locked(void)
{
    ht_pages_locked_debug_init();
    g_assert(g_hash_table_size(ht_pages_locked_debug) == 0);
}

#else /* !CONFIG_DEBUG_TCG */

static inline void page_lock__debug(const PageDesc *pd) { }
static inline void page_unlock__debug(const PageDesc *pd) { }

#endif /* CONFIG_DEBUG_TCG */

void page_lock(PageDesc *pd)
{
    page_lock__debug(pd);
    qemu_spin_lock(&pd->lock);
}

void page_unlock(PageDesc *pd)
{
    qemu_spin_unlock(&pd->lock);
    page_unlock__debug(pd);
}

static inline struct page_entry *
page_entry_new(PageDesc *pd, tb_page_addr_t index)
{
    struct page_entry *pe = g_malloc(sizeof(*pe));

    pe->index = index;
    pe->pd = pd;
    pe->locked = false;
    return pe;
}

static void page_entry_destroy(gpointer p)
{
    struct page_entry *pe = p;

    g_assert(pe->locked);
    page_unlock(pe->pd);
    g_free(pe);
}

/* returns false on success */
static bool page_entry_trylock(struct page_entry *pe)
{
    bool busy;

    busy = qemu_spin_trylock(&pe->pd->lock);
    if (!busy) {
        g_assert(!pe->locked);
        pe->locked = true;
        page_lock__debug(pe->pd);
    }
    return busy;
}

static void do_page_entry_lock(struct page_entry *pe)
{
    page_lock(pe->pd);
    g_assert(!pe->locked);
    pe->locked = true;
}

static gboolean page_entry_lock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    do_page_entry_lock(pe);
    return FALSE;
}

static gboolean page_entry_unlock(gpointer key, gpointer value, gpointer data)
{
    struct page_entry *pe = value;

    if (pe->locked) {
        pe->locked = false;
        page_unlock(pe->pd);
    }
    return FALSE;
}

/*
 * Trylock a page, and if successful, add the page to a collection.
 * Returns true ("busy") if the page could not be locked; false otherwise.
 */
static bool page_trylock_add(struct page_collection *set, tb_page_addr_t addr)
{
    tb_page_addr_t index = addr >> TARGET_PAGE_BITS;
    struct page_entry *pe;
    PageDesc *pd;

    pe = g_tree_lookup(set->tree, &index);
    if (pe) {
        return false;
    }

    pd = page_find(index);
    if (pd == NULL) {
        return false;
    }

    pe = page_entry_new(pd, index);
    g_tree_insert(set->tree, &pe->index, pe);

    /*
     * If this is either (1) the first insertion or (2) a page whose index
     * is higher than any other so far, just lock the page and move on.
     */
    if (set->max == NULL || pe->index > set->max->index) {
        set->max = pe;
        do_page_entry_lock(pe);
        return false;
    }
    /*
     * Try to acquire out-of-order lock; if busy, return busy so that we acquire
     * locks in order.
     */
    return page_entry_trylock(pe);
}

static gint tb_page_addr_cmp(gconstpointer ap, gconstpointer bp, gpointer udata)
{
    tb_page_addr_t a = *(const tb_page_addr_t *)ap;
    tb_page_addr_t b = *(const tb_page_addr_t *)bp;

    if (a == b) {
        return 0;
    } else if (a < b) {
        return -1;
    }
    return 1;
}

/*
 * Lock a range of pages ([@start,@end[) as well as the pages of all
 * intersecting TBs.
 * Locking order: acquire locks in ascending order of page index.
 */
struct page_collection *
page_collection_lock(tb_page_addr_t start, tb_page_addr_t end)
{
    struct page_collection *set = g_malloc(sizeof(*set));
    tb_page_addr_t index;
    PageDesc *pd;

    start >>= TARGET_PAGE_BITS;
    end >>= TARGET_PAGE_BITS;
    g_assert(start <= end);

    set->tree = g_tree_new_full(tb_page_addr_cmp, NULL, NULL,
                                page_entry_destroy);
    set->max = NULL;
    assert_no_pages_locked();

 retry:
    g_tree_foreach(set->tree, page_entry_lock, NULL);

    for (index = start; index <= end; index++) {
        TranslationBlock *tb;
        PageForEachNext n;

        pd = page_find(index);
        if (pd == NULL) {
            continue;
        }
        if (page_trylock_add(set, index << TARGET_PAGE_BITS)) {
            g_tree_foreach(set->tree, page_entry_unlock, NULL);
            goto retry;
        }
        assert_page_locked(pd);
        PAGE_FOR_EACH_TB(unused, unused, pd, tb, n) {
            if (page_trylock_add(set, tb_page_addr0(tb)) ||
                (tb_page_addr1(tb) != -1 &&
                 page_trylock_add(set, tb_page_addr1(tb)))) {
                /* drop all locks, and reacquire in order */
                g_tree_foreach(set->tree, page_entry_unlock, NULL);
                goto retry;
            }
        }
    }
    return set;
}

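/*
 * Typical usage sketch (the real callers live elsewhere, e.g. in the TB
 * maintenance code; this is only meant to illustrate the lock/unlock pairing
 * around the retry-on-busy protocol implemented above):
 *
 *     struct page_collection *pages = page_collection_lock(start, end);
 *     ... invalidate or modify the TBs on the locked pages ...
 *     page_collection_unlock(pages);
 */
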
void page_collection_unlock(struct page_collection *set)
{
    /* entries are unlocked and freed via page_entry_destroy */
    g_tree_destroy(set->tree);
    g_free(set);
}

#endif /* !CONFIG_USER_ONLY */

/*
 * Isolate the portion of code gen which can setjmp/longjmp.
 * Return the size of the generated code, or negative on error.
 */
static int setjmp_gen_code(CPUArchState *env, TranslationBlock *tb,
                           target_ulong pc, void *host_pc,
                           int *max_insns, int64_t *ti)
{
    int ret = sigsetjmp(tcg_ctx->jmp_trans, 0);
    if (unlikely(ret != 0)) {
        return ret;
    }

    tcg_func_start(tcg_ctx);

    tcg_ctx->cpu = env_cpu(env);
    gen_intermediate_code(env_cpu(env), tb, *max_insns, pc, host_pc);
    assert(tb->size != 0);
    tcg_ctx->cpu = NULL;
    *max_insns = tb->icount;

#ifdef CONFIG_PROFILER
    qatomic_set(&tcg_ctx->prof.tb_count, tcg_ctx->prof.tb_count + 1);
    qatomic_set(&tcg_ctx->prof.interm_time,
                tcg_ctx->prof.interm_time + profile_getclock() - *ti);
    *ti = profile_getclock();
#endif

    return tcg_gen_code(tcg_ctx, tb, pc);
}

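/*
 * Overview of tb_gen_code() below: allocate a TranslationBlock from the
 * current TCG region, translate the guest code via setjmp_gen_code(), append
 * the search (unwind) data with encode_search(), then register the TB in the
 * region tree and link it to its physical pages via tb_link_page().  If
 * another thread raced us and already published an identical TB, the freshly
 * generated code is discarded and the existing TB is returned instead.
 */
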
/* Called with mmap_lock held for user mode emulation. */
TranslationBlock *tb_gen_code(CPUState *cpu,
                              target_ulong pc, target_ulong cs_base,
                              uint32_t flags, int cflags)
{
    CPUArchState *env = cpu->env_ptr;
    TranslationBlock *tb, *existing_tb;
    tb_page_addr_t phys_pc;
    tcg_insn_unit *gen_code_buf;
    int gen_code_size, search_size, max_insns;
#ifdef CONFIG_PROFILER
    TCGProfile *prof = &tcg_ctx->prof;
#endif
    int64_t ti;
    void *host_pc;

    assert_memory_lock();
    qemu_thread_jit_write();

    phys_pc = get_page_addr_code_hostp(env, pc, &host_pc);

    if (phys_pc == -1) {
        /* Generate a one-shot TB with 1 insn in it */
        cflags = (cflags & ~CF_COUNT_MASK) | CF_LAST_IO | 1;
    }

    max_insns = cflags & CF_COUNT_MASK;
    if (max_insns == 0) {
        max_insns = TCG_MAX_INSNS;
    }
    QEMU_BUILD_BUG_ON(CF_COUNT_MASK + 1 != TCG_MAX_INSNS);

 buffer_overflow:
    tb = tcg_tb_alloc(tcg_ctx);
    if (unlikely(!tb)) {
        /* flush must be done */
        tb_flush(cpu);
        mmap_unlock();
        /* Make the execution loop process the flush as soon as possible. */
        cpu->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit(cpu);
    }

    gen_code_buf = tcg_ctx->code_gen_ptr;
    tb->tc.ptr = tcg_splitwx_to_rx(gen_code_buf);
#if !TARGET_TB_PCREL
    tb->pc = pc;
#endif
    tb->cs_base = cs_base;
    tb->flags = flags;
    tb->cflags = cflags;
    tb->trace_vcpu_dstate = *cpu->trace_dstate;
    tb_set_page_addr0(tb, phys_pc);
    tb_set_page_addr1(tb, -1);
    tcg_ctx->tb_cflags = cflags;
 tb_overflow:

#ifdef CONFIG_PROFILER
    /* includes aborted translations because of exceptions */
    qatomic_set(&prof->tb_count1, prof->tb_count1 + 1);
    ti = profile_getclock();
#endif

    trace_translate_block(tb, pc, tb->tc.ptr);

    gen_code_size = setjmp_gen_code(env, tb, pc, host_pc, &max_insns, &ti);
    if (unlikely(gen_code_size < 0)) {
        switch (gen_code_size) {
        case -1:
            /*
             * Overflow of code_gen_buffer, or the current slice of it.
             *
             * TODO: We don't need to re-do gen_intermediate_code, nor
             * should we re-do the tcg optimization currently hidden
             * inside tcg_gen_code.  All that should be required is to
             * flush the TBs, allocate a new TB, re-initialize it per
             * above, and re-do the actual code generation.
             */
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation for "
                          "code_gen_buffer overflow\n");
            goto buffer_overflow;

        case -2:
            /*
             * The code generated for the TranslationBlock is too large.
             * The maximum size allowed by the unwind info is 64k.
             * There may be stricter constraints from relocations
             * in the tcg backend.
             *
             * Try again with half as many insns as we attempted this time.
             * If a single insn overflows, there's a bug somewhere...
             */
            assert(max_insns > 1);
            max_insns /= 2;
            qemu_log_mask(CPU_LOG_TB_OP | CPU_LOG_TB_OP_OPT,
                          "Restarting code generation with "
                          "smaller translation block (max %d insns)\n",
                          max_insns);
            goto tb_overflow;

        default:
            g_assert_not_reached();
        }
    }
    search_size = encode_search(tb, (void *)gen_code_buf + gen_code_size);
    if (unlikely(search_size < 0)) {
        goto buffer_overflow;
    }
    tb->tc.size = gen_code_size;

#ifdef CONFIG_PROFILER
    qatomic_set(&prof->code_time, prof->code_time + profile_getclock() - ti);
    qatomic_set(&prof->code_in_len, prof->code_in_len + tb->size);
    qatomic_set(&prof->code_out_len, prof->code_out_len + gen_code_size);
    qatomic_set(&prof->search_out_len, prof->search_out_len + search_size);
#endif

#ifdef DEBUG_DISAS
    if (qemu_loglevel_mask(CPU_LOG_TB_OUT_ASM) &&
        qemu_log_in_addr_range(pc)) {
        FILE *logfile = qemu_log_trylock();
        if (logfile) {
            int code_size, data_size;
            const tcg_target_ulong *rx_data_gen_ptr;
            size_t chunk_start;
            int insn = 0;

            if (tcg_ctx->data_gen_ptr) {
                rx_data_gen_ptr = tcg_splitwx_to_rx(tcg_ctx->data_gen_ptr);
                code_size = (const void *)rx_data_gen_ptr - tb->tc.ptr;
                data_size = gen_code_size - code_size;
            } else {
                rx_data_gen_ptr = 0;
                code_size = gen_code_size;
                data_size = 0;
            }

            /* Dump header and the first instruction */
            fprintf(logfile, "OUT: [size=%d]\n", gen_code_size);
            fprintf(logfile,
                    " -- guest addr 0x" TARGET_FMT_lx " + tb prologue\n",
                    tcg_ctx->gen_insn_data[insn][0]);
            chunk_start = tcg_ctx->gen_insn_end_off[insn];
            disas(logfile, tb->tc.ptr, chunk_start);

            /*
             * Dump each instruction chunk, wrapping up empty chunks into
             * the next instruction. The whole array is offset so the
             * first entry is the beginning of the 2nd instruction.
             */
            while (insn < tb->icount) {
                size_t chunk_end = tcg_ctx->gen_insn_end_off[insn];
                if (chunk_end > chunk_start) {
                    fprintf(logfile, " -- guest addr 0x" TARGET_FMT_lx "\n",
                            tcg_ctx->gen_insn_data[insn][0]);
                    disas(logfile, tb->tc.ptr + chunk_start,
                          chunk_end - chunk_start);
                    chunk_start = chunk_end;
                }
                insn++;
            }

            if (chunk_start < code_size) {
                fprintf(logfile, " -- tb slow paths + alignment\n");
                disas(logfile, tb->tc.ptr + chunk_start,
                      code_size - chunk_start);
            }

            /* Finally dump any data we may have after the block */
            if (data_size) {
                int i;
                fprintf(logfile, " data: [size=%d]\n", data_size);
                for (i = 0; i < data_size / sizeof(tcg_target_ulong); i++) {
                    if (sizeof(tcg_target_ulong) == 8) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .quad 0x%016" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else if (sizeof(tcg_target_ulong) == 4) {
                        fprintf(logfile,
                                "0x%08" PRIxPTR ": .long 0x%08" TCG_PRIlx "\n",
                                (uintptr_t)&rx_data_gen_ptr[i], rx_data_gen_ptr[i]);
                    } else {
                        qemu_build_not_reached();
                    }
                }
            }
            fprintf(logfile, "\n");
            qemu_log_unlock(logfile);
        }
    }
#endif

    qatomic_set(&tcg_ctx->code_gen_ptr, (void *)
        ROUND_UP((uintptr_t)gen_code_buf + gen_code_size + search_size,
                 CODE_GEN_ALIGN));

    /* init jump list */
    qemu_spin_init(&tb->jmp_lock);
    tb->jmp_list_head = (uintptr_t)NULL;
    tb->jmp_list_next[0] = (uintptr_t)NULL;
    tb->jmp_list_next[1] = (uintptr_t)NULL;
    tb->jmp_dest[0] = (uintptr_t)NULL;
    tb->jmp_dest[1] = (uintptr_t)NULL;

    /* init original jump addresses which have been set during tcg_gen_code() */
    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 0);
    }
    if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
        tb_reset_jump(tb, 1);
    }

    /*
     * If the TB is not associated with a physical RAM page then it must be
     * a temporary one-insn TB, and we have nothing left to do. Return early
     * before attempting to link to other TBs or add to the lookup table.
     */
    if (tb_page_addr0(tb) == -1) {
        return tb;
    }

    /*
     * Insert TB into the corresponding region tree before publishing it
     * through QHT. Otherwise a rewind happening inside the TB might fail
     * to look the TB up using the host PC.
     */
    tcg_tb_insert(tb);

    /*
     * No explicit memory barrier is required -- tb_link_page() makes the
     * TB visible in a consistent state.
     */
    existing_tb = tb_link_page(tb, tb_page_addr0(tb), tb_page_addr1(tb));
    /* if the TB already exists, discard what we just translated */
    if (unlikely(existing_tb != tb)) {
        uintptr_t orig_aligned = (uintptr_t)gen_code_buf;

        orig_aligned -= ROUND_UP(sizeof(*tb), qemu_icache_linesize);
        qatomic_set(&tcg_ctx->code_gen_ptr, (void *)orig_aligned);
        tcg_tb_remove(tb);
        return existing_tb;
    }
    return tb;
}

/* user-mode: call with mmap_lock held */
void tb_check_watchpoint(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;

    assert_memory_lock();

    tb = tcg_tb_lookup(retaddr);
    if (tb) {
        /* We can use retranslation to find the PC. */
        cpu_restore_state_from_tb(cpu, tb, retaddr);
        tb_phys_invalidate(tb, -1);
    } else {
        /* The exception probably happened in a helper.  The CPU state should
           have been saved before calling it. Fetch the PC from there. */
        CPUArchState *env = cpu->env_ptr;
        target_ulong pc, cs_base;
        tb_page_addr_t addr;
        uint32_t flags;

        cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);
        addr = get_page_addr_code(env, pc);
        if (addr != -1) {
            tb_invalidate_phys_range(addr, addr + 1);
        }
    }
}

#ifndef CONFIG_USER_ONLY
/*
 * In deterministic execution mode, instructions doing device I/Os
 * must be at the end of the TB.
 *
 * Called by softmmu_template.h, with iothread mutex not held.
 */
void cpu_io_recompile(CPUState *cpu, uintptr_t retaddr)
{
    TranslationBlock *tb;
    CPUClass *cc;
    uint32_t n;

    tb = tcg_tb_lookup(retaddr);
    if (!tb) {
        cpu_abort(cpu, "cpu_io_recompile: could not find TB for pc=%p",
                  (void *)retaddr);
    }
    cpu_restore_state_from_tb(cpu, tb, retaddr);

    /*
     * Some guests must re-execute the branch when re-executing a delay
     * slot instruction.  When this is the case, adjust icount and N
     * to account for the re-execution of the branch.
     */
    n = 1;
    cc = CPU_GET_CLASS(cpu);
    if (cc->tcg_ops->io_recompile_replay_branch &&
        cc->tcg_ops->io_recompile_replay_branch(cpu, tb)) {
        cpu_neg(cpu)->icount_decr.u16.low++;
        n = 2;
    }

    /*
     * Exit the loop and potentially generate a new TB executing just
     * the I/O insns. We also limit instrumentation to memory
     * operations only (which execute after completion) so we don't
     * double instrument the instruction.
     */
    cpu->cflags_next_tb = curr_cflags(cpu) | CF_MEMI_ONLY | CF_LAST_IO | n;

    if (qemu_loglevel_mask(CPU_LOG_EXEC)) {
        target_ulong pc = log_pc(cpu, tb);
        if (qemu_log_in_addr_range(pc)) {
            qemu_log("cpu_io_recompile: rewound execution of TB to "
                     TARGET_FMT_lx "\n", pc);
        }
    }

    cpu_loop_exit_noexc(cpu);
}

static void print_qht_statistics(struct qht_stats hst, GString *buf)
{
    uint32_t hgram_opts;
    size_t hgram_bins;
    char *hgram;

    if (!hst.head_buckets) {
        return;
    }
    g_string_append_printf(buf, "TB hash buckets %zu/%zu "
                           "(%0.2f%% head buckets used)\n",
                           hst.used_head_buckets, hst.head_buckets,
                           (double)hst.used_head_buckets /
                           hst.head_buckets * 100);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_opts |= QDIST_PR_100X | QDIST_PR_PERCENT;
    if (qdist_xmax(&hst.occupancy) - qdist_xmin(&hst.occupancy) == 1) {
        hgram_opts |= QDIST_PR_NODECIMAL;
    }
    hgram = qdist_pr(&hst.occupancy, 10, hgram_opts);
    g_string_append_printf(buf, "TB hash occupancy %0.2f%% avg chain occ. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.occupancy) * 100, hgram);
    g_free(hgram);

    hgram_opts = QDIST_PR_BORDER | QDIST_PR_LABELS;
    hgram_bins = qdist_xmax(&hst.chain) - qdist_xmin(&hst.chain);
    if (hgram_bins > 10) {
        hgram_bins = 10;
    } else {
        hgram_bins = 0;
        hgram_opts |= QDIST_PR_NODECIMAL | QDIST_PR_NOBINRANGE;
    }
    hgram = qdist_pr(&hst.chain, hgram_bins, hgram_opts);
    g_string_append_printf(buf, "TB hash avg chain %0.3f buckets. "
                           "Histogram: %s\n",
                           qdist_avg(&hst.chain), hgram);
    g_free(hgram);
}

struct tb_tree_stats {
    size_t nb_tbs;
    size_t host_size;
    size_t target_size;
    size_t max_target_size;
    size_t direct_jmp_count;
    size_t direct_jmp2_count;
    size_t cross_page;
};

static gboolean tb_tree_stats_iter(gpointer key, gpointer value, gpointer data)
{
    const TranslationBlock *tb = value;
    struct tb_tree_stats *tst = data;

    tst->nb_tbs++;
    tst->host_size += tb->tc.size;
    tst->target_size += tb->size;
    if (tb->size > tst->max_target_size) {
        tst->max_target_size = tb->size;
    }
    if (tb_page_addr1(tb) != -1) {
        tst->cross_page++;
    }
    if (tb->jmp_reset_offset[0] != TB_JMP_RESET_OFFSET_INVALID) {
        tst->direct_jmp_count++;
        if (tb->jmp_reset_offset[1] != TB_JMP_RESET_OFFSET_INVALID) {
            tst->direct_jmp2_count++;
        }
    }
    return false;
}

void dump_exec_info(GString *buf)
{
    struct tb_tree_stats tst = {};
    struct qht_stats hst;
    size_t nb_tbs, flush_full, flush_part, flush_elide;

    tcg_tb_foreach(tb_tree_stats_iter, &tst);
    nb_tbs = tst.nb_tbs;
    /* XXX: avoid using doubles ? */
    g_string_append_printf(buf, "Translation buffer state:\n");
    /*
     * Report total code size including the padding and TB structs;
     * otherwise users might think "-accel tcg,tb-size" is not honoured.
     * For avg host size we use the precise numbers from tb_tree_stats though.
     */
    g_string_append_printf(buf, "gen code size %zu/%zu\n",
                           tcg_code_size(), tcg_code_capacity());
    g_string_append_printf(buf, "TB count %zu\n", nb_tbs);
    g_string_append_printf(buf, "TB avg target size %zu max=%zu bytes\n",
                           nb_tbs ? tst.target_size / nb_tbs : 0,
                           tst.max_target_size);
    g_string_append_printf(buf, "TB avg host size %zu bytes "
                           "(expansion ratio: %0.1f)\n",
                           nb_tbs ? tst.host_size / nb_tbs : 0,
                           tst.target_size ?
                           (double)tst.host_size / tst.target_size : 0);
    g_string_append_printf(buf, "cross page TB count %zu (%zu%%)\n",
                           tst.cross_page,
                           nb_tbs ? (tst.cross_page * 100) / nb_tbs : 0);
    g_string_append_printf(buf, "direct jump count %zu (%zu%%) "
                           "(2 jumps=%zu %zu%%)\n",
                           tst.direct_jmp_count,
                           nb_tbs ? (tst.direct_jmp_count * 100) / nb_tbs : 0,
                           tst.direct_jmp2_count,
                           nb_tbs ? (tst.direct_jmp2_count * 100) / nb_tbs : 0);

    qht_statistics_init(&tb_ctx.htable, &hst);
    print_qht_statistics(hst, buf);
    qht_statistics_destroy(&hst);

    g_string_append_printf(buf, "\nStatistics:\n");
    g_string_append_printf(buf, "TB flush count %u\n",
                           qatomic_read(&tb_ctx.tb_flush_count));
    g_string_append_printf(buf, "TB invalidate count %u\n",
                           qatomic_read(&tb_ctx.tb_phys_invalidate_count));

    tlb_flush_counts(&flush_full, &flush_part, &flush_elide);
    g_string_append_printf(buf, "TLB full flushes %zu\n", flush_full);
    g_string_append_printf(buf, "TLB partial flushes %zu\n", flush_part);
    g_string_append_printf(buf, "TLB elided flushes %zu\n", flush_elide);
    tcg_dump_info(buf);
}

#else /* CONFIG_USER_ONLY */

void cpu_interrupt(CPUState *cpu, int mask)
{
    g_assert(qemu_mutex_iothread_locked());
    cpu->interrupt_request |= mask;
    qatomic_set(&cpu_neg(cpu)->icount_decr.u16.high, -1);
}

/*
 * Walks guest process memory "regions" one by one
 * and calls callback function 'fn' for each region.
 */
struct walk_memory_regions_data {
    walk_memory_regions_fn fn;
    void *priv;
    target_ulong start;
    int prot;
};

static int walk_memory_regions_end(struct walk_memory_regions_data *data,
                                   target_ulong end, int new_prot)
{
    if (data->start != -1u) {
        int rc = data->fn(data->priv, data->start, end, data->prot);
        if (rc != 0) {
            return rc;
        }
    }

    data->start = (new_prot ? end : -1u);
    data->prot = new_prot;

    return 0;
}

static int walk_memory_regions_1(struct walk_memory_regions_data *data,
                                 target_ulong base, int level, void **lp)
{
    target_ulong pa;
    int i, rc;

    if (*lp == NULL) {
        return walk_memory_regions_end(data, base, 0);
    }

    if (level == 0) {
        PageDesc *pd = *lp;

        for (i = 0; i < V_L2_SIZE; ++i) {
            int prot = pd[i].flags;

            pa = base | (i << TARGET_PAGE_BITS);
            if (prot != data->prot) {
                rc = walk_memory_regions_end(data, pa, prot);
                if (rc != 0) {
                    return rc;
                }
            }
        }
    } else {
        void **pp = *lp;

        for (i = 0; i < V_L2_SIZE; ++i) {
            pa = base | ((target_ulong)i <<
                (TARGET_PAGE_BITS + V_L2_BITS * level));
            rc = walk_memory_regions_1(data, pa, level - 1, pp + i);
            if (rc != 0) {
                return rc;
            }
        }
    }

    return 0;
}

int walk_memory_regions(void *priv, walk_memory_regions_fn fn)
{
    struct walk_memory_regions_data data;
    uintptr_t i, l1_sz = v_l1_size;

    data.fn = fn;
    data.priv = priv;
    data.start = -1u;
    data.prot = 0;

    for (i = 0; i < l1_sz; i++) {
        target_ulong base = i << (v_l1_shift + TARGET_PAGE_BITS);
        int rc = walk_memory_regions_1(&data, base, v_l2_levels, l1_map + i);
        if (rc != 0) {
            return rc;
        }
    }

    return walk_memory_regions_end(&data, 0, 0);
}

static int dump_region(void *priv, target_ulong start,
                       target_ulong end, unsigned long prot)
{
    FILE *f = (FILE *)priv;

    (void) fprintf(f, TARGET_FMT_lx"-"TARGET_FMT_lx
                   " "TARGET_FMT_lx" %c%c%c\n",
                   start, end, end - start,
                   ((prot & PAGE_READ) ? 'r' : '-'),
                   ((prot & PAGE_WRITE) ? 'w' : '-'),
                   ((prot & PAGE_EXEC) ? 'x' : '-'));

    return 0;
}

/* dump memory mappings */
void page_dump(FILE *f)
{
    const int length = sizeof(target_ulong) * 2;
    (void) fprintf(f, "%-*s %-*s %-*s %s\n",
                   length, "start", length, "end", length, "size", "prot");
    walk_memory_regions(f, dump_region);
}

int page_get_flags(target_ulong address)
{
    PageDesc *p;

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        return 0;
    }
    return p->flags;
}

/*
 * Allow the target to decide if PAGE_TARGET_[12] may be reset.
 * By default, they are not kept.
 */
#ifndef PAGE_TARGET_STICKY
#define PAGE_TARGET_STICKY 0
#endif
#define PAGE_STICKY (PAGE_ANON | PAGE_PASSTHROUGH | PAGE_TARGET_STICKY)

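/*
 * Illustration of the "sticky" distinction (an example, not a specification):
 * a page mapped with MAP_ANONYMOUS gets PAGE_ANON in its PageDesc.  A later
 * protection change (e.g. the user-mode mprotect emulation) calls
 * page_set_flags() below without PAGE_RESET, so PAGE_ANON survives the
 * update, whereas a fresh mapping passes PAGE_RESET and the sticky bits
 * start from scratch.
 */
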
/* Modify the flags of a page and invalidate the code if necessary.
   The flag PAGE_WRITE_ORG is positioned automatically depending
   on PAGE_WRITE.  The mmap_lock should already be held.  */
void page_set_flags(target_ulong start, target_ulong end, int flags)
{
    target_ulong addr, len;
    bool reset, inval_tb = false;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
    assert(end - 1 <= GUEST_ADDR_MAX);
    assert(start < end);
    /* Only set PAGE_ANON with new mappings. */
    assert(!(flags & PAGE_ANON) || (flags & PAGE_RESET));
    assert_memory_lock();

    start = start & TARGET_PAGE_MASK;
    end = TARGET_PAGE_ALIGN(end);

    if (flags & PAGE_WRITE) {
        flags |= PAGE_WRITE_ORG;
    }
    reset = !(flags & PAGE_VALID) || (flags & PAGE_RESET);
    if (reset) {
        page_reset_target_data(start, end);
    }
    flags &= ~PAGE_RESET;

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        PageDesc *p = page_find_alloc(addr >> TARGET_PAGE_BITS, true);

        /*
         * If the page was executable, but is reset, or is no longer
         * executable, or has become writable, then invalidate any code.
         */
        if ((p->flags & PAGE_EXEC)
            && (reset ||
                !(flags & PAGE_EXEC) ||
                (flags & ~p->flags & PAGE_WRITE))) {
            inval_tb = true;
        }
        /* Using mprotect on a page does not change sticky bits. */
        p->flags = (reset ? 0 : p->flags & PAGE_STICKY) | flags;
    }

    if (inval_tb) {
        tb_invalidate_phys_range(start, end);
    }
}

int page_check_range(target_ulong start, target_ulong len, int flags)
{
    PageDesc *p;
    target_ulong end;
    target_ulong addr;

    /* This function should never be called with addresses outside the
       guest address space.  If this assert fires, it probably indicates
       a missing call to h2g_valid.  */
    if (TARGET_ABI_BITS > L1_MAP_ADDR_SPACE_BITS) {
        assert(start < ((target_ulong)1 << L1_MAP_ADDR_SPACE_BITS));
    }

    if (len == 0) {
        return 0;
    }
    if (start + len - 1 < start) {
        /* We've wrapped around. */
        return -1;
    }

    /* must do before we lose bits in the next step */
    end = TARGET_PAGE_ALIGN(start + len);
    start = start & TARGET_PAGE_MASK;

    for (addr = start, len = end - start;
         len != 0;
         len -= TARGET_PAGE_SIZE, addr += TARGET_PAGE_SIZE) {
        p = page_find(addr >> TARGET_PAGE_BITS);
        if (!p) {
            return -1;
        }
        if (!(p->flags & PAGE_VALID)) {
            return -1;
        }

        if ((flags & PAGE_READ) && !(p->flags & PAGE_READ)) {
            return -1;
        }
        if (flags & PAGE_WRITE) {
            if (!(p->flags & PAGE_WRITE_ORG)) {
                return -1;
            }
            /* unprotect the page if it was put read-only because it
               contains translated code */
            if (!(p->flags & PAGE_WRITE)) {
                if (!page_unprotect(addr, 0)) {
                    return -1;
                }
            }
        }
    }
    return 0;
}

void page_protect(tb_page_addr_t page_addr)
{
    target_ulong addr;
    PageDesc *p;
    int prot;

    p = page_find(page_addr >> TARGET_PAGE_BITS);
    if (p && (p->flags & PAGE_WRITE)) {
        /*
         * Force the host page as non writable (writes will have a page fault +
         * mprotect overhead).
         */
        page_addr &= qemu_host_page_mask;
        prot = 0;
        for (addr = page_addr; addr < page_addr + qemu_host_page_size;
             addr += TARGET_PAGE_SIZE) {

            p = page_find(addr >> TARGET_PAGE_BITS);
            if (!p) {
                continue;
            }
            prot |= p->flags;
            p->flags &= ~PAGE_WRITE;
        }
        mprotect(g2h_untagged(page_addr), qemu_host_page_size,
                 (prot & PAGE_BITS) & ~PAGE_WRITE);
    }
}

/* called from signal handler: invalidate the code and unprotect the
 * page. Return 0 if the fault was not handled, 1 if it was handled,
 * and 2 if it was handled but the caller must cause the TB to be
 * immediately exited. (We can only return 2 if the 'pc' argument is
 * non-zero.)
 */
int page_unprotect(target_ulong address, uintptr_t pc)
{
    unsigned int prot;
    bool current_tb_invalidated;
    PageDesc *p;
    target_ulong host_start, host_end, addr;

    /* Technically this isn't safe inside a signal handler.  However we
       know this only ever happens in a synchronous SEGV handler, so in
       practice it seems to be ok.  */
    mmap_lock();

    p = page_find(address >> TARGET_PAGE_BITS);
    if (!p) {
        mmap_unlock();
        return 0;
    }

    /* if the page was really writable, then we change its
       protection back to writable */
    if (p->flags & PAGE_WRITE_ORG) {
        current_tb_invalidated = false;
        if (p->flags & PAGE_WRITE) {
            /* If the page is actually marked WRITE then assume this is because
             * this thread raced with another one which got here first and
             * set the page to PAGE_WRITE and did the TB invalidate for us.
             */
#ifdef TARGET_HAS_PRECISE_SMC
            TranslationBlock *current_tb = tcg_tb_lookup(pc);
            if (current_tb) {
                current_tb_invalidated = tb_cflags(current_tb) & CF_INVALID;
            }
#endif
        } else {
            host_start = address & qemu_host_page_mask;
            host_end = host_start + qemu_host_page_size;

            prot = 0;
            for (addr = host_start; addr < host_end; addr += TARGET_PAGE_SIZE) {
                p = page_find(addr >> TARGET_PAGE_BITS);
                p->flags |= PAGE_WRITE;
                prot |= p->flags;

                /* and since the content will be modified, we must invalidate
                   the corresponding translated code. */
                current_tb_invalidated |=
                    tb_invalidate_phys_page_unwind(addr, pc);
            }
            mprotect((void *)g2h_untagged(host_start), qemu_host_page_size,
                     prot & PAGE_BITS);
        }
        mmap_unlock();
        /* If current TB was invalidated return to main loop */
        return current_tb_invalidated ? 2 : 1;
    }
    mmap_unlock();
    return 0;
}
#endif /* CONFIG_USER_ONLY */

/*
 * Called by generic code at e.g. cpu reset after cpu creation,
 * therefore we must be prepared to allocate the jump cache.
 */
void tcg_flush_jmp_cache(CPUState *cpu)
{
    CPUJumpCache *jc = cpu->tb_jmp_cache;

    /* During early initialization, the cache may not yet be allocated. */
    if (unlikely(jc == NULL)) {
        return;
    }

    for (int i = 0; i < TB_JMP_CACHE_SIZE; i++) {
        qatomic_set(&jc->array[i].tb, NULL);
    }
}

/* This is a wrapper for common code that can not use CONFIG_SOFTMMU */
void tcg_flush_softmmu_tlb(CPUState *cs)
{
#ifdef CONFIG_SOFTMMU
    tlb_flush(cs);
#endif
}