TCG patch queue:

Workaround macos assembler lossage.
Eliminate tb_lock.
Fix TB code generation overflow.

-----BEGIN PGP SIGNATURE-----

iQEcBAABAgAGBQJbJBZIAAoJEGTfOOivfiFfy0gH/1brodMhJbTS6/k9+FyXWEy5
zYjCGKKlMZk//Y+4wcF5tXY/qDRNWk80j6KyxumNp3gCBehx6u59EEsrJRQaxBHm
nYbDoE3Fy0J4KgRzdGmkYtl89XDK1++Ea9uL9N/stg2MSodzqoV6uudLYr/f+nRj
4MkS+7BI+aJ4/XIKLU+/+cRo+5FdD0hNEabjlUxTOSrfJbr/YxbnVINX01A4yD6q
LSzwLAEqpJehFBQjeSLu93ztrapj/1vEaguPOf04F6pXgOLpvSPlPahqwwk4qRwS
OFgWwSPby3jrNLYZcufx2cY5pG3i4wDGK3z/B35hnDEGwYp1fNt6xdq+EzmHhaM=
=ibt/
-----END PGP SIGNATURE-----

Merge remote-tracking branch 'remotes/rth/tags/pull-tcg-20180615' into staging

TCG patch queue:

Workaround macos assembler lossage.
Eliminate tb_lock.
Fix TB code generation overflow.

# gpg: Signature made Fri 15 Jun 2018 20:40:56 BST
# gpg: using RSA key 64DF38E8AF7E215F
# gpg: Good signature from "Richard Henderson <richard.henderson@linaro.org>"
# Primary key fingerprint: 7A48 1E78 868B 4DB6 A85A 05C0 64DF 38E8 AF7E 215F

* remotes/rth/tags/pull-tcg-20180615:
  tcg: Reduce max TB opcode count
  tcg: remove tb_lock
  translate-all: remove tb_lock mention from cpu_restore_state_from_tb
  cputlb: remove tb_lock from tlb_flush functions
  translate-all: protect TB jumps with a per-destination-TB lock
  translate-all: discard TB when tb_link_page returns an existing matching TB
  translate-all: introduce assert_no_pages_locked
  translate-all: add page_locked assertions
  translate-all: use per-page locking in !user-mode
  translate-all: move tb_invalidate_phys_page_range up in the file
  translate-all: work page-by-page in tb_invalidate_phys_range_1
  translate-all: remove hole in PageDesc
  translate-all: make l1_map lockless
  translate-all: iterate over TBs in a page with PAGE_FOR_EACH_TB
  tcg: move tb_ctx.tb_phys_invalidate_count to tcg_ctx
  tcg: track TBs with per-region BST's
  qht: return existing entry when qht_insert fails
  qht: require a default comparison function
  tcg/i386: Use byte form of xgetbv instruction

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
commit 33836a7315
@@ -212,20 +212,20 @@ static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
       We only end up here when an existing TB is too long. */
    cflags |= MIN(max_cycles, CF_COUNT_MASK);

    tb_lock();
    mmap_lock();
    tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base,
                     orig_tb->flags, cflags);
    tb->orig_tb = orig_tb;
    tb_unlock();
    mmap_unlock();

    /* execute the generated code */
    trace_exec_tb_nocache(tb, tb->pc);
    cpu_tb_exec(cpu, tb);

    tb_lock();
    mmap_lock();
    tb_phys_invalidate(tb, -1);
    tb_remove(tb);
    tb_unlock();
    mmap_unlock();
    tcg_tb_remove(tb);
}
#endif
@ -244,12 +244,7 @@ void cpu_exec_step_atomic(CPUState *cpu)
|
||||
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
|
||||
if (tb == NULL) {
|
||||
mmap_lock();
|
||||
tb_lock();
|
||||
tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask);
|
||||
if (likely(tb == NULL)) {
|
||||
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
|
||||
}
|
||||
tb_unlock();
|
||||
tb = tb_gen_code(cpu, pc, cs_base, flags, cflags);
|
||||
mmap_unlock();
|
||||
}
|
||||
|
||||
@ -264,15 +259,14 @@ void cpu_exec_step_atomic(CPUState *cpu)
|
||||
cpu_tb_exec(cpu, tb);
|
||||
cc->cpu_exec_exit(cpu);
|
||||
} else {
|
||||
/* We may have exited due to another problem here, so we need
|
||||
* to reset any tb_locks we may have taken but didn't release.
|
||||
/*
|
||||
* The mmap_lock is dropped by tb_gen_code if it runs out of
|
||||
* memory.
|
||||
*/
|
||||
#ifndef CONFIG_SOFTMMU
|
||||
tcg_debug_assert(!have_mmap_lock());
|
||||
#endif
|
||||
tb_lock_reset();
|
||||
assert_no_pages_locked();
|
||||
}
|
||||
|
||||
if (in_exclusive_region) {
|
||||
@ -295,7 +289,7 @@ struct tb_desc {
|
||||
uint32_t trace_vcpu_dstate;
|
||||
};
|
||||
|
||||
static bool tb_cmp(const void *p, const void *d)
|
||||
static bool tb_lookup_cmp(const void *p, const void *d)
|
||||
{
|
||||
const TranslationBlock *tb = p;
|
||||
const struct tb_desc *desc = d;
|
||||
@ -340,7 +334,7 @@ TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
|
||||
phys_pc = get_page_addr_code(desc.env, pc);
|
||||
desc.phys_page1 = phys_pc & TARGET_PAGE_MASK;
|
||||
h = tb_hash_func(phys_pc, pc, flags, cf_mask, *cpu->trace_dstate);
|
||||
return qht_lookup(&tb_ctx.htable, tb_cmp, &desc, h);
|
||||
return qht_lookup_custom(&tb_ctx.htable, &desc, h, tb_lookup_cmp);
|
||||
}
|
||||
|
||||
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
|
||||
@ -354,28 +348,43 @@ void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
|
||||
}
|
||||
}
|
||||
|
||||
/* Called with tb_lock held. */
|
||||
static inline void tb_add_jump(TranslationBlock *tb, int n,
|
||||
TranslationBlock *tb_next)
|
||||
{
|
||||
uintptr_t old;
|
||||
|
||||
assert(n < ARRAY_SIZE(tb->jmp_list_next));
|
||||
if (tb->jmp_list_next[n]) {
|
||||
/* Another thread has already done this while we were
|
||||
* outside of the lock; nothing to do in this case */
|
||||
return;
|
||||
qemu_spin_lock(&tb_next->jmp_lock);
|
||||
|
||||
/* make sure the destination TB is valid */
|
||||
if (tb_next->cflags & CF_INVALID) {
|
||||
goto out_unlock_next;
|
||||
}
|
||||
/* Atomically claim the jump destination slot only if it was NULL */
|
||||
old = atomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL, (uintptr_t)tb_next);
|
||||
if (old) {
|
||||
goto out_unlock_next;
|
||||
}
|
||||
|
||||
/* patch the native jump address */
|
||||
tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);
|
||||
|
||||
/* add in TB jmp list */
|
||||
tb->jmp_list_next[n] = tb_next->jmp_list_head;
|
||||
tb_next->jmp_list_head = (uintptr_t)tb | n;
|
||||
|
||||
qemu_spin_unlock(&tb_next->jmp_lock);
|
||||
|
||||
qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
|
||||
"Linking TBs %p [" TARGET_FMT_lx
|
||||
"] index %d -> %p [" TARGET_FMT_lx "]\n",
|
||||
tb->tc.ptr, tb->pc, n,
|
||||
tb_next->tc.ptr, tb_next->pc);
|
||||
return;
|
||||
|
||||
/* patch the native jump address */
|
||||
tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);
|
||||
|
||||
/* add in TB jmp circular list */
|
||||
tb->jmp_list_next[n] = tb_next->jmp_list_first;
|
||||
tb_next->jmp_list_first = (uintptr_t)tb | n;
|
||||
out_unlock_next:
|
||||
qemu_spin_unlock(&tb_next->jmp_lock);
|
||||
return;
|
||||
}
|
||||
|
||||
static inline TranslationBlock *tb_find(CPUState *cpu,
|
||||
@ -385,27 +394,11 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
|
||||
TranslationBlock *tb;
|
||||
target_ulong cs_base, pc;
|
||||
uint32_t flags;
|
||||
bool acquired_tb_lock = false;
|
||||
|
||||
tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags, cf_mask);
|
||||
if (tb == NULL) {
|
||||
/* mmap_lock is needed by tb_gen_code, and mmap_lock must be
|
||||
* taken outside tb_lock. As system emulation is currently
|
||||
* single threaded the locks are NOPs.
|
||||
*/
|
||||
mmap_lock();
|
||||
tb_lock();
|
||||
acquired_tb_lock = true;
|
||||
|
||||
/* There's a chance that our desired tb has been translated while
|
||||
* taking the locks so we check again inside the lock.
|
||||
*/
|
||||
tb = tb_htable_lookup(cpu, pc, cs_base, flags, cf_mask);
|
||||
if (likely(tb == NULL)) {
|
||||
/* if no translated code available, then translate it now */
|
||||
tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
|
||||
}
|
||||
|
||||
tb = tb_gen_code(cpu, pc, cs_base, flags, cf_mask);
|
||||
mmap_unlock();
|
||||
/* We add the TB in the virtual pc hash table for the fast lookup */
|
||||
atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
|
||||
@ -421,16 +414,7 @@ static inline TranslationBlock *tb_find(CPUState *cpu,
|
||||
#endif
|
||||
/* See if we can patch the calling TB. */
|
||||
if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
|
||||
if (!acquired_tb_lock) {
|
||||
tb_lock();
|
||||
acquired_tb_lock = true;
|
||||
}
|
||||
if (!(tb->cflags & CF_INVALID)) {
|
||||
tb_add_jump(last_tb, tb_exit, tb);
|
||||
}
|
||||
}
|
||||
if (acquired_tb_lock) {
|
||||
tb_unlock();
|
||||
tb_add_jump(last_tb, tb_exit, tb);
|
||||
}
|
||||
return tb;
|
||||
}
|
||||
@ -706,7 +690,9 @@ int cpu_exec(CPUState *cpu)
|
||||
g_assert(cpu == current_cpu);
|
||||
g_assert(cc == CPU_GET_CLASS(cpu));
|
||||
#endif /* buggy compiler */
|
||||
tb_lock_reset();
|
||||
#ifndef CONFIG_SOFTMMU
|
||||
tcg_debug_assert(!have_mmap_lock());
|
||||
#endif
|
||||
if (qemu_mutex_iothread_locked()) {
|
||||
qemu_mutex_unlock_iothread();
|
||||
}
|
||||
|
@ -125,8 +125,6 @@ static void tlb_flush_nocheck(CPUState *cpu)
|
||||
atomic_set(&env->tlb_flush_count, env->tlb_flush_count + 1);
|
||||
tlb_debug("(count: %zu)\n", tlb_flush_count());
|
||||
|
||||
tb_lock();
|
||||
|
||||
memset(env->tlb_table, -1, sizeof(env->tlb_table));
|
||||
memset(env->tlb_v_table, -1, sizeof(env->tlb_v_table));
|
||||
cpu_tb_jmp_cache_clear(cpu);
|
||||
@ -135,8 +133,6 @@ static void tlb_flush_nocheck(CPUState *cpu)
|
||||
env->tlb_flush_addr = -1;
|
||||
env->tlb_flush_mask = 0;
|
||||
|
||||
tb_unlock();
|
||||
|
||||
atomic_mb_set(&cpu->pending_tlb_flush, 0);
|
||||
}
|
||||
|
||||
@ -180,8 +176,6 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
|
||||
|
||||
assert_cpu_is_self(cpu);
|
||||
|
||||
tb_lock();
|
||||
|
||||
tlb_debug("start: mmu_idx:0x%04lx\n", mmu_idx_bitmask);
|
||||
|
||||
for (mmu_idx = 0; mmu_idx < NB_MMU_MODES; mmu_idx++) {
|
||||
@ -197,8 +191,6 @@ static void tlb_flush_by_mmuidx_async_work(CPUState *cpu, run_on_cpu_data data)
|
||||
cpu_tb_jmp_cache_clear(cpu);
|
||||
|
||||
tlb_debug("done\n");
|
||||
|
||||
tb_unlock();
|
||||
}
|
||||
|
||||
void tlb_flush_by_mmuidx(CPUState *cpu, uint16_t idxmap)
|
||||
|
(File diff suppressed because it is too large.)
@ -23,7 +23,11 @@
|
||||
|
||||
|
||||
/* translate-all.c */
|
||||
void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len);
|
||||
struct page_collection *page_collection_lock(tb_page_addr_t start,
|
||||
tb_page_addr_t end);
|
||||
void page_collection_unlock(struct page_collection *set);
|
||||
void tb_invalidate_phys_page_fast(struct page_collection *pages,
|
||||
tb_page_addr_t start, int len);
|
||||
void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end,
|
||||
int is_cpu_write_access);
|
||||
void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end);
|
||||
|
@@ -61,6 +61,7 @@ have their block-to-block jumps patched.
Global TCG State
----------------

### User-mode emulation
We need to protect the entire code generation cycle including any post
generation patching of the translated code. This also implies a shared
translation buffer which contains code running on all cores. Any
@@ -75,9 +76,11 @@ patching.

(Current solution)

Mainly as part of the linux-user work all code generation is
serialised with a tb_lock(). For the SoftMMU tb_lock() also takes the
place of mmap_lock() in linux-user.
Code generation is serialised with mmap_lock().

### !User-mode emulation
Each vCPU has its own TCG context and associated TCG region, thereby
requiring no locking.

Translation Blocks
------------------
@@ -131,15 +134,20 @@ DESIGN REQUIREMENT: Safely handle invalidation of TBs

The direct jump themselves are updated atomically by the TCG
tb_set_jmp_target() code. Modification to the linked lists that allow
searching for linked pages are done under the protect of the
tb_lock().
searching for linked pages are done under the protection of tb->jmp_lock,
where tb is the destination block of a jump. Each origin block keeps a
pointer to its destinations so that the appropriate lock can be acquired before
iterating over a jump list.

The global page table is protected by the tb_lock() in system-mode and
mmap_lock() in linux-user mode.
The global page table is a lockless radix tree; cmpxchg is used
to atomically insert new elements.

The lookup caches are updated atomically and the lookup hash uses QHT
which is designed for concurrent safe lookup.

Parallel code generation is supported. QHT is used at insertion time
as the synchronization point across threads, thereby ensuring that we only
keep track of a single TranslationBlock for each guest code block.

Memory maps and TLBs
--------------------
@@ -190,7 +198,7 @@ work as "safe work" and exiting the cpu run loop. This ensure by the
time execution restarts all flush operations have completed.

TLB flag updates are all done atomically and are also protected by the
tb_lock() which is used by the functions that update the TLB in bulk.
corresponding page lock.

(Known limitation)
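As an illustration of the per-destination-TB locking described above, the following sketch condenses the tb_add_jump() hunk shown earlier in this diff (logging and the early-exit labels are omitted; this is a reading aid under those assumptions, not a drop-in copy of the QEMU function):

static inline void tb_add_jump_sketch(TranslationBlock *tb, int n,
                                      TranslationBlock *tb_next)
{
    /* jmp_lock lives in the destination TB; it protects both CF_INVALID
     * and tb_next's list of incoming jumps. */
    qemu_spin_lock(&tb_next->jmp_lock);

    if (!(tb_next->cflags & CF_INVALID) &&
        atomic_cmpxchg(&tb->jmp_dest[n], (uintptr_t)NULL,
                       (uintptr_t)tb_next) == (uintptr_t)NULL) {
        /* patch the native jump and record the incoming-jump link */
        tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc.ptr);
        tb->jmp_list_next[n] = tb_next->jmp_list_head;
        tb_next->jmp_list_head = (uintptr_t)tb | n;
    }

    qemu_spin_unlock(&tb_next->jmp_lock);
}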
exec.c (26 changed lines)
@ -1031,9 +1031,7 @@ const char *parse_cpu_model(const char *cpu_model)
|
||||
static void breakpoint_invalidate(CPUState *cpu, target_ulong pc)
|
||||
{
|
||||
mmap_lock();
|
||||
tb_lock();
|
||||
tb_invalidate_phys_page_range(pc, pc + 1, 0);
|
||||
tb_unlock();
|
||||
mmap_unlock();
|
||||
}
|
||||
#else
|
||||
@ -2644,21 +2642,21 @@ void memory_notdirty_write_prepare(NotDirtyInfo *ndi,
|
||||
ndi->ram_addr = ram_addr;
|
||||
ndi->mem_vaddr = mem_vaddr;
|
||||
ndi->size = size;
|
||||
ndi->locked = false;
|
||||
ndi->pages = NULL;
|
||||
|
||||
assert(tcg_enabled());
|
||||
if (!cpu_physical_memory_get_dirty_flag(ram_addr, DIRTY_MEMORY_CODE)) {
|
||||
ndi->locked = true;
|
||||
tb_lock();
|
||||
tb_invalidate_phys_page_fast(ram_addr, size);
|
||||
ndi->pages = page_collection_lock(ram_addr, ram_addr + size);
|
||||
tb_invalidate_phys_page_fast(ndi->pages, ram_addr, size);
|
||||
}
|
||||
}
|
||||
|
||||
/* Called within RCU critical section. */
|
||||
void memory_notdirty_write_complete(NotDirtyInfo *ndi)
|
||||
{
|
||||
if (ndi->locked) {
|
||||
tb_unlock();
|
||||
if (ndi->pages) {
|
||||
page_collection_unlock(ndi->pages);
|
||||
ndi->pages = NULL;
|
||||
}
|
||||
|
||||
/* Set both VGA and migration bits for simplicity and to remove
|
||||
@ -2745,18 +2743,16 @@ static void check_watchpoint(int offset, int len, MemTxAttrs attrs, int flags)
|
||||
}
|
||||
cpu->watchpoint_hit = wp;
|
||||
|
||||
/* Both tb_lock and iothread_mutex will be reset when
|
||||
* cpu_loop_exit or cpu_loop_exit_noexc longjmp
|
||||
* back into the cpu_exec main loop.
|
||||
*/
|
||||
tb_lock();
|
||||
mmap_lock();
|
||||
tb_check_watchpoint(cpu);
|
||||
if (wp->flags & BP_STOP_BEFORE_ACCESS) {
|
||||
cpu->exception_index = EXCP_DEBUG;
|
||||
mmap_unlock();
|
||||
cpu_loop_exit(cpu);
|
||||
} else {
|
||||
/* Force execution of one insn next time. */
|
||||
cpu->cflags_next_tb = 1 | curr_cflags();
|
||||
mmap_unlock();
|
||||
cpu_loop_exit_noexc(cpu);
|
||||
}
|
||||
}
|
||||
@ -3147,9 +3143,9 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr,
|
||||
}
|
||||
if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) {
|
||||
assert(tcg_enabled());
|
||||
tb_lock();
|
||||
mmap_lock();
|
||||
tb_invalidate_phys_range(addr, addr + length);
|
||||
tb_unlock();
|
||||
mmap_unlock();
|
||||
dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE);
|
||||
}
|
||||
cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask);
|
||||
|
@ -23,7 +23,7 @@ typedef struct CPUListState {
|
||||
FILE *file;
|
||||
} CPUListState;
|
||||
|
||||
/* The CPU list lock nests outside tb_lock/tb_unlock. */
|
||||
/* The CPU list lock nests outside page_(un)lock or mmap_(un)lock */
|
||||
void qemu_init_cpu_list(void);
|
||||
void cpu_list_lock(void);
|
||||
void cpu_list_unlock(void);
|
||||
|
@ -345,7 +345,7 @@ struct TranslationBlock {
|
||||
#define CF_LAST_IO 0x00008000 /* Last insn may be an IO access. */
|
||||
#define CF_NOCACHE 0x00010000 /* To be freed after execution */
|
||||
#define CF_USE_ICOUNT 0x00020000
|
||||
#define CF_INVALID 0x00040000 /* TB is stale. Setters need tb_lock */
|
||||
#define CF_INVALID 0x00040000 /* TB is stale. Set with @jmp_lock held */
|
||||
#define CF_PARALLEL 0x00080000 /* Generate code for a parallel context */
|
||||
/* cflags' mask for hashing/comparison */
|
||||
#define CF_HASH_MASK \
|
||||
@ -359,10 +359,14 @@ struct TranslationBlock {
|
||||
/* original tb when cflags has CF_NOCACHE */
|
||||
struct TranslationBlock *orig_tb;
|
||||
/* first and second physical page containing code. The lower bit
|
||||
of the pointer tells the index in page_next[] */
|
||||
struct TranslationBlock *page_next[2];
|
||||
of the pointer tells the index in page_next[].
|
||||
The list is protected by the TB's page('s) lock(s) */
|
||||
uintptr_t page_next[2];
|
||||
tb_page_addr_t page_addr[2];
|
||||
|
||||
/* jmp_lock placed here to fill a 4-byte hole. Its documentation is below */
|
||||
QemuSpin jmp_lock;
|
||||
|
||||
/* The following data are used to directly call another TB from
|
||||
* the code of this one. This can be done either by emitting direct or
|
||||
* indirect native jump instructions. These jumps are reset so that the TB
|
||||
@@ -374,20 +378,26 @@ struct TranslationBlock {
#define TB_JMP_RESET_OFFSET_INVALID 0xffff /* indicates no jump generated */
    uintptr_t jmp_target_arg[2]; /* target address or offset */

    /* Each TB has an associated circular list of TBs jumping to this one.
     * jmp_list_first points to the first TB jumping to this one.
     * jmp_list_next is used to point to the next TB in a list.
     * Since each TB can have two jumps, it can participate in two lists.
     * jmp_list_first and jmp_list_next are 4-byte aligned pointers to a
     * TranslationBlock structure, but the two least significant bits of
     * them are used to encode which data field of the pointed TB should
     * be used to traverse the list further from that TB:
     * 0 => jmp_list_next[0], 1 => jmp_list_next[1], 2 => jmp_list_first.
     * In other words, 0/1 tells which jump is used in the pointed TB,
     * and 2 means that this is a pointer back to the target TB of this list.
    /*
     * Each TB has a NULL-terminated list (jmp_list_head) of incoming jumps.
     * Each TB can have two outgoing jumps, and therefore can participate
     * in two lists. The list entries are kept in jmp_list_next[2]. The least
     * significant bit (LSB) of the pointers in these lists is used to encode
     * which of the two list entries is to be used in the pointed TB.
     *
     * List traversals are protected by jmp_lock. The destination TB of each
     * outgoing jump is kept in jmp_dest[] so that the appropriate jmp_lock
     * can be acquired from any origin TB.
     *
     * jmp_dest[] are tagged pointers as well. The LSB is set when the TB is
     * being invalidated, so that no further outgoing jumps from it can be set.
     *
     * jmp_lock also protects the CF_INVALID cflag; a jump must not be chained
     * to a destination TB that has CF_INVALID set.
     */
    uintptr_t jmp_list_head;
    uintptr_t jmp_list_next[2];
    uintptr_t jmp_list_first;
    uintptr_t jmp_dest[2];
};
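As a reading aid for the tagged-pointer encoding documented in the comment above, one step of walking an incoming-jump list might be sketched as follows (hypothetical snippet: tb_dest is a placeholder name, QEMU's own iteration helper is not part of this hunk, and the destination's jmp_lock is assumed to be held):

uintptr_t ptr = tb_dest->jmp_list_head;              /* 0 terminates the list */
while (ptr) {
    TranslationBlock *origin = (TranslationBlock *)(ptr & ~(uintptr_t)1);
    int slot = ptr & 1;      /* which of origin's two jumps targets tb_dest */
    /* ... e.g. reset origin's native jump for 'slot' here ... */
    ptr = origin->jmp_list_next[slot];
}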
|
||||
|
||||
extern bool parallel_cpus;
|
||||
@ -405,7 +415,6 @@ static inline uint32_t curr_cflags(void)
|
||||
| (use_icount ? CF_USE_ICOUNT : 0);
|
||||
}
|
||||
|
||||
void tb_remove(TranslationBlock *tb);
|
||||
void tb_flush(CPUState *cpu);
|
||||
void tb_phys_invalidate(TranslationBlock *tb, tb_page_addr_t page_addr);
|
||||
TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
|
||||
@ -431,9 +440,13 @@ extern uintptr_t tci_tb_ptr;
|
||||
smaller than 4 bytes, so we don't worry about special-casing this. */
|
||||
#define GETPC_ADJ 2
|
||||
|
||||
void tb_lock(void);
|
||||
void tb_unlock(void);
|
||||
void tb_lock_reset(void);
|
||||
#if !defined(CONFIG_USER_ONLY) && defined(CONFIG_DEBUG_TCG)
|
||||
void assert_no_pages_locked(void);
|
||||
#else
|
||||
static inline void assert_no_pages_locked(void)
|
||||
{
|
||||
}
|
||||
#endif
|
||||
|
||||
#if !defined(CONFIG_USER_ONLY)
|
||||
|
||||
|
@ -49,6 +49,8 @@ void mtree_print_dispatch(fprintf_function mon, void *f,
|
||||
struct AddressSpaceDispatch *d,
|
||||
MemoryRegion *root);
|
||||
|
||||
struct page_collection;
|
||||
|
||||
/* Opaque struct for passing info from memory_notdirty_write_prepare()
|
||||
* to memory_notdirty_write_complete(). Callers should treat all fields
|
||||
* as private, with the exception of @active.
|
||||
@ -60,10 +62,10 @@ void mtree_print_dispatch(fprintf_function mon, void *f,
|
||||
*/
|
||||
typedef struct {
|
||||
CPUState *cpu;
|
||||
struct page_collection *pages;
|
||||
ram_addr_t ram_addr;
|
||||
vaddr mem_vaddr;
|
||||
unsigned size;
|
||||
bool locked;
|
||||
bool active;
|
||||
} NotDirtyInfo;
|
||||
|
||||
@ -91,7 +93,7 @@ typedef struct {
|
||||
*
|
||||
* This must only be called if we are using TCG; it will assert otherwise.
|
||||
*
|
||||
* We may take a lock in the prepare call, so callers must ensure that
|
||||
* We may take locks in the prepare call, so callers must ensure that
|
||||
* they don't exit (via longjump or otherwise) without calling complete.
|
||||
*
|
||||
* This call must only be made inside an RCU critical section.
|
||||
|
@ -31,14 +31,10 @@ typedef struct TBContext TBContext;
|
||||
|
||||
struct TBContext {
|
||||
|
||||
GTree *tb_tree;
|
||||
struct qht htable;
|
||||
/* any access to the tbs or the page table must use this lock */
|
||||
QemuMutex tb_lock;
|
||||
|
||||
/* statistics */
|
||||
unsigned tb_flush_count;
|
||||
int tb_phys_invalidate_count;
|
||||
};
|
||||
|
||||
extern TBContext tb_ctx;
|
||||
|
@@ -11,8 +11,11 @@
#include "qemu/thread.h"
#include "qemu/qdist.h"

typedef bool (*qht_cmp_func_t)(const void *a, const void *b);

struct qht {
    struct qht_map *map;
    qht_cmp_func_t cmp;
    QemuMutex lock; /* serializes setters of ht->map */
    unsigned int mode;
};
@@ -47,10 +50,12 @@ typedef void (*qht_iter_func_t)(struct qht *ht, void *p, uint32_t h, void *up);
/**
 * qht_init - Initialize a QHT
 * @ht: QHT to be initialized
 * @cmp: default comparison function. Cannot be NULL.
 * @n_elems: number of entries the hash table should be optimized for.
 * @mode: bitmask with OR'ed QHT_MODE_*
 */
void qht_init(struct qht *ht, size_t n_elems, unsigned int mode);
void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems,
              unsigned int mode);

/**
 * qht_destroy - destroy a previously initialized QHT
@@ -65,6 +70,7 @@ void qht_destroy(struct qht *ht);
 * @ht: QHT to insert to
 * @p: pointer to be inserted
 * @hash: hash corresponding to @p
 * @existing: address where the pointer to an existing entry can be copied to
 *
 * Attempting to insert a NULL @p is a bug.
 * Inserting the same pointer @p with different @hash values is a bug.
@@ -73,16 +79,18 @@ void qht_destroy(struct qht *ht);
 * inserted into the hash table.
 *
 * Returns true on success.
 * Returns false if the @p-@hash pair already exists in the hash table.
 * Returns false if there is an existing entry in the table that is equivalent
 * (i.e. ht->cmp matches and the hash is the same) to @p-@h. If @existing
 * is !NULL, a pointer to this existing entry is copied to it.
 */
bool qht_insert(struct qht *ht, void *p, uint32_t hash);
bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing);

/**
 * qht_lookup - Look up a pointer in a QHT
 * qht_lookup_custom - Look up a pointer using a custom comparison function.
 * @ht: QHT to be looked up
 * @func: function to compare existing pointers against @userp
 * @userp: pointer to pass to @func
 * @hash: hash of the pointer to be looked up
 * @func: function to compare existing pointers against @userp
 *
 * Needs to be called under an RCU read-critical section.
 *
@@ -94,8 +102,18 @@ bool qht_insert(struct qht *ht, void *p, uint32_t hash);
 * Returns the corresponding pointer when a match is found.
 * Returns NULL otherwise.
 */
void *qht_lookup(struct qht *ht, qht_lookup_func_t func, const void *userp,
                 uint32_t hash);
void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash,
                        qht_lookup_func_t func);

/**
 * qht_lookup - Look up a pointer in a QHT
 * @ht: QHT to be looked up
 * @userp: pointer to pass to the comparison function
 * @hash: hash of the pointer to be looked up
 *
 * Calls qht_lookup_custom() using @ht's default comparison function.
 */
void *qht_lookup(struct qht *ht, const void *userp, uint32_t hash);
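Putting the prototypes above together, a caller of the reworked API might look roughly like this (illustrative sketch only; my_cmp(), the obj/hash values and the wrapping function are placeholders rather than QEMU identifiers, QEMU's qht.h and rcu.h are assumed to be included, and lookups still need to run inside an RCU read-critical section as documented above):

static bool my_cmp(const void *a, const void *b)
{
    return *(const long *)a == *(const long *)b;   /* compare the pointed-to keys */
}

static void qht_usage_sketch(struct qht *ht, long *obj, uint32_t hash)
{
    void *existing;

    qht_init(ht, my_cmp, 0, QHT_MODE_AUTO_RESIZE);  /* default cmp is now mandatory */

    if (!qht_insert(ht, obj, hash, &existing)) {
        /* an equivalent entry was already present; 'existing' now points at it */
    }

    rcu_read_lock();
    void *p = qht_lookup(ht, obj, hash);                 /* default comparator */
    void *q = qht_lookup_custom(ht, obj, hash, my_cmp);  /* explicit comparator */
    rcu_read_unlock();
    (void)p; (void)q;
}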
|
||||
|
||||
/**
|
||||
* qht_remove - remove a pointer from the hash table
|
||||
|
@ -120,7 +120,6 @@ void fork_start(void)
|
||||
{
|
||||
start_exclusive();
|
||||
mmap_fork_start();
|
||||
qemu_mutex_lock(&tb_ctx.tb_lock);
|
||||
cpu_list_lock();
|
||||
}
|
||||
|
||||
@ -136,14 +135,12 @@ void fork_end(int child)
|
||||
QTAILQ_REMOVE(&cpus, cpu, node);
|
||||
}
|
||||
}
|
||||
qemu_mutex_init(&tb_ctx.tb_lock);
|
||||
qemu_init_cpu_list();
|
||||
gdbserver_fork(thread_cpu);
|
||||
/* qemu_init_cpu_list() takes care of reinitializing the
|
||||
* exclusive state, so we don't need to end_exclusive() here.
|
||||
*/
|
||||
} else {
|
||||
qemu_mutex_unlock(&tb_ctx.tb_lock);
|
||||
cpu_list_unlock();
|
||||
end_exclusive();
|
||||
}
|
||||
|
@ -1733,7 +1733,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tcg_out_insn(s, 3305, LDR, offset, TCG_REG_TMP);
|
||||
}
|
||||
tcg_out_insn(s, 3207, BR, TCG_REG_TMP);
|
||||
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
|
||||
set_jmp_reset_offset(s, a0);
|
||||
break;
|
||||
|
||||
case INDEX_op_goto_ptr:
|
||||
|
@ -1822,7 +1822,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tcg_out_movi32(s, COND_AL, base, ptr - dil);
|
||||
}
|
||||
tcg_out_ld32_12(s, COND_AL, TCG_REG_PC, base, dil);
|
||||
s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
|
||||
set_jmp_reset_offset(s, args[0]);
|
||||
}
|
||||
break;
|
||||
case INDEX_op_goto_ptr:
|
||||
|
@ -2245,7 +2245,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tcg_out_modrm_offset(s, OPC_GRP5, EXT5_JMPN_Ev, -1,
|
||||
(intptr_t)(s->tb_jmp_target_addr + a0));
|
||||
}
|
||||
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
|
||||
set_jmp_reset_offset(s, a0);
|
||||
break;
|
||||
case INDEX_op_goto_ptr:
|
||||
/* jmp to the given host address (could be epilogue) */
|
||||
@ -3501,7 +3501,10 @@ static void tcg_target_init(TCGContext *s)
|
||||
sure of not hitting invalid opcode. */
|
||||
if (c & bit_OSXSAVE) {
|
||||
unsigned xcrl, xcrh;
|
||||
asm ("xgetbv" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
|
||||
/* The xgetbv instruction is not available to older versions of
|
||||
* the assembler, so we encode the instruction manually.
|
||||
*/
|
||||
asm(".byte 0x0f, 0x01, 0xd0" : "=a" (xcrl), "=d" (xcrh) : "c" (0));
|
||||
if ((xcrl & 6) == 6) {
|
||||
have_avx1 = (c & bit_AVX) != 0;
|
||||
have_avx2 = (b7 & bit_AVX2) != 0;
|
||||
|
@ -1744,7 +1744,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tcg_out_opc_reg(s, OPC_JR, 0, TCG_TMP0, 0);
|
||||
}
|
||||
tcg_out_nop(s);
|
||||
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
|
||||
set_jmp_reset_offset(s, a0);
|
||||
break;
|
||||
case INDEX_op_goto_ptr:
|
||||
/* jmp to the given host address (could be epilogue) */
|
||||
|
@ -2025,10 +2025,10 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
|
||||
}
|
||||
tcg_out32(s, MTSPR | RS(TCG_REG_TB) | CTR);
|
||||
tcg_out32(s, BCCTR | BO_ALWAYS);
|
||||
s->tb_jmp_reset_offset[args[0]] = c = tcg_current_code_size(s);
|
||||
set_jmp_reset_offset(s, args[0]);
|
||||
if (USE_REG_TB) {
|
||||
/* For the unlinked case, need to reset TCG_REG_TB. */
|
||||
c = -c;
|
||||
c = -tcg_current_code_size(s);
|
||||
assert(c == (int16_t)c);
|
||||
tcg_out32(s, ADDI | TAI(TCG_REG_TB, TCG_REG_TB, c));
|
||||
}
|
||||
|
@ -1783,7 +1783,7 @@ static inline void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
/* and go there */
|
||||
tcg_out_insn(s, RR, BCR, S390_CC_ALWAYS, TCG_REG_TB);
|
||||
}
|
||||
s->tb_jmp_reset_offset[a0] = tcg_current_code_size(s);
|
||||
set_jmp_reset_offset(s, a0);
|
||||
|
||||
/* For the unlinked path of goto_tb, we need to reset
|
||||
TCG_REG_TB to the beginning of this TB. */
|
||||
|
@ -1388,12 +1388,12 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc,
|
||||
tcg_out_arithi(s, TCG_REG_G0, TCG_REG_TB, 0, JMPL);
|
||||
tcg_out_nop(s);
|
||||
}
|
||||
s->tb_jmp_reset_offset[a0] = c = tcg_current_code_size(s);
|
||||
set_jmp_reset_offset(s, a0);
|
||||
|
||||
/* For the unlinked path of goto_tb, we need to reset
|
||||
TCG_REG_TB to the beginning of this TB. */
|
||||
if (USE_REG_TB) {
|
||||
c = -c;
|
||||
c = -tcg_current_code_size(s);
|
||||
if (check_fit_i32(c, 13)) {
|
||||
tcg_out_arithi(s, TCG_REG_TB, TCG_REG_TB, c, ARITH_ADD);
|
||||
} else {
|
||||
|
tcg/tcg.c (218 changed lines)
@ -135,6 +135,12 @@ static TCGContext **tcg_ctxs;
|
||||
static unsigned int n_tcg_ctxs;
|
||||
TCGv_env cpu_env = 0;
|
||||
|
||||
struct tcg_region_tree {
|
||||
QemuMutex lock;
|
||||
GTree *tree;
|
||||
/* padding to avoid false sharing is computed at run-time */
|
||||
};
|
||||
|
||||
/*
|
||||
* We divide code_gen_buffer into equally-sized "regions" that TCG threads
|
||||
* dynamically allocate from as demand dictates. Given appropriate region
|
||||
@ -158,6 +164,13 @@ struct tcg_region_state {
|
||||
};
|
||||
|
||||
static struct tcg_region_state region;
|
||||
/*
|
||||
* This is an array of struct tcg_region_tree's, with padding.
|
||||
* We use void * to simplify the computation of region_trees[i]; each
|
||||
* struct is found every tree_size bytes.
|
||||
*/
|
||||
static void *region_trees;
|
||||
static size_t tree_size;
|
||||
static TCGRegSet tcg_target_available_regs[TCG_TYPE_COUNT];
|
||||
static TCGRegSet tcg_target_call_clobber_regs;
|
||||
|
||||
@ -293,8 +306,190 @@ TCGLabel *gen_new_label(void)
|
||||
return l;
|
||||
}
|
||||
|
||||
static void set_jmp_reset_offset(TCGContext *s, int which)
{
    size_t off = tcg_current_code_size(s);
    s->tb_jmp_reset_offset[which] = off;
    /* Make sure that we didn't overflow the stored offset. */
    assert(s->tb_jmp_reset_offset[which] == off);
}
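The assertion above catches overflow because the stored offset field is narrower than size_t (the tcg.h hunk later in this diff refers to the 16-bit unsigned offsets in TranslationBlock.jmp_reset_offset[]), so a value that does not survive the store-and-compare round trip must have been truncated. A standalone illustration of the idiom (not QEMU code):

#include <assert.h>
#include <stdint.h>
#include <stddef.h>

static uint16_t stored_offset;     /* stands in for a 16-bit jump-reset offset */

static void store_checked(size_t off)
{
    stored_offset = off;           /* silently truncates once off >= 0x10000 */
    assert(stored_offset == off);  /* fires exactly when truncation happened */
}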
|
||||
|
||||
#include "tcg-target.inc.c"
|
||||
|
||||
/* compare a pointer @ptr and a tb_tc @s */
|
||||
static int ptr_cmp_tb_tc(const void *ptr, const struct tb_tc *s)
|
||||
{
|
||||
if (ptr >= s->ptr + s->size) {
|
||||
return 1;
|
||||
} else if (ptr < s->ptr) {
|
||||
return -1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
static gint tb_tc_cmp(gconstpointer ap, gconstpointer bp)
|
||||
{
|
||||
const struct tb_tc *a = ap;
|
||||
const struct tb_tc *b = bp;
|
||||
|
||||
/*
|
||||
* When both sizes are set, we know this isn't a lookup.
|
||||
* This is the most likely case: every TB must be inserted; lookups
|
||||
* are a lot less frequent.
|
||||
*/
|
||||
if (likely(a->size && b->size)) {
|
||||
if (a->ptr > b->ptr) {
|
||||
return 1;
|
||||
} else if (a->ptr < b->ptr) {
|
||||
return -1;
|
||||
}
|
||||
/* a->ptr == b->ptr should happen only on deletions */
|
||||
g_assert(a->size == b->size);
|
||||
return 0;
|
||||
}
|
||||
/*
|
||||
* All lookups have either .size field set to 0.
|
||||
* From the glib sources we see that @ap is always the lookup key. However
|
||||
* the docs provide no guarantee, so we just mark this case as likely.
|
||||
*/
|
||||
if (likely(a->size == 0)) {
|
||||
return ptr_cmp_tb_tc(a->ptr, b);
|
||||
}
|
||||
return ptr_cmp_tb_tc(b->ptr, a);
|
||||
}
|
||||
|
||||
static void tcg_region_trees_init(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
tree_size = ROUND_UP(sizeof(struct tcg_region_tree), qemu_dcache_linesize);
|
||||
region_trees = qemu_memalign(qemu_dcache_linesize, region.n * tree_size);
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
qemu_mutex_init(&rt->lock);
|
||||
rt->tree = g_tree_new(tb_tc_cmp);
|
||||
}
|
||||
}
|
||||
|
||||
static struct tcg_region_tree *tc_ptr_to_region_tree(void *p)
|
||||
{
|
||||
size_t region_idx;
|
||||
|
||||
if (p < region.start_aligned) {
|
||||
region_idx = 0;
|
||||
} else {
|
||||
ptrdiff_t offset = p - region.start_aligned;
|
||||
|
||||
if (offset > region.stride * (region.n - 1)) {
|
||||
region_idx = region.n - 1;
|
||||
} else {
|
||||
region_idx = offset / region.stride;
|
||||
}
|
||||
}
|
||||
return region_trees + region_idx * tree_size;
|
||||
}
|
||||
|
||||
void tcg_tb_insert(TranslationBlock *tb)
|
||||
{
|
||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
|
||||
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
g_tree_insert(rt->tree, &tb->tc, tb);
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
}
|
||||
|
||||
void tcg_tb_remove(TranslationBlock *tb)
|
||||
{
|
||||
struct tcg_region_tree *rt = tc_ptr_to_region_tree(tb->tc.ptr);
|
||||
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
g_tree_remove(rt->tree, &tb->tc);
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
}
|
||||
|
||||
/*
 * Find the TB 'tb' such that
 * tb->tc.ptr <= tc_ptr < tb->tc.ptr + tb->tc.size
 * Return NULL if not found.
 */
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr)
{
    struct tcg_region_tree *rt = tc_ptr_to_region_tree((void *)tc_ptr);
    TranslationBlock *tb;
    struct tb_tc s = { .ptr = (void *)tc_ptr };

    qemu_mutex_lock(&rt->lock);
    tb = g_tree_lookup(rt->tree, &s);
    qemu_mutex_unlock(&rt->lock);
    return tb;
}
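For context, the typical consumer of this lookup is host-PC unwinding (for example cpu_restore_state_from_tb, mentioned in the shortlog but not shown in this diff); a hypothetical call site, with retaddr standing in for a host return address obtained from generated code, might look like:

/* Map a host code pointer back to its TranslationBlock; on success
 * tb->tc.ptr <= retaddr < tb->tc.ptr + tb->tc.size holds. */
TranslationBlock *tb = tcg_tb_lookup(retaddr);
if (tb == NULL) {
    return;    /* retaddr is not within any translated block */
}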
|
||||
|
||||
static void tcg_region_tree_lock_all(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
qemu_mutex_lock(&rt->lock);
|
||||
}
|
||||
}
|
||||
|
||||
static void tcg_region_tree_unlock_all(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
qemu_mutex_unlock(&rt->lock);
|
||||
}
|
||||
}
|
||||
|
||||
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
tcg_region_tree_lock_all();
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
g_tree_foreach(rt->tree, func, user_data);
|
||||
}
|
||||
tcg_region_tree_unlock_all();
|
||||
}
|
||||
|
||||
size_t tcg_nb_tbs(void)
|
||||
{
|
||||
size_t nb_tbs = 0;
|
||||
size_t i;
|
||||
|
||||
tcg_region_tree_lock_all();
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
nb_tbs += g_tree_nnodes(rt->tree);
|
||||
}
|
||||
tcg_region_tree_unlock_all();
|
||||
return nb_tbs;
|
||||
}
|
||||
|
||||
static void tcg_region_tree_reset_all(void)
|
||||
{
|
||||
size_t i;
|
||||
|
||||
tcg_region_tree_lock_all();
|
||||
for (i = 0; i < region.n; i++) {
|
||||
struct tcg_region_tree *rt = region_trees + i * tree_size;
|
||||
|
||||
/* Increment the refcount first so that destroy acts as a reset */
|
||||
g_tree_ref(rt->tree);
|
||||
g_tree_destroy(rt->tree);
|
||||
}
|
||||
tcg_region_tree_unlock_all();
|
||||
}
|
||||
|
||||
static void tcg_region_bounds(size_t curr_region, void **pstart, void **pend)
|
||||
{
|
||||
void *start, *end;
|
||||
@ -380,6 +575,8 @@ void tcg_region_reset_all(void)
|
||||
g_assert(!err);
|
||||
}
|
||||
qemu_mutex_unlock(®ion.lock);
|
||||
|
||||
tcg_region_tree_reset_all();
|
||||
}
|
||||
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
@ -496,6 +693,8 @@ void tcg_region_init(void)
|
||||
g_assert(!rc);
|
||||
}
|
||||
|
||||
tcg_region_trees_init();
|
||||
|
||||
/* In user-mode we support only one ctx, so do the initial allocation now */
|
||||
#ifdef CONFIG_USER_ONLY
|
||||
{
|
||||
@ -600,6 +799,20 @@ size_t tcg_code_capacity(void)
|
||||
return capacity;
|
||||
}
|
||||
|
||||
size_t tcg_tb_phys_invalidate_count(void)
|
||||
{
|
||||
unsigned int n_ctxs = atomic_read(&n_tcg_ctxs);
|
||||
unsigned int i;
|
||||
size_t total = 0;
|
||||
|
||||
for (i = 0; i < n_ctxs; i++) {
|
||||
const TCGContext *s = atomic_read(&tcg_ctxs[i]);
|
||||
|
||||
total += atomic_read(&s->tb_phys_invalidate_count);
|
||||
}
|
||||
return total;
|
||||
}
|
||||
|
||||
/* pool based memory allocation */
|
||||
void *tcg_malloc_internal(TCGContext *s, int size)
|
||||
{
|
||||
@ -3327,7 +3540,10 @@ int tcg_gen_code(TCGContext *s, TranslationBlock *tb)
|
||||
break;
|
||||
case INDEX_op_insn_start:
|
||||
if (num_insns >= 0) {
|
||||
s->gen_insn_end_off[num_insns] = tcg_current_code_size(s);
|
||||
size_t off = tcg_current_code_size(s);
|
||||
s->gen_insn_end_off[num_insns] = off;
|
||||
/* Assert that we do not overflow our stored offset. */
|
||||
assert(s->gen_insn_end_off[num_insns] == off);
|
||||
}
|
||||
num_insns++;
|
||||
for (i = 0; i < TARGET_INSN_START_WORDS; ++i) {
|
||||
|
tcg/tcg.h (19 changed lines)
@ -695,6 +695,8 @@ struct TCGContext {
|
||||
/* Threshold to flush the translated code buffer. */
|
||||
void *code_gen_highwater;
|
||||
|
||||
size_t tb_phys_invalidate_count;
|
||||
|
||||
/* Track which vCPU triggers events */
|
||||
CPUState *cpu; /* *_trans */
|
||||
|
||||
@ -848,14 +850,16 @@ static inline bool tcg_op_buf_full(void)
|
||||
/* This is not a hard limit, it merely stops translation when
|
||||
* we have produced "enough" opcodes. We want to limit TB size
|
||||
* such that a RISC host can reasonably use a 16-bit signed
|
||||
* branch within the TB.
|
||||
* branch within the TB. We also need to be mindful of the
|
||||
* 16-bit unsigned offsets, TranslationBlock.jmp_reset_offset[]
|
||||
* and TCGContext.gen_insn_end_off[].
|
||||
*/
|
||||
return tcg_ctx->nb_ops >= 8000;
|
||||
return tcg_ctx->nb_ops >= 4000;
|
||||
}
|
||||
|
||||
/* pool based memory allocation */
|
||||
|
||||
/* user-mode: tb_lock must be held for tcg_malloc_internal. */
|
||||
/* user-mode: mmap_lock must be held for tcg_malloc_internal. */
|
||||
void *tcg_malloc_internal(TCGContext *s, int size);
|
||||
void tcg_pool_reset(TCGContext *s);
|
||||
TranslationBlock *tcg_tb_alloc(TCGContext *s);
|
||||
@ -866,7 +870,14 @@ void tcg_region_reset_all(void);
|
||||
size_t tcg_code_size(void);
|
||||
size_t tcg_code_capacity(void);
|
||||
|
||||
/* user-mode: Called with tb_lock held. */
|
||||
void tcg_tb_insert(TranslationBlock *tb);
|
||||
void tcg_tb_remove(TranslationBlock *tb);
|
||||
size_t tcg_tb_phys_invalidate_count(void);
|
||||
TranslationBlock *tcg_tb_lookup(uintptr_t tc_ptr);
|
||||
void tcg_tb_foreach(GTraverseFunc func, gpointer user_data);
|
||||
size_t tcg_nb_tbs(void);
|
||||
|
||||
/* user-mode: Called with mmap_lock held. */
|
||||
static inline void *tcg_malloc(int size)
|
||||
{
|
||||
TCGContext *s = tcg_ctx;
|
||||
|
@ -574,7 +574,7 @@ static void tcg_out_op(TCGContext *s, TCGOpcode opc, const TCGArg *args,
|
||||
/* Indirect jump method. */
|
||||
TODO();
|
||||
}
|
||||
s->tb_jmp_reset_offset[args[0]] = tcg_current_code_size(s);
|
||||
set_jmp_reset_offset(s, args[0]);
|
||||
break;
|
||||
case INDEX_op_br:
|
||||
tci_out_label(s, arg_label(args[0]));
|
||||
|
@ -93,10 +93,10 @@ static void usage_complete(int argc, char *argv[])
|
||||
exit(-1);
|
||||
}
|
||||
|
||||
static bool is_equal(const void *obj, const void *userp)
|
||||
static bool is_equal(const void *ap, const void *bp)
|
||||
{
|
||||
const long *a = obj;
|
||||
const long *b = userp;
|
||||
const long *a = ap;
|
||||
const long *b = bp;
|
||||
|
||||
return *a == *b;
|
||||
}
|
||||
@ -150,7 +150,7 @@ static void do_rw(struct thread_info *info)
|
||||
|
||||
p = &keys[info->r & (lookup_range - 1)];
|
||||
hash = h(*p);
|
||||
read = qht_lookup(&ht, is_equal, p, hash);
|
||||
read = qht_lookup(&ht, p, hash);
|
||||
if (read) {
|
||||
stats->rd++;
|
||||
} else {
|
||||
@ -162,8 +162,8 @@ static void do_rw(struct thread_info *info)
|
||||
if (info->write_op) {
|
||||
bool written = false;
|
||||
|
||||
if (qht_lookup(&ht, is_equal, p, hash) == NULL) {
|
||||
written = qht_insert(&ht, p, hash);
|
||||
if (qht_lookup(&ht, p, hash) == NULL) {
|
||||
written = qht_insert(&ht, p, hash, NULL);
|
||||
}
|
||||
if (written) {
|
||||
stats->in++;
|
||||
@ -173,7 +173,7 @@ static void do_rw(struct thread_info *info)
|
||||
} else {
|
||||
bool removed = false;
|
||||
|
||||
if (qht_lookup(&ht, is_equal, p, hash)) {
|
||||
if (qht_lookup(&ht, p, hash)) {
|
||||
removed = qht_remove(&ht, p, hash);
|
||||
}
|
||||
if (removed) {
|
||||
@ -308,7 +308,7 @@ static void htable_init(void)
|
||||
}
|
||||
|
||||
/* initialize the hash table */
|
||||
qht_init(&ht, qht_n_elems, qht_mode);
|
||||
qht_init(&ht, is_equal, qht_n_elems, qht_mode);
|
||||
assert(init_size <= init_range);
|
||||
|
||||
pr_params();
|
||||
@ -322,7 +322,7 @@ static void htable_init(void)
|
||||
r = xorshift64star(r);
|
||||
p = &keys[r & (init_range - 1)];
|
||||
hash = h(*p);
|
||||
if (qht_insert(&ht, p, hash)) {
|
||||
if (qht_insert(&ht, p, hash, NULL)) {
|
||||
break;
|
||||
}
|
||||
retries++;
|
||||
|
@ -13,10 +13,10 @@
|
||||
static struct qht ht;
|
||||
static int32_t arr[N * 2];
|
||||
|
||||
static bool is_equal(const void *obj, const void *userp)
|
||||
static bool is_equal(const void *ap, const void *bp)
|
||||
{
|
||||
const int32_t *a = obj;
|
||||
const int32_t *b = userp;
|
||||
const int32_t *a = ap;
|
||||
const int32_t *b = bp;
|
||||
|
||||
return *a == *b;
|
||||
}
|
||||
@ -27,11 +27,17 @@ static void insert(int a, int b)
|
||||
|
||||
for (i = a; i < b; i++) {
|
||||
uint32_t hash;
|
||||
void *existing;
|
||||
bool inserted;
|
||||
|
||||
arr[i] = i;
|
||||
hash = i;
|
||||
|
||||
qht_insert(&ht, &arr[i], hash);
|
||||
inserted = qht_insert(&ht, &arr[i], hash, NULL);
|
||||
g_assert_true(inserted);
|
||||
inserted = qht_insert(&ht, &arr[i], hash, &existing);
|
||||
g_assert_false(inserted);
|
||||
g_assert_true(existing == &arr[i]);
|
||||
}
|
||||
}
|
||||
|
||||
@ -60,7 +66,12 @@ static void check(int a, int b, bool expected)
|
||||
|
||||
val = i;
|
||||
hash = i;
|
||||
p = qht_lookup(&ht, is_equal, &val, hash);
|
||||
/* test both lookup variants; results should be the same */
|
||||
if (i % 2) {
|
||||
p = qht_lookup(&ht, &val, hash);
|
||||
} else {
|
||||
p = qht_lookup_custom(&ht, &val, hash, is_equal);
|
||||
}
|
||||
g_assert_true(!!p == expected);
|
||||
}
|
||||
rcu_read_unlock();
|
||||
@ -102,7 +113,7 @@ static void qht_do_test(unsigned int mode, size_t init_entries)
|
||||
/* under KVM we might fetch stats from an uninitialized qht */
|
||||
check_n(0);
|
||||
|
||||
qht_init(&ht, 0, mode);
|
||||
qht_init(&ht, is_equal, 0, mode);
|
||||
|
||||
check_n(0);
|
||||
insert(0, N);
|
||||
|
util/qht.c (41 changed lines)
@ -351,11 +351,14 @@ static struct qht_map *qht_map_create(size_t n_buckets)
|
||||
return map;
|
||||
}
|
||||
|
||||
void qht_init(struct qht *ht, size_t n_elems, unsigned int mode)
|
||||
void qht_init(struct qht *ht, qht_cmp_func_t cmp, size_t n_elems,
|
||||
unsigned int mode)
|
||||
{
|
||||
struct qht_map *map;
|
||||
size_t n_buckets = qht_elems_to_buckets(n_elems);
|
||||
|
||||
g_assert(cmp);
|
||||
ht->cmp = cmp;
|
||||
ht->mode = mode;
|
||||
qemu_mutex_init(&ht->lock);
|
||||
map = qht_map_create(n_buckets);
|
||||
@ -479,8 +482,8 @@ void *qht_lookup__slowpath(struct qht_bucket *b, qht_lookup_func_t func,
|
||||
return ret;
|
||||
}
|
||||
|
||||
void *qht_lookup(struct qht *ht, qht_lookup_func_t func, const void *userp,
|
||||
uint32_t hash)
|
||||
void *qht_lookup_custom(struct qht *ht, const void *userp, uint32_t hash,
|
||||
qht_lookup_func_t func)
|
||||
{
|
||||
struct qht_bucket *b;
|
||||
struct qht_map *map;
|
||||
@ -502,10 +505,15 @@ void *qht_lookup(struct qht *ht, qht_lookup_func_t func, const void *userp,
|
||||
return qht_lookup__slowpath(b, func, userp, hash);
|
||||
}
|
||||
|
||||
void *qht_lookup(struct qht *ht, const void *userp, uint32_t hash)
|
||||
{
|
||||
return qht_lookup_custom(ht, userp, hash, ht->cmp);
|
||||
}
|
||||
|
||||
/* call with head->lock held */
|
||||
static bool qht_insert__locked(struct qht *ht, struct qht_map *map,
|
||||
struct qht_bucket *head, void *p, uint32_t hash,
|
||||
bool *needs_resize)
|
||||
static void *qht_insert__locked(struct qht *ht, struct qht_map *map,
|
||||
struct qht_bucket *head, void *p, uint32_t hash,
|
||||
bool *needs_resize)
|
||||
{
|
||||
struct qht_bucket *b = head;
|
||||
struct qht_bucket *prev = NULL;
|
||||
@ -515,8 +523,9 @@ static bool qht_insert__locked(struct qht *ht, struct qht_map *map,
|
||||
do {
|
||||
for (i = 0; i < QHT_BUCKET_ENTRIES; i++) {
|
||||
if (b->pointers[i]) {
|
||||
if (unlikely(b->pointers[i] == p)) {
|
||||
return false;
|
||||
if (unlikely(b->hashes[i] == hash &&
|
||||
ht->cmp(b->pointers[i], p))) {
|
||||
return b->pointers[i];
|
||||
}
|
||||
} else {
|
||||
goto found;
|
||||
@ -545,7 +554,7 @@ static bool qht_insert__locked(struct qht *ht, struct qht_map *map,
|
||||
atomic_set(&b->hashes[i], hash);
|
||||
atomic_set(&b->pointers[i], p);
|
||||
seqlock_write_end(&head->sequence);
|
||||
return true;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
|
||||
@ -569,25 +578,31 @@ static __attribute__((noinline)) void qht_grow_maybe(struct qht *ht)
|
||||
qemu_mutex_unlock(&ht->lock);
|
||||
}
|
||||
|
||||
bool qht_insert(struct qht *ht, void *p, uint32_t hash)
|
||||
bool qht_insert(struct qht *ht, void *p, uint32_t hash, void **existing)
|
||||
{
|
||||
struct qht_bucket *b;
|
||||
struct qht_map *map;
|
||||
bool needs_resize = false;
|
||||
bool ret;
|
||||
void *prev;
|
||||
|
||||
/* NULL pointers are not supported */
|
||||
qht_debug_assert(p);
|
||||
|
||||
b = qht_bucket_lock__no_stale(ht, hash, &map);
|
||||
ret = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
|
||||
prev = qht_insert__locked(ht, map, b, p, hash, &needs_resize);
|
||||
qht_bucket_debug__locked(b);
|
||||
qemu_spin_unlock(&b->lock);
|
||||
|
||||
if (unlikely(needs_resize) && ht->mode & QHT_MODE_AUTO_RESIZE) {
|
||||
qht_grow_maybe(ht);
|
||||
}
|
||||
return ret;
|
||||
if (likely(prev == NULL)) {
|
||||
return true;
|
||||
}
|
||||
if (existing) {
|
||||
*existing = prev;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
static inline bool qht_entry_is_last(struct qht_bucket *b, int pos)
|
||||