f6bb84d531
This avoids duplicating code. cpu_exec_step will also use the
new common function once we integrate parallel_cpus into tb->cflags.

Note that in this commit we also fix a race, described by Richard Henderson
during review. Think of this scenario with threads A and B:

   (A) Lookup succeeds for TB in hash without tb_lock
        (B) Sets the TB's tb->invalid flag
        (B) Removes the TB from tb_htable
        (B) Clears all CPU's tb_jmp_cache
   (A) Store TB into local tb_jmp_cache

Given that order of events, (A) will keep executing that invalid TB until
another flush of its tb_jmp_cache happens, which in theory might never happen.
We can fix this by checking the tb->invalid flag every time we look up a TB
from tb_jmp_cache, so that in the above scenario, next time we try to find
that TB in tb_jmp_cache, we won't, and will therefore be forced to look it
up in tb_htable.

Performance-wise, I measured a small improvement when booting debian-arm.
Note that inlining pays off:

 Performance counter stats for 'taskset -c 0 qemu-system-arm \
	-machine type=virt -nographic -smp 1 -m 4096 \
	-netdev user,id=unet,hostfwd=tcp::2222-:22 \
	-device virtio-net-device,netdev=unet \
	-drive file=jessie.qcow2,id=myblock,index=0,if=none \
	-device virtio-blk-device,drive=myblock \
	-kernel kernel.img -append console=ttyAMA0 root=/dev/vda1 \
	-name arm,debug-threads=on -smp 1' (10 runs):

Before:
      18714.917392 task-clock                #    0.952 CPUs utilized            ( +-  0.95% )
            23,142 context-switches          #    0.001 M/sec                    ( +-  0.50% )
                 1 CPU-migrations            #    0.000 M/sec
            10,558 page-faults               #    0.001 M/sec                    ( +-  0.95% )
    53,957,727,252 cycles                    #    2.883 GHz                      ( +-  0.91% ) [83.33%]
    24,440,599,852 stalled-cycles-frontend   #   45.30% frontend cycles idle     ( +-  1.20% ) [83.33%]
    16,495,714,424 stalled-cycles-backend    #   30.57% backend  cycles idle     ( +-  0.95% ) [66.66%]
    76,267,572,582 instructions              #    1.41  insns per cycle
                                             #    0.32  stalled cycles per insn  ( +-  0.87% ) [83.34%]
    12,692,186,323 branches                  #  678.186 M/sec                    ( +-  0.92% ) [83.35%]
       263,486,879 branch-misses             #    2.08% of all branches          ( +-  0.73% ) [83.34%]

      19.648474449 seconds time elapsed                                          ( +-  0.82% )

After, w/ inline (this patch):
      18471.376627 task-clock                #    0.955 CPUs utilized            ( +-  0.96% )
            23,048 context-switches          #    0.001 M/sec                    ( +-  0.48% )
                 1 CPU-migrations            #    0.000 M/sec
            10,708 page-faults               #    0.001 M/sec                    ( +-  0.81% )
    53,208,990,796 cycles                    #    2.881 GHz                      ( +-  0.98% ) [83.34%]
    23,941,071,673 stalled-cycles-frontend   #   44.99% frontend cycles idle     ( +-  0.95% ) [83.34%]
    16,161,773,848 stalled-cycles-backend    #   30.37% backend  cycles idle     ( +-  0.76% ) [66.67%]
    75,786,269,766 instructions              #    1.42  insns per cycle
                                             #    0.32  stalled cycles per insn  ( +-  1.24% ) [83.34%]
    12,573,617,143 branches                  #  680.708 M/sec                    ( +-  1.34% ) [83.33%]
       260,235,550 branch-misses             #    2.07% of all branches          ( +-  0.66% ) [83.33%]

      19.340502161 seconds time elapsed                                          ( +-  0.56% )

After, w/o inline:
      18791.253967 task-clock                #    0.954 CPUs utilized            ( +-  0.78% )
            23,230 context-switches          #    0.001 M/sec                    ( +-  0.42% )
                 1 CPU-migrations            #    0.000 M/sec
            10,563 page-faults               #    0.001 M/sec                    ( +-  1.27% )
    54,168,674,622 cycles                    #    2.883 GHz                      ( +-  0.80% ) [83.34%]
    24,244,712,629 stalled-cycles-frontend   #   44.76% frontend cycles idle     ( +-  1.37% ) [83.33%]
    16,288,648,572 stalled-cycles-backend    #   30.07% backend  cycles idle     ( +-  0.95% ) [66.66%]
    77,659,755,503 instructions              #    1.43  insns per cycle
                                             #    0.31  stalled cycles per insn  ( +-  0.97% ) [83.34%]
    12,922,780,045 branches                  #  687.702 M/sec                    ( +-  1.06% ) [83.34%]
       261,962,386 branch-misses             #    2.03% of all branches          ( +-  0.71% ) [83.35%]

      19.700174670 seconds time elapsed                                          ( +-  0.56% )

Reviewed-by: Richard Henderson <rth@twiddle.net>
Signed-off-by: Emilio G. Cota <cota@braap.org>
Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
712 lines
22 KiB
C
712 lines
22 KiB
C
/*
|
|
* emulator main execution loop
|
|
*
|
|
* Copyright (c) 2003-2005 Fabrice Bellard
|
|
*
|
|
* This library is free software; you can redistribute it and/or
|
|
* modify it under the terms of the GNU Lesser General Public
|
|
* License as published by the Free Software Foundation; either
|
|
* version 2 of the License, or (at your option) any later version.
|
|
*
|
|
* This library is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
* Lesser General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU Lesser General Public
|
|
* License along with this library; if not, see <http://www.gnu.org/licenses/>.
|
|
*/
|
|
#include "qemu/osdep.h"
|
|
#include "cpu.h"
|
|
#include "trace.h"
|
|
#include "disas/disas.h"
|
|
#include "exec/exec-all.h"
|
|
#include "tcg.h"
|
|
#include "qemu/atomic.h"
|
|
#include "sysemu/qtest.h"
|
|
#include "qemu/timer.h"
|
|
#include "exec/address-spaces.h"
|
|
#include "qemu/rcu.h"
|
|
#include "exec/tb-hash.h"
|
|
#include "exec/tb-lookup.h"
|
|
#include "exec/log.h"
|
|
#include "qemu/main-loop.h"
|
|
#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
|
|
#include "hw/i386/apic.h"
|
|
#endif
|
|
#include "sysemu/cpus.h"
|
|
#include "sysemu/replay.h"
|
|
|
|
/* -icount align implementation. */
|
|
|
|
/*
 * Bookkeeping used by the -icount align code to keep the guest clock from
 * running ahead of the host clock.
 */
typedef struct SyncClocks {
    /* QEMU_CLOCK_VIRTUAL minus QEMU_CLOCK_VIRTUAL_RT, in nanoseconds;
     * positive when the guest is ahead of the host. */
    int64_t diff_clk;
    /* Snapshot of icount_extra + icount_decr.u16.low taken the last time
     * diff_clk was updated. */
    int64_t last_cpu_icount;
    /* QEMU_CLOCK_VIRTUAL_RT reading (ns) taken when the parameters were
     * initialised. */
    int64_t realtime_clock;
} SyncClocks;
|
|
|
|
#if !defined(CONFIG_USER_ONLY)
|
|
/*
 * Tunables for -icount align.
 *
 * The guest is allowed to run at most 3ms ahead of the host, so the
 * difference between the two clocks can oscillate around 0.
 */

/* Maximum guest advance, in nanoseconds, before align_clocks() sleeps. */
#define VM_CLOCK_ADVANCE 3000000
/* Hysteresis, in seconds, before a smaller lateness is reported again. */
#define THRESHOLD_REDUCE 1.5
/* Minimum interval, in nanoseconds, between two lateness warnings. */
#define MAX_DELAY_PRINT_RATE 2000000000LL
/* Upper bound on the number of lateness warnings ever printed. */
#define MAX_NB_PRINTS 100
|
|
|
|
/*
 * Account for the instructions executed since the previous call and, when
 * the guest has run more than VM_CLOCK_ADVANCE nanoseconds ahead of the
 * host, sleep to let the host catch up.
 *
 * Does nothing unless icount_align_option is set.
 */
static void align_clocks(SyncClocks *sc, const CPUState *cpu)
{
    int64_t icount_now;

    if (!icount_align_option) {
        return;
    }

    /* Convert the instructions consumed since the last snapshot to ns. */
    icount_now = cpu->icount_extra + cpu->icount_decr.u16.low;
    sc->diff_clk += cpu_icount_to_ns(sc->last_cpu_icount - icount_now);
    sc->last_cpu_icount = icount_now;

    if (sc->diff_clk <= VM_CLOCK_ADVANCE) {
        /* The guest is not far enough ahead to be worth a sleep. */
        return;
    }

#ifndef _WIN32
    {
        struct timespec request, remaining;

        request.tv_sec = sc->diff_clk / 1000000000LL;
        request.tv_nsec = sc->diff_clk % 1000000000LL;
        if (nanosleep(&request, &remaining) < 0) {
            /* The sleep was cut short: keep what is left as outstanding. */
            sc->diff_clk = remaining.tv_sec * 1000000000LL
                           + remaining.tv_nsec;
        } else {
            sc->diff_clk = 0;
        }
    }
#else
    Sleep(sc->diff_clk / SCALE_MS);
    sc->diff_clk = 0;
#endif
}
|
|
|
|
static void print_delay(const SyncClocks *sc)
|
|
{
|
|
static float threshold_delay;
|
|
static int64_t last_realtime_clock;
|
|
static int nb_prints;
|
|
|
|
if (icount_align_option &&
|
|
sc->realtime_clock - last_realtime_clock >= MAX_DELAY_PRINT_RATE &&
|
|
nb_prints < MAX_NB_PRINTS) {
|
|
if ((-sc->diff_clk / (float)1000000000LL > threshold_delay) ||
|
|
(-sc->diff_clk / (float)1000000000LL <
|
|
(threshold_delay - THRESHOLD_REDUCE))) {
|
|
threshold_delay = (-sc->diff_clk / 1000000000LL) + 1;
|
|
printf("Warning: The guest is now late by %.1f to %.1f seconds\n",
|
|
threshold_delay - 1,
|
|
threshold_delay);
|
|
nb_prints++;
|
|
last_realtime_clock = sc->realtime_clock;
|
|
}
|
|
}
|
|
}
|
|
|
|
static void init_delay_params(SyncClocks *sc,
|
|
const CPUState *cpu)
|
|
{
|
|
if (!icount_align_option) {
|
|
return;
|
|
}
|
|
sc->realtime_clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT);
|
|
sc->diff_clk = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL) - sc->realtime_clock;
|
|
sc->last_cpu_icount = cpu->icount_extra + cpu->icount_decr.u16.low;
|
|
if (sc->diff_clk < max_delay) {
|
|
max_delay = sc->diff_clk;
|
|
}
|
|
if (sc->diff_clk > max_advance) {
|
|
max_advance = sc->diff_clk;
|
|
}
|
|
|
|
/* Print every 2s max if the guest is late. We limit the number
|
|
of printed messages to NB_PRINT_MAX(currently 100) */
|
|
print_delay(sc);
|
|
}
|
|
#else
|
|
/* User-mode build: -icount align is not available, so this is a no-op. */
static void align_clocks(SyncClocks *sc, const CPUState *cpu)
{
    /* Intentionally empty. */
}
|
|
|
|
/* User-mode build: there are no delay parameters to set up. */
static void init_delay_params(SyncClocks *sc, const CPUState *cpu)
{
    /* Intentionally empty. */
}
|
|
#endif /* CONFIG USER ONLY */
|
|
|
|
/* Execute a TB, and fix up the CPU state afterwards if necessary */
|
|
static inline tcg_target_ulong cpu_tb_exec(CPUState *cpu, TranslationBlock *itb)
|
|
{
|
|
CPUArchState *env = cpu->env_ptr;
|
|
uintptr_t ret;
|
|
TranslationBlock *last_tb;
|
|
int tb_exit;
|
|
uint8_t *tb_ptr = itb->tc_ptr;
|
|
|
|
qemu_log_mask_and_addr(CPU_LOG_EXEC, itb->pc,
|
|
"Trace %p [%d: " TARGET_FMT_lx "] %s\n",
|
|
itb->tc_ptr, cpu->cpu_index, itb->pc,
|
|
lookup_symbol(itb->pc));
|
|
|
|
#if defined(DEBUG_DISAS)
|
|
if (qemu_loglevel_mask(CPU_LOG_TB_CPU)
|
|
&& qemu_log_in_addr_range(itb->pc)) {
|
|
qemu_log_lock();
|
|
#if defined(TARGET_I386)
|
|
log_cpu_state(cpu, CPU_DUMP_CCOP);
|
|
#else
|
|
log_cpu_state(cpu, 0);
|
|
#endif
|
|
qemu_log_unlock();
|
|
}
|
|
#endif /* DEBUG_DISAS */
|
|
|
|
cpu->can_do_io = !use_icount;
|
|
ret = tcg_qemu_tb_exec(env, tb_ptr);
|
|
cpu->can_do_io = 1;
|
|
last_tb = (TranslationBlock *)(ret & ~TB_EXIT_MASK);
|
|
tb_exit = ret & TB_EXIT_MASK;
|
|
trace_exec_tb_exit(last_tb, tb_exit);
|
|
|
|
if (tb_exit > TB_EXIT_IDX1) {
|
|
/* We didn't start executing this TB (eg because the instruction
|
|
* counter hit zero); we must restore the guest PC to the address
|
|
* of the start of the TB.
|
|
*/
|
|
CPUClass *cc = CPU_GET_CLASS(cpu);
|
|
qemu_log_mask_and_addr(CPU_LOG_EXEC, last_tb->pc,
|
|
"Stopped execution of TB chain before %p ["
|
|
TARGET_FMT_lx "] %s\n",
|
|
last_tb->tc_ptr, last_tb->pc,
|
|
lookup_symbol(last_tb->pc));
|
|
if (cc->synchronize_from_tb) {
|
|
cc->synchronize_from_tb(cpu, last_tb);
|
|
} else {
|
|
assert(cc->set_pc);
|
|
cc->set_pc(cpu, last_tb->pc);
|
|
}
|
|
}
|
|
return ret;
|
|
}
|
|
|
|
#ifndef CONFIG_USER_ONLY
|
|
/* Execute the code without caching the generated code. An interpreter
|
|
could be used if available. */
|
|
static void cpu_exec_nocache(CPUState *cpu, int max_cycles,
|
|
TranslationBlock *orig_tb, bool ignore_icount)
|
|
{
|
|
TranslationBlock *tb;
|
|
|
|
/* Should never happen.
|
|
We only end up here when an existing TB is too long. */
|
|
if (max_cycles > CF_COUNT_MASK)
|
|
max_cycles = CF_COUNT_MASK;
|
|
|
|
tb_lock();
|
|
tb = tb_gen_code(cpu, orig_tb->pc, orig_tb->cs_base, orig_tb->flags,
|
|
max_cycles | CF_NOCACHE
|
|
| (ignore_icount ? CF_IGNORE_ICOUNT : 0));
|
|
tb->orig_tb = orig_tb;
|
|
tb_unlock();
|
|
|
|
/* execute the generated code */
|
|
trace_exec_tb_nocache(tb, tb->pc);
|
|
cpu_tb_exec(cpu, tb);
|
|
|
|
tb_lock();
|
|
tb_phys_invalidate(tb, -1);
|
|
tb_free(tb);
|
|
tb_unlock();
|
|
}
|
|
#endif
|
|
|
|
/*
 * Translate and execute a single-instruction, uncached TB at the current
 * CPU state of @cpu, then invalidate and free it.
 *
 * If execution leaves through siglongjmp(cpu->jmp_env), any tb_lock still
 * held is reset and the function returns without further cleanup.
 */
static void cpu_exec_step(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);
    CPUArchState *env = (CPUArchState *)cpu->env_ptr;
    TranslationBlock *step_tb;
    target_ulong cs_base, pc;
    uint32_t flags;

    cpu_get_tb_cpu_state(env, &pc, &cs_base, &flags);

    if (sigsetjmp(cpu->jmp_env, 0) != 0) {
        /* We may have exited due to another problem here, so we need
         * to reset any tb_locks we may have taken but didn't release.
         * The mmap_lock is dropped by tb_gen_code if it runs out of
         * memory.
         */
#ifndef CONFIG_SOFTMMU
        tcg_debug_assert(!have_mmap_lock());
#endif
        tb_lock_reset();
        return;
    }

    /* mmap_lock is taken outside tb_lock. */
    mmap_lock();
    tb_lock();
    step_tb = tb_gen_code(cpu, pc, cs_base, flags,
                          1 | CF_NOCACHE | CF_IGNORE_ICOUNT);
    step_tb->orig_tb = NULL;
    tb_unlock();
    mmap_unlock();

    cc->cpu_exec_enter(cpu);
    /* Run the freshly generated code. */
    trace_exec_tb_nocache(step_tb, pc);
    cpu_tb_exec(cpu, step_tb);
    cc->cpu_exec_exit(cpu);

    /* The block is single-use: drop it again under tb_lock. */
    tb_lock();
    tb_phys_invalidate(step_tb, -1);
    tb_free(step_tb);
    tb_unlock();
}
|
|
|
|
/*
 * Execute one instruction on @cpu inside a start_exclusive() /
 * end_exclusive() section, with parallel_cpus cleared for the duration
 * of the step.
 */
void cpu_exec_step_atomic(CPUState *cpu)
{
    start_exclusive();

    /* Since we got here, we know that parallel_cpus must be true.  */
    parallel_cpus = false;
    cpu_exec_step(cpu);
    parallel_cpus = true;

    end_exclusive();
}
|
|
|
|
struct tb_desc {
|
|
target_ulong pc;
|
|
target_ulong cs_base;
|
|
CPUArchState *env;
|
|
tb_page_addr_t phys_page1;
|
|
uint32_t flags;
|
|
uint32_t trace_vcpu_dstate;
|
|
};
|
|
|
|
static bool tb_cmp(const void *p, const void *d)
|
|
{
|
|
const TranslationBlock *tb = p;
|
|
const struct tb_desc *desc = d;
|
|
|
|
if (tb->pc == desc->pc &&
|
|
tb->page_addr[0] == desc->phys_page1 &&
|
|
tb->cs_base == desc->cs_base &&
|
|
tb->flags == desc->flags &&
|
|
tb->trace_vcpu_dstate == desc->trace_vcpu_dstate &&
|
|
!atomic_read(&tb->invalid)) {
|
|
/* check next page if needed */
|
|
if (tb->page_addr[1] == -1) {
|
|
return true;
|
|
} else {
|
|
tb_page_addr_t phys_page2;
|
|
target_ulong virt_page2;
|
|
|
|
virt_page2 = (desc->pc & TARGET_PAGE_MASK) + TARGET_PAGE_SIZE;
|
|
phys_page2 = get_page_addr_code(desc->env, virt_page2);
|
|
if (tb->page_addr[1] == phys_page2) {
|
|
return true;
|
|
}
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
 * Look up a TB matching @pc, @cs_base, @flags and the current trace state
 * of @cpu in the global TB hash table.
 *
 * Returns whatever qht_lookup() yields for the key, using tb_cmp() as the
 * comparison function.
 */
TranslationBlock *tb_htable_lookup(CPUState *cpu, target_ulong pc,
                                   target_ulong cs_base, uint32_t flags)
{
    struct tb_desc key = {
        .pc = pc,
        .cs_base = cs_base,
        .env = (CPUArchState *)cpu->env_ptr,
        .flags = flags,
        .trace_vcpu_dstate = *cpu->trace_dstate,
    };
    tb_page_addr_t phys_pc;
    uint32_t hash;

    phys_pc = get_page_addr_code(key.env, pc);
    key.phys_page1 = phys_pc & TARGET_PAGE_MASK;

    hash = tb_hash_func(phys_pc, pc, flags, *cpu->trace_dstate);
    return qht_lookup(&tcg_ctx.tb_ctx.htable, tb_cmp, &key, hash);
}
|
|
|
|
/*
 * Point jump slot @n of @tb at host address @addr.
 *
 * Without TCG_TARGET_HAS_direct_jump the address is simply stored in
 * jmp_target_arg[n]; with it, jmp_target_arg[n] holds an offset into the
 * TB's code and the jump found there is patched instead.
 */
void tb_set_jmp_target(TranslationBlock *tb, int n, uintptr_t addr)
{
    uintptr_t code_start;
    uintptr_t jmp_offset;

    if (!TCG_TARGET_HAS_direct_jump) {
        tb->jmp_target_arg[n] = addr;
        return;
    }

    jmp_offset = tb->jmp_target_arg[n];
    code_start = (uintptr_t)tb->tc_ptr;
    tb_target_set_jmp_target(code_start, code_start + jmp_offset, addr);
}
|
|
|
|
/* Called with tb_lock held. */
|
|
static inline void tb_add_jump(TranslationBlock *tb, int n,
|
|
TranslationBlock *tb_next)
|
|
{
|
|
assert(n < ARRAY_SIZE(tb->jmp_list_next));
|
|
if (tb->jmp_list_next[n]) {
|
|
/* Another thread has already done this while we were
|
|
* outside of the lock; nothing to do in this case */
|
|
return;
|
|
}
|
|
qemu_log_mask_and_addr(CPU_LOG_EXEC, tb->pc,
|
|
"Linking TBs %p [" TARGET_FMT_lx
|
|
"] index %d -> %p [" TARGET_FMT_lx "]\n",
|
|
tb->tc_ptr, tb->pc, n,
|
|
tb_next->tc_ptr, tb_next->pc);
|
|
|
|
/* patch the native jump address */
|
|
tb_set_jmp_target(tb, n, (uintptr_t)tb_next->tc_ptr);
|
|
|
|
/* add in TB jmp circular list */
|
|
tb->jmp_list_next[n] = tb_next->jmp_list_first;
|
|
tb_next->jmp_list_first = (uintptr_t)tb | n;
|
|
}
|
|
|
|
/*
 * Find, or translate, the TB for the current state of @cpu and, when
 * allowed, chain exit @tb_exit of @last_tb to it.
 *
 * @last_tb may be NULL, in which case no chaining is attempted.
 */
static inline TranslationBlock *tb_find(CPUState *cpu,
                                        TranslationBlock *last_tb,
                                        int tb_exit)
{
    TranslationBlock *tb;
    target_ulong cs_base, pc;
    uint32_t flags;
    bool holding_tb_lock = false;

    tb = tb_lookup__cpu_state(cpu, &pc, &cs_base, &flags);
    if (tb == NULL) {
        /* mmap_lock is needed by tb_gen_code, and mmap_lock must be
         * taken outside tb_lock. As system emulation is currently
         * single threaded the locks are NOPs.
         */
        mmap_lock();
        tb_lock();
        holding_tb_lock = true;

        /* Another thread may have translated the block while we were
         * waiting for the locks, so look again before translating.
         */
        tb = tb_htable_lookup(cpu, pc, cs_base, flags);
        if (likely(tb == NULL)) {
            /* Still nothing: translate it now. */
            tb = tb_gen_code(cpu, pc, cs_base, flags, 0);
        }

        mmap_unlock();
        /* Remember the TB in the per-CPU jump cache for the fast lookup. */
        atomic_set(&cpu->tb_jmp_cache[tb_jmp_cache_hash_func(pc)], tb);
    }

#ifndef CONFIG_USER_ONLY
    /* We don't take care of direct jumps when address mapping changes in
     * system emulation. So it's not safe to make a direct jump to a TB
     * spanning two pages because the mapping for the second page can change.
     */
    if (tb->page_addr[1] != -1) {
        last_tb = NULL;
    }
#endif

    /* See if we can patch the calling TB. */
    if (last_tb && !qemu_loglevel_mask(CPU_LOG_TB_NOCHAIN)) {
        if (!holding_tb_lock) {
            tb_lock();
            holding_tb_lock = true;
        }
        /* Never chain to a TB that has been invalidated in the meantime. */
        if (!tb->invalid) {
            tb_add_jump(last_tb, tb_exit, tb);
        }
    }

    if (holding_tb_lock) {
        tb_unlock();
    }
    return tb;
}
|
|
|
|
/*
 * Decide whether a halted @cpu must stay halted.
 *
 * Returns true when the CPU is halted and has no work to do; otherwise
 * clears cpu->halted (if it was set) and returns false.
 */
static inline bool cpu_handle_halt(CPUState *cpu)
{
    if (!cpu->halted) {
        return false;
    }

#if defined(TARGET_I386) && !defined(CONFIG_USER_ONLY)
    if ((cpu->interrupt_request & CPU_INTERRUPT_POLL)
        && replay_interrupt()) {
        X86CPU *x86_cpu = X86_CPU(cpu);

        /* The APIC is polled with the iothread mutex held. */
        qemu_mutex_lock_iothread();
        apic_poll_irq(x86_cpu->apic_state);
        cpu_reset_interrupt(cpu, CPU_INTERRUPT_POLL);
        qemu_mutex_unlock_iothread();
    }
#endif

    if (!cpu_has_work(cpu)) {
        return true;
    }

    /* There is work to do: leave the halted state. */
    cpu->halted = 0;
    return false;
}
|
|
|
|
/*
 * Handle EXCP_DEBUG: when no watchpoint was hit, clear BP_WATCHPOINT_HIT
 * on every watchpoint of @cpu, then call the class debug_excp_handler hook.
 */
static inline void cpu_handle_debug_exception(CPUState *cpu)
{
    CPUClass *cc = CPU_GET_CLASS(cpu);

    if (!cpu->watchpoint_hit) {
        CPUWatchpoint *wp;

        QTAILQ_FOREACH(wp, &cpu->watchpoints, entry) {
            wp->flags &= ~BP_WATCHPOINT_HIT;
        }
    }

    cc->debug_excp_handler(cpu);
}
|
|
|
|
/*
 * Process a pending exception on @cpu, if any.
 *
 * Returns true when cpu_exec() must return to its caller, in which case
 * *@ret holds the value to return; returns false when execution of
 * translated code can (re)start.
 */
static inline bool cpu_handle_exception(CPUState *cpu, int *ret)
{
    if (cpu->exception_index < 0) {
#ifndef CONFIG_USER_ONLY
        if (replay_has_exception()
            && cpu->icount_decr.u16.low + cpu->icount_extra == 0) {
            /* try to cause an exception pending in the log */
            cpu_exec_nocache(cpu, 1, tb_find(cpu, NULL, 0), true);
            *ret = -1;
            return true;
        }
#endif
        return false;
    }

    if (cpu->exception_index >= EXCP_INTERRUPT) {
        /* exit request from the cpu execution loop */
        *ret = cpu->exception_index;
        if (*ret == EXCP_DEBUG) {
            cpu_handle_debug_exception(cpu);
        }
        cpu->exception_index = -1;
        return true;
    }

#if defined(CONFIG_USER_ONLY)
    /* if user mode only, we simulate a fake exception
       which will be handled outside the cpu execution
       loop */
#if defined(TARGET_I386)
    {
        CPUClass *cc = CPU_GET_CLASS(cpu);

        cc->do_interrupt(cpu);
    }
#endif
    *ret = cpu->exception_index;
    cpu->exception_index = -1;
    return true;
#else
    if (replay_exception()) {
        CPUClass *cc = CPU_GET_CLASS(cpu);

        /* The target's do_interrupt hook runs with the iothread mutex held. */
        qemu_mutex_lock_iothread();
        cc->do_interrupt(cpu);
        qemu_mutex_unlock_iothread();
        cpu->exception_index = -1;
    } else if (!replay_has_interrupt()) {
        /* give a chance to iothread in replay mode */
        *ret = EXCP_INTERRUPT;
        return true;
    }
    return false;
#endif
}
|
|
|
|
static inline bool cpu_handle_interrupt(CPUState *cpu,
|
|
TranslationBlock **last_tb)
|
|
{
|
|
CPUClass *cc = CPU_GET_CLASS(cpu);
|
|
|
|
if (unlikely(atomic_read(&cpu->interrupt_request))) {
|
|
int interrupt_request;
|
|
qemu_mutex_lock_iothread();
|
|
interrupt_request = cpu->interrupt_request;
|
|
if (unlikely(cpu->singlestep_enabled & SSTEP_NOIRQ)) {
|
|
/* Mask out external interrupts for this step. */
|
|
interrupt_request &= ~CPU_INTERRUPT_SSTEP_MASK;
|
|
}
|
|
if (interrupt_request & CPU_INTERRUPT_DEBUG) {
|
|
cpu->interrupt_request &= ~CPU_INTERRUPT_DEBUG;
|
|
cpu->exception_index = EXCP_DEBUG;
|
|
qemu_mutex_unlock_iothread();
|
|
return true;
|
|
}
|
|
if (replay_mode == REPLAY_MODE_PLAY && !replay_has_interrupt()) {
|
|
/* Do nothing */
|
|
} else if (interrupt_request & CPU_INTERRUPT_HALT) {
|
|
replay_interrupt();
|
|
cpu->interrupt_request &= ~CPU_INTERRUPT_HALT;
|
|
cpu->halted = 1;
|
|
cpu->exception_index = EXCP_HLT;
|
|
qemu_mutex_unlock_iothread();
|
|
return true;
|
|
}
|
|
#if defined(TARGET_I386)
|
|
else if (interrupt_request & CPU_INTERRUPT_INIT) {
|
|
X86CPU *x86_cpu = X86_CPU(cpu);
|
|
CPUArchState *env = &x86_cpu->env;
|
|
replay_interrupt();
|
|
cpu_svm_check_intercept_param(env, SVM_EXIT_INIT, 0, 0);
|
|
do_cpu_init(x86_cpu);
|
|
cpu->exception_index = EXCP_HALTED;
|
|
qemu_mutex_unlock_iothread();
|
|
return true;
|
|
}
|
|
#else
|
|
else if (interrupt_request & CPU_INTERRUPT_RESET) {
|
|
replay_interrupt();
|
|
cpu_reset(cpu);
|
|
qemu_mutex_unlock_iothread();
|
|
return true;
|
|
}
|
|
#endif
|
|
/* The target hook has 3 exit conditions:
|
|
False when the interrupt isn't processed,
|
|
True when it is, and we should restart on a new TB,
|
|
and via longjmp via cpu_loop_exit. */
|
|
else {
|
|
if (cc->cpu_exec_interrupt(cpu, interrupt_request)) {
|
|
replay_interrupt();
|
|
*last_tb = NULL;
|
|
}
|
|
/* The target hook may have updated the 'cpu->interrupt_request';
|
|
* reload the 'interrupt_request' value */
|
|
interrupt_request = cpu->interrupt_request;
|
|
}
|
|
if (interrupt_request & CPU_INTERRUPT_EXITTB) {
|
|
cpu->interrupt_request &= ~CPU_INTERRUPT_EXITTB;
|
|
/* ensure that no TB jump will be modified as
|
|
the program flow was changed */
|
|
*last_tb = NULL;
|
|
}
|
|
|
|
/* If we exit via cpu_loop_exit/longjmp it is reset in cpu_exec */
|
|
qemu_mutex_unlock_iothread();
|
|
}
|
|
|
|
/* Finally, check if we need to exit to the main loop. */
|
|
if (unlikely(atomic_read(&cpu->exit_request)
|
|
|| (use_icount && cpu->icount_decr.u16.low + cpu->icount_extra == 0))) {
|
|
atomic_set(&cpu->exit_request, 0);
|
|
cpu->exception_index = EXCP_INTERRUPT;
|
|
return true;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
/*
 * Execute @tb on @cpu and report through *@last_tb and *@tb_exit where
 * execution stopped.
 *
 * *@last_tb is the TB that was left (a candidate for chaining), or NULL
 * when the exit was TB_EXIT_REQUESTED.  In the latter case, if the
 * instruction counter expired, the decrementer is refilled from
 * cpu->icount_budget and any final partial budget is run uncached.
 */
static inline void cpu_loop_exec_tb(CPUState *cpu, TranslationBlock *tb,
                                    TranslationBlock **last_tb, int *tb_exit)
{
    uintptr_t exec_ret;
    int32_t insns_left;

    trace_exec_tb(tb, tb->pc);
    exec_ret = cpu_tb_exec(cpu, tb);

    /* Split the return value into TB pointer and exit index. */
    tb = (TranslationBlock *)(exec_ret & ~TB_EXIT_MASK);
    *tb_exit = exec_ret & TB_EXIT_MASK;
    if (*tb_exit != TB_EXIT_REQUESTED) {
        *last_tb = tb;
        return;
    }

    *last_tb = NULL;
    insns_left = atomic_read(&cpu->icount_decr.u32);
    atomic_set(&cpu->icount_decr.u16.high, 0);
    if (insns_left < 0) {
        /* Something asked us to stop executing chained TBs; just
         * continue round the main loop. Whatever requested the exit
         * will also have set something else (eg exit_request or
         * interrupt_request) which we will handle next time around
         * the loop.  But we need to ensure the zeroing of icount_decr
         * comes before the next read of cpu->exit_request
         * or cpu->interrupt_request.
         */
        smp_mb();
        return;
    }

    /* Instruction counter expired.  */
    assert(use_icount);
#ifndef CONFIG_USER_ONLY
    /* Ensure global icount has gone forward */
    cpu_update_icount(cpu);
    /* Refill decrementer and continue execution.  */
    insns_left = MIN(0xffff, cpu->icount_budget);
    cpu->icount_decr.u16.low = insns_left;
    cpu->icount_extra = cpu->icount_budget - insns_left;
    if (!cpu->icount_extra && insns_left > 0) {
        /* Execute any remaining instructions, then let the main loop
         * handle the next event.
         */
        cpu_exec_nocache(cpu, insns_left, tb, false);
    }
#endif
}
|
|
|
|
/* main execution loop */
|
|
|
|
int cpu_exec(CPUState *cpu)
|
|
{
|
|
CPUClass *cc = CPU_GET_CLASS(cpu);
|
|
int ret;
|
|
SyncClocks sc = { 0 };
|
|
|
|
/* replay_interrupt may need current_cpu */
|
|
current_cpu = cpu;
|
|
|
|
if (cpu_handle_halt(cpu)) {
|
|
return EXCP_HALTED;
|
|
}
|
|
|
|
rcu_read_lock();
|
|
|
|
cc->cpu_exec_enter(cpu);
|
|
|
|
/* Calculate difference between guest clock and host clock.
|
|
* This delay includes the delay of the last cycle, so
|
|
* what we have to do is sleep until it is 0. As for the
|
|
* advance/delay we gain here, we try to fix it next time.
|
|
*/
|
|
init_delay_params(&sc, cpu);
|
|
|
|
/* prepare setjmp context for exception handling */
|
|
if (sigsetjmp(cpu->jmp_env, 0) != 0) {
|
|
#if defined(__clang__) || !QEMU_GNUC_PREREQ(4, 6)
|
|
/* Some compilers wrongly smash all local variables after
|
|
* siglongjmp. There were bug reports for gcc 4.5.0 and clang.
|
|
* Reload essential local variables here for those compilers.
|
|
* Newer versions of gcc would complain about this code (-Wclobbered). */
|
|
cpu = current_cpu;
|
|
cc = CPU_GET_CLASS(cpu);
|
|
#else /* buggy compiler */
|
|
/* Assert that the compiler does not smash local variables. */
|
|
g_assert(cpu == current_cpu);
|
|
g_assert(cc == CPU_GET_CLASS(cpu));
|
|
#endif /* buggy compiler */
|
|
cpu->can_do_io = 1;
|
|
tb_lock_reset();
|
|
if (qemu_mutex_iothread_locked()) {
|
|
qemu_mutex_unlock_iothread();
|
|
}
|
|
}
|
|
|
|
/* if an exception is pending, we execute it here */
|
|
while (!cpu_handle_exception(cpu, &ret)) {
|
|
TranslationBlock *last_tb = NULL;
|
|
int tb_exit = 0;
|
|
|
|
while (!cpu_handle_interrupt(cpu, &last_tb)) {
|
|
TranslationBlock *tb = tb_find(cpu, last_tb, tb_exit);
|
|
cpu_loop_exec_tb(cpu, tb, &last_tb, &tb_exit);
|
|
/* Try to align the host and virtual clocks
|
|
if the guest is in advance */
|
|
align_clocks(&sc, cpu);
|
|
}
|
|
}
|
|
|
|
cc->cpu_exec_exit(cpu);
|
|
rcu_read_unlock();
|
|
|
|
return ret;
|
|
}
|