83ecdb18eb
The round-robin scheduler will iterate over the CPU list with an assigned budget until the next timer expiry and may exit early because of a TB exit. This is fine under normal operation, but with icount enabled and SMP it is possible for a CPU to be starved of run time, at which point the system live-locks. For example, booting a riscv64 platform with '-icount shift=0,align=off,sleep=on -smp 2' we observe a livelock once the kernel has timers enabled and starts performing TLB shootdowns. In this case we have CPU 0 in M-mode with interrupts disabled sending an IPI to CPU 1. As we enter the TCG loop, we assign the whole icount budget up to the next timer interrupt to CPU 0 and begin executing it; the guest sits in a busy loop and exhausts all of the budget before we try to execute CPU 1, which is the target of the IPI. CPU 1 is therefore left with no budget with which to execute, and the process repeats. We try here to add some fairness by splitting the budget fairly across all of the CPUs on the thread before entering each one. The CPU count is cached on CPU list generation ID to avoid iterating the list on each loop iteration. With this change it is possible to boot an SMP rv64 guest with icount enabled and no hangs. Reviewed-by: Philippe Mathieu-Daudé <philmd@linaro.org> Tested-by: Peter Maydell <peter.maydell@linaro.org> Signed-off-by: Jamie Iles <quic_jiles@quicinc.com> Reviewed-by: Richard Henderson <richard.henderson@linaro.org> Message-Id: <20230427020925.51003-3-quic_jiles@quicinc.com> Signed-off-by: Richard Henderson <richard.henderson@linaro.org>
161 lines
5.0 KiB
C
161 lines
5.0 KiB
C
/*
|
|
* QEMU TCG Single Threaded vCPUs implementation using instruction counting
|
|
*
|
|
* Copyright (c) 2003-2008 Fabrice Bellard
|
|
* Copyright (c) 2014 Red Hat Inc.
|
|
*
|
|
* Permission is hereby granted, free of charge, to any person obtaining a copy
|
|
* of this software and associated documentation files (the "Software"), to deal
|
|
* in the Software without restriction, including without limitation the rights
|
|
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
|
* copies of the Software, and to permit persons to whom the Software is
|
|
* furnished to do so, subject to the following conditions:
|
|
*
|
|
* The above copyright notice and this permission notice shall be included in
|
|
* all copies or substantial portions of the Software.
|
|
*
|
|
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
|
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
|
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
|
|
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
|
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
|
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
|
* THE SOFTWARE.
|
|
*/
|
|
|
|
#include "qemu/osdep.h"
|
|
#include "sysemu/replay.h"
|
|
#include "sysemu/cpu-timers.h"
|
|
#include "qemu/main-loop.h"
|
|
#include "qemu/guest-random.h"
|
|
#include "exec/exec-all.h"
|
|
|
|
#include "tcg-accel-ops.h"
|
|
#include "tcg-accel-ops-icount.h"
|
|
#include "tcg-accel-ops-rr.h"
|
|
|
|
static int64_t icount_get_limit(void)
|
|
{
|
|
int64_t deadline;
|
|
|
|
if (replay_mode != REPLAY_MODE_PLAY) {
|
|
/*
|
|
* Include all the timers, because they may need an attention.
|
|
* Too long CPU execution may create unnecessary delay in UI.
|
|
*/
|
|
deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
|
|
QEMU_TIMER_ATTR_ALL);
|
|
/* Check realtime timers, because they help with input processing */
|
|
deadline = qemu_soonest_timeout(deadline,
|
|
qemu_clock_deadline_ns_all(QEMU_CLOCK_REALTIME,
|
|
QEMU_TIMER_ATTR_ALL));
|
|
|
|
/*
|
|
* Maintain prior (possibly buggy) behaviour where if no deadline
|
|
* was set (as there is no QEMU_CLOCK_VIRTUAL timer) or it is more than
|
|
* INT32_MAX nanoseconds ahead, we still use INT32_MAX
|
|
* nanoseconds.
|
|
*/
|
|
if ((deadline < 0) || (deadline > INT32_MAX)) {
|
|
deadline = INT32_MAX;
|
|
}
|
|
|
|
return icount_round(deadline);
|
|
} else {
|
|
return replay_get_instructions();
|
|
}
|
|
}
|
|
|
|
static void icount_notify_aio_contexts(void)
|
|
{
|
|
/* Wake up other AioContexts. */
|
|
qemu_clock_notify(QEMU_CLOCK_VIRTUAL);
|
|
qemu_clock_run_timers(QEMU_CLOCK_VIRTUAL);
|
|
}
|
|
|
|
void icount_handle_deadline(void)
|
|
{
|
|
assert(qemu_in_vcpu_thread());
|
|
int64_t deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL,
|
|
QEMU_TIMER_ATTR_ALL);
|
|
|
|
/*
|
|
* Instructions, interrupts, and exceptions are processed in cpu-exec.
|
|
* Don't interrupt cpu thread, when these events are waiting
|
|
* (i.e., there is no checkpoint)
|
|
*/
|
|
if (deadline == 0) {
|
|
icount_notify_aio_contexts();
|
|
}
|
|
}
|
|
|
|
/*
 * Distribute the budget evenly across all CPUs.
 *
 * @cpu_count: number of CPUs that share the budget.
 *
 * Returns the per-CPU share of icount_get_limit().  If the share would
 * round down to zero, the whole limit is returned instead.  A @cpu_count
 * of one or less means there is nothing to split, so the whole limit is
 * returned as well.
 */
int64_t icount_percpu_budget(int cpu_count)
{
    int64_t limit = icount_get_limit();
    int64_t timeslice;

    /*
     * Guard the division below: a zero count would divide by zero and a
     * negative one would produce a negative budget.
     */
    if (cpu_count <= 1) {
        return limit;
    }

    timeslice = limit / cpu_count;
    if (timeslice == 0) {
        timeslice = limit;
    }

    return timeslice;
}
|
|
|
|
/*
 * Load the instruction counters of @cpu before it is run.
 *
 * @cpu: vCPU about to execute; its low decrementer and icount_extra are
 *       asserted to be zero on entry.
 * @cpu_budget: upper bound on the budget handed to this vCPU; the budget
 *              actually used is the smaller of this and icount_get_limit().
 *
 * The replay mutex is taken here and is still held on return;
 * icount_process_data() is the function in this file that unlocks it.
 */
void icount_prepare_for_run(CPUState *cpu, int64_t cpu_budget)
{
    int64_t limit;
    int insns_left;

    /*
     * icount_process_data should have cleared both of these after the
     * previous vCPU execution.  u16.high, by contrast, can be raised
     * asynchronously by cpu_exit/cpu_interrupt/tcg_handle_interrupt.
     */
    g_assert(cpu_neg(cpu)->icount_decr.u16.low == 0);
    g_assert(cpu->icount_extra == 0);

    replay_mutex_lock();

    limit = icount_get_limit();
    cpu->icount_budget = MIN(limit, cpu_budget);

    /* At most 0xffff goes into the low decrementer; the rest is "extra". */
    insns_left = MIN(0xffff, cpu->icount_budget);
    cpu_neg(cpu)->icount_decr.u16.low = insns_left;
    cpu->icount_extra = cpu->icount_budget - insns_left;

    if (cpu->icount_budget != 0) {
        return;
    }

    /*
     * Zero budget.  We are called without the iothread lock, so it has to
     * be taken around the timer handlers.
     */
    qemu_mutex_lock_iothread();
    icount_notify_aio_contexts();
    qemu_mutex_unlock_iothread();
}
|
|
|
|
/*
 * Finish a vCPU execution: account for what was executed, zero the
 * per-CPU counters, and release the replay mutex.
 *
 * @cpu: vCPU that has just stopped executing.
 */
void icount_process_data(CPUState *cpu)
{
    /* Executed instructions are accounted for before anything is reset. */
    icount_update(cpu);

    /* Clear the counters; icount_prepare_for_run asserts the first two. */
    cpu_neg(cpu)->icount_decr.u16.low = 0;
    cpu->icount_extra = 0;
    cpu->icount_budget = 0;

    replay_account_executed_instructions();

    replay_mutex_unlock();
}
|
|
|
|
/*
 * Forward an interrupt to tcg_handle_interrupt(), then abort if the
 * current vCPU raised a new interrupt bit while it was not allowed to do
 * I/O.
 *
 * @cpu: vCPU receiving the interrupt.
 * @mask: interrupt bits being raised.
 */
void icount_handle_interrupt(CPUState *cpu, int mask)
{
    /* Snapshot taken before the call so newly raised bits can be detected. */
    const int pending_before = cpu->interrupt_request;

    tcg_handle_interrupt(cpu, mask);

    if (!qemu_cpu_is_self(cpu)) {
        return;
    }
    if (cpu->can_do_io) {
        return;
    }

    /* Only bits that were not already pending count as "raised". */
    if ((mask & ~pending_before) != 0) {
        cpu_abort(cpu, "Raised interrupt while not in I/O function");
    }
}
|