diff --git a/cpus.c b/cpus.c index c652da84cf..2e6701795b 100644 --- a/cpus.c +++ b/cpus.c @@ -1317,6 +1317,8 @@ static void prepare_icount_for_run(CPUState *cpu) insns_left = MIN(0xffff, cpu->icount_budget); cpu->icount_decr.u16.low = insns_left; cpu->icount_extra = cpu->icount_budget - insns_left; + + replay_mutex_lock(); } } @@ -1332,6 +1334,8 @@ static void process_icount_data(CPUState *cpu) cpu->icount_budget = 0; replay_account_executed_instructions(); + + replay_mutex_unlock(); } } @@ -1346,11 +1350,9 @@ static int tcg_cpu_exec(CPUState *cpu) #ifdef CONFIG_PROFILER ti = profile_getclock(); #endif - qemu_mutex_unlock_iothread(); cpu_exec_start(cpu); ret = cpu_exec(cpu); cpu_exec_end(cpu); - qemu_mutex_lock_iothread(); #ifdef CONFIG_PROFILER tcg_time += profile_getclock() - ti; #endif @@ -1417,6 +1419,9 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg) cpu->exit_request = 1; while (1) { + qemu_mutex_unlock_iothread(); + replay_mutex_lock(); + qemu_mutex_lock_iothread(); /* Account partial waits to QEMU_CLOCK_VIRTUAL. */ qemu_account_warp_timer(); @@ -1425,6 +1430,8 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg) */ handle_icount_deadline(); + replay_mutex_unlock(); + if (!cpu) { cpu = first_cpu; } @@ -1440,11 +1447,13 @@ static void *qemu_tcg_rr_cpu_thread_fn(void *arg) if (cpu_can_run(cpu)) { int r; + qemu_mutex_unlock_iothread(); prepare_icount_for_run(cpu); r = tcg_cpu_exec(cpu); process_icount_data(cpu); + qemu_mutex_lock_iothread(); if (r == EXCP_DEBUG) { cpu_handle_guest_debug(cpu); @@ -1634,7 +1643,9 @@ static void *qemu_tcg_cpu_thread_fn(void *arg) while (1) { if (cpu_can_run(cpu)) { int r; + qemu_mutex_unlock_iothread(); r = tcg_cpu_exec(cpu); + qemu_mutex_lock_iothread(); switch (r) { case EXCP_DEBUG: cpu_handle_guest_debug(cpu); @@ -1781,12 +1792,21 @@ void pause_all_vcpus(void) } } + /* We need to drop the replay_lock so any vCPU threads woken up + * can finish their replay tasks + */ + replay_mutex_unlock(); + while (!all_vcpus_paused()) { qemu_cond_wait(&qemu_pause_cond, &qemu_global_mutex); CPU_FOREACH(cpu) { qemu_cpu_kick(cpu); } } + + qemu_mutex_unlock_iothread(); + replay_mutex_lock(); + qemu_mutex_lock_iothread(); } void cpu_resume(CPUState *cpu) diff --git a/docs/replay.txt b/docs/replay.txt index c52407fe23..959633e7ea 100644 --- a/docs/replay.txt +++ b/docs/replay.txt @@ -49,6 +49,28 @@ Modifications of qemu include: * recording/replaying user input (mouse and keyboard) * adding internal checkpoints for cpu and io synchronization +Locking and thread synchronisation +---------------------------------- + +Previously the synchronisation of the main thread and the vCPU thread +was ensured by the holding of the BQL. However the trend has been to +reduce the time the BQL was held across the system including under TCG +system emulation. As it is important that batches of events are kept +in sequence (e.g. expiring timers and checkpoints in the main thread +while instruction checkpoints are written by the vCPU thread) we need +another lock to keep things in lock-step. This role is now handled by +the replay_mutex_lock. It used to be held only for each event being +written but now it is held for a whole execution period. This results +in a deterministic ping-pong between the two main threads. + +As the BQL is now a finer grained lock than the replay_lock it is almost +certainly a bug, and a source of deadlocks, to take the +replay_mutex_lock while the BQL is held. This is enforced by an assert. +While the unlocks are usually in the reverse order, this is not +necessary; you can drop the replay_lock while holding the BQL, without +doing a more complicated unlock_iothread/replay_unlock/lock_iothread +sequence. + Non-deterministic events ------------------------ diff --git a/replay/replay-audio.c b/replay/replay-audio.c index 3d837434d4..b113836de4 100644 --- a/replay/replay-audio.c +++ b/replay/replay-audio.c @@ -19,20 +19,17 @@ void replay_audio_out(int *played) { if (replay_mode == REPLAY_MODE_RECORD) { + g_assert(replay_mutex_locked()); replay_save_instructions(); - replay_mutex_lock(); replay_put_event(EVENT_AUDIO_OUT); replay_put_dword(*played); - replay_mutex_unlock(); } else if (replay_mode == REPLAY_MODE_PLAY) { + g_assert(replay_mutex_locked()); replay_account_executed_instructions(); - replay_mutex_lock(); if (replay_next_event_is(EVENT_AUDIO_OUT)) { *played = replay_get_dword(); replay_finish_event(); - replay_mutex_unlock(); } else { - replay_mutex_unlock(); error_report("Missing audio out event in the replay log"); abort(); } @@ -44,8 +41,8 @@ void replay_audio_in(int *recorded, void *samples, int *wpos, int size) int pos; uint64_t left, right; if (replay_mode == REPLAY_MODE_RECORD) { + g_assert(replay_mutex_locked()); replay_save_instructions(); - replay_mutex_lock(); replay_put_event(EVENT_AUDIO_IN); replay_put_dword(*recorded); replay_put_dword(*wpos); @@ -55,10 +52,9 @@ void replay_audio_in(int *recorded, void *samples, int *wpos, int size) replay_put_qword(left); replay_put_qword(right); } - replay_mutex_unlock(); } else if (replay_mode == REPLAY_MODE_PLAY) { + g_assert(replay_mutex_locked()); replay_account_executed_instructions(); - replay_mutex_lock(); if (replay_next_event_is(EVENT_AUDIO_IN)) { *recorded = replay_get_dword(); *wpos = replay_get_dword(); @@ -69,9 +65,7 @@ void replay_audio_in(int *recorded, void *samples, int *wpos, int size) audio_sample_from_uint64(samples, pos, left, right); } replay_finish_event(); - replay_mutex_unlock(); } else { - replay_mutex_unlock(); error_report("Missing audio in event in the replay log"); abort(); } diff --git a/replay/replay-char.c b/replay/replay-char.c index cbf7c04a9f..736cc8c2e6 100755 --- a/replay/replay-char.c +++ b/replay/replay-char.c @@ -96,25 +96,24 @@ void *replay_event_char_read_load(void) void replay_char_write_event_save(int res, int offset) { + g_assert(replay_mutex_locked()); + replay_save_instructions(); - replay_mutex_lock(); replay_put_event(EVENT_CHAR_WRITE); replay_put_dword(res); replay_put_dword(offset); - replay_mutex_unlock(); } void replay_char_write_event_load(int *res, int *offset) { + g_assert(replay_mutex_locked()); + replay_account_executed_instructions(); - replay_mutex_lock(); if (replay_next_event_is(EVENT_CHAR_WRITE)) { *res = replay_get_dword(); *offset = replay_get_dword(); replay_finish_event(); - replay_mutex_unlock(); } else { - replay_mutex_unlock(); error_report("Missing character write event in the replay log"); exit(1); } @@ -122,23 +121,21 @@ void replay_char_write_event_load(int *res, int *offset) int replay_char_read_all_load(uint8_t *buf) { - replay_mutex_lock(); + g_assert(replay_mutex_locked()); + if (replay_next_event_is(EVENT_CHAR_READ_ALL)) { size_t size; int res; replay_get_array(buf, &size); replay_finish_event(); - replay_mutex_unlock(); res = (int)size; assert(res >= 0); return res; } else if (replay_next_event_is(EVENT_CHAR_READ_ALL_ERROR)) { int res = replay_get_dword(); replay_finish_event(); - replay_mutex_unlock(); return res; } else { - replay_mutex_unlock(); error_report("Missing character read all event in the replay log"); exit(1); } @@ -146,19 +143,17 @@ int replay_char_read_all_load(uint8_t *buf) void replay_char_read_all_save_error(int res) { + g_assert(replay_mutex_locked()); assert(res < 0); replay_save_instructions(); - replay_mutex_lock(); replay_put_event(EVENT_CHAR_READ_ALL_ERROR); replay_put_dword(res); - replay_mutex_unlock(); } void replay_char_read_all_save_buf(uint8_t *buf, int offset) { + g_assert(replay_mutex_locked()); replay_save_instructions(); - replay_mutex_lock(); replay_put_event(EVENT_CHAR_READ_ALL); replay_put_array(buf, offset); - replay_mutex_unlock(); } diff --git a/replay/replay-events.c b/replay/replay-events.c index e858254074..54dd9d2606 100644 --- a/replay/replay-events.c +++ b/replay/replay-events.c @@ -79,16 +79,14 @@ bool replay_has_events(void) void replay_flush_events(void) { - replay_mutex_lock(); + g_assert(replay_mutex_locked()); + while (!QTAILQ_EMPTY(&events_list)) { Event *event = QTAILQ_FIRST(&events_list); - replay_mutex_unlock(); replay_run_event(event); - replay_mutex_lock(); QTAILQ_REMOVE(&events_list, event, events); g_free(event); } - replay_mutex_unlock(); } void replay_disable_events(void) @@ -102,14 +100,14 @@ void replay_disable_events(void) void replay_clear_events(void) { - replay_mutex_lock(); + g_assert(replay_mutex_locked()); + while (!QTAILQ_EMPTY(&events_list)) { Event *event = QTAILQ_FIRST(&events_list); QTAILQ_REMOVE(&events_list, event, events); g_free(event); } - replay_mutex_unlock(); } /*! Adds specified async event to the queue */ @@ -136,9 +134,8 @@ void replay_add_event(ReplayAsyncEventKind event_kind, event->opaque2 = opaque2; event->id = id; - replay_mutex_lock(); + g_assert(replay_mutex_locked()); QTAILQ_INSERT_TAIL(&events_list, event, events); - replay_mutex_unlock(); } void replay_bh_schedule_event(QEMUBH *bh) @@ -207,13 +204,11 @@ static void replay_save_event(Event *event, int checkpoint) /* Called with replay mutex locked */ void replay_save_events(int checkpoint) { + g_assert(replay_mutex_locked()); while (!QTAILQ_EMPTY(&events_list)) { Event *event = QTAILQ_FIRST(&events_list); replay_save_event(event, checkpoint); - - replay_mutex_unlock(); replay_run_event(event); - replay_mutex_lock(); QTAILQ_REMOVE(&events_list, event, events); g_free(event); } @@ -292,6 +287,7 @@ static Event *replay_read_event(int checkpoint) /* Called with replay mutex locked */ void replay_read_events(int checkpoint) { + g_assert(replay_mutex_locked()); while (replay_state.data_kind == EVENT_ASYNC) { Event *event = replay_read_event(checkpoint); if (!event) { @@ -299,9 +295,7 @@ void replay_read_events(int checkpoint) } replay_finish_event(); read_event_kind = -1; - replay_mutex_unlock(); replay_run_event(event); - replay_mutex_lock(); g_free(event); } diff --git a/replay/replay-internal.c b/replay/replay-internal.c index fa7bba6dfd..8e7474f787 100644 --- a/replay/replay-internal.c +++ b/replay/replay-internal.c @@ -174,6 +174,9 @@ static __thread bool replay_locked; void replay_mutex_init(void) { qemu_mutex_init(&lock); + /* Hold the mutex while we start-up */ + qemu_mutex_lock(&lock); + replay_locked = true; } bool replay_mutex_locked(void) @@ -181,25 +184,31 @@ bool replay_mutex_locked(void) return replay_locked; } +/* Ordering constraints, replay_lock must be taken before BQL */ void replay_mutex_lock(void) { - g_assert(!replay_mutex_locked()); - qemu_mutex_lock(&lock); - replay_locked = true; + if (replay_mode != REPLAY_MODE_NONE) { + g_assert(!qemu_mutex_iothread_locked()); + g_assert(!replay_mutex_locked()); + qemu_mutex_lock(&lock); + replay_locked = true; + } } void replay_mutex_unlock(void) { - g_assert(replay_mutex_locked()); - replay_locked = false; - qemu_mutex_unlock(&lock); + if (replay_mode != REPLAY_MODE_NONE) { + g_assert(replay_mutex_locked()); + replay_locked = false; + qemu_mutex_unlock(&lock); + } } /*! Saves cached instructions. */ void replay_save_instructions(void) { if (replay_file && replay_mode == REPLAY_MODE_RECORD) { - replay_mutex_lock(); + g_assert(replay_mutex_locked()); int diff = (int)(replay_get_current_step() - replay_state.current_step); /* Time can only go forward */ @@ -210,6 +219,5 @@ void replay_save_instructions(void) replay_put_dword(diff); replay_state.current_step += diff; } - replay_mutex_unlock(); } } diff --git a/replay/replay-time.c b/replay/replay-time.c index f70382a88f..6a7565ec8d 100644 --- a/replay/replay-time.c +++ b/replay/replay-time.c @@ -17,13 +17,13 @@ int64_t replay_save_clock(ReplayClockKind kind, int64_t clock) { - replay_save_instructions(); if (replay_file) { - replay_mutex_lock(); + g_assert(replay_mutex_locked()); + + replay_save_instructions(); replay_put_event(EVENT_CLOCK + kind); replay_put_qword(clock); - replay_mutex_unlock(); } return clock; @@ -46,16 +46,16 @@ void replay_read_next_clock(ReplayClockKind kind) /*! Reads next clock event from the input. */ int64_t replay_read_clock(ReplayClockKind kind) { + g_assert(replay_file && replay_mutex_locked()); + replay_account_executed_instructions(); if (replay_file) { int64_t ret; - replay_mutex_lock(); if (replay_next_event_is(EVENT_CLOCK + kind)) { replay_read_next_clock(kind); } ret = replay_state.cached_clock[kind]; - replay_mutex_unlock(); return ret; } diff --git a/replay/replay.c b/replay/replay.c index 5d05ee0460..90f98b7490 100644 --- a/replay/replay.c +++ b/replay/replay.c @@ -81,7 +81,7 @@ int replay_get_instructions(void) void replay_account_executed_instructions(void) { if (replay_mode == REPLAY_MODE_PLAY) { - replay_mutex_lock(); + g_assert(replay_mutex_locked()); if (replay_state.instructions_count > 0) { int count = (int)(replay_get_current_step() - replay_state.current_step); @@ -100,24 +100,22 @@ void replay_account_executed_instructions(void) qemu_notify_event(); } } - replay_mutex_unlock(); } } bool replay_exception(void) { + if (replay_mode == REPLAY_MODE_RECORD) { + g_assert(replay_mutex_locked()); replay_save_instructions(); - replay_mutex_lock(); replay_put_event(EVENT_EXCEPTION); - replay_mutex_unlock(); return true; } else if (replay_mode == REPLAY_MODE_PLAY) { + g_assert(replay_mutex_locked()); bool res = replay_has_exception(); if (res) { - replay_mutex_lock(); replay_finish_event(); - replay_mutex_unlock(); } return res; } @@ -129,10 +127,9 @@ bool replay_has_exception(void) { bool res = false; if (replay_mode == REPLAY_MODE_PLAY) { + g_assert(replay_mutex_locked()); replay_account_executed_instructions(); - replay_mutex_lock(); res = replay_next_event_is(EVENT_EXCEPTION); - replay_mutex_unlock(); } return res; @@ -141,17 +138,15 @@ bool replay_has_exception(void) bool replay_interrupt(void) { if (replay_mode == REPLAY_MODE_RECORD) { + g_assert(replay_mutex_locked()); replay_save_instructions(); - replay_mutex_lock(); replay_put_event(EVENT_INTERRUPT); - replay_mutex_unlock(); return true; } else if (replay_mode == REPLAY_MODE_PLAY) { + g_assert(replay_mutex_locked()); bool res = replay_has_interrupt(); if (res) { - replay_mutex_lock(); replay_finish_event(); - replay_mutex_unlock(); } return res; } @@ -163,10 +158,9 @@ bool replay_has_interrupt(void) { bool res = false; if (replay_mode == REPLAY_MODE_PLAY) { + g_assert(replay_mutex_locked()); replay_account_executed_instructions(); - replay_mutex_lock(); res = replay_next_event_is(EVENT_INTERRUPT); - replay_mutex_unlock(); } return res; } @@ -174,9 +168,8 @@ bool replay_has_interrupt(void) void replay_shutdown_request(ShutdownCause cause) { if (replay_mode == REPLAY_MODE_RECORD) { - replay_mutex_lock(); + g_assert(replay_mutex_locked()); replay_put_event(EVENT_SHUTDOWN + cause); - replay_mutex_unlock(); } } @@ -190,9 +183,9 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint) return true; } - replay_mutex_lock(); if (replay_mode == REPLAY_MODE_PLAY) { + g_assert(replay_mutex_locked()); if (replay_next_event_is(EVENT_CHECKPOINT + checkpoint)) { replay_finish_event(); } else if (replay_state.data_kind != EVENT_ASYNC) { @@ -205,12 +198,12 @@ bool replay_checkpoint(ReplayCheckpoint checkpoint) checkpoint were processed */ res = replay_state.data_kind != EVENT_ASYNC; } else if (replay_mode == REPLAY_MODE_RECORD) { + g_assert(replay_mutex_locked()); replay_put_event(EVENT_CHECKPOINT + checkpoint); replay_save_events(checkpoint); res = true; } out: - replay_mutex_unlock(); return res; } @@ -233,8 +226,6 @@ static void replay_enable(const char *fname, int mode) atexit(replay_finish); - replay_mutex_init(); - replay_file = fopen(fname, fmode); if (replay_file == NULL) { fprintf(stderr, "Replay: open %s: %s\n", fname, strerror(errno)); @@ -242,8 +233,9 @@ static void replay_enable(const char *fname, int mode) } replay_filename = g_strdup(fname); - replay_mode = mode; + replay_mutex_init(); + replay_state.data_kind = -1; replay_state.instructions_count = 0; replay_state.current_step = 0; diff --git a/util/main-loop.c b/util/main-loop.c index 7558eb5f53..992f9b0f34 100644 --- a/util/main-loop.c +++ b/util/main-loop.c @@ -29,6 +29,7 @@ #include "qemu/sockets.h" // struct in_addr needed for libslirp.h #include "sysemu/qtest.h" #include "sysemu/cpus.h" +#include "sysemu/replay.h" #include "slirp/libslirp.h" #include "qemu/main-loop.h" #include "block/aio.h" @@ -245,18 +246,19 @@ static int os_host_main_loop_wait(int64_t timeout) timeout = SCALE_MS; } + if (timeout) { spin_counter = 0; - qemu_mutex_unlock_iothread(); } else { spin_counter++; } + qemu_mutex_unlock_iothread(); + replay_mutex_unlock(); ret = qemu_poll_ns((GPollFD *)gpollfds->data, gpollfds->len, timeout); - if (timeout) { - qemu_mutex_lock_iothread(); - } + replay_mutex_lock(); + qemu_mutex_lock_iothread(); glib_pollfds_poll(); @@ -463,8 +465,13 @@ static int os_host_main_loop_wait(int64_t timeout) poll_timeout_ns = qemu_soonest_timeout(poll_timeout_ns, timeout); qemu_mutex_unlock_iothread(); + + replay_mutex_unlock(); + g_poll_ret = qemu_poll_ns(poll_fds, n_poll_fds + w->num, poll_timeout_ns); + replay_mutex_lock(); + qemu_mutex_lock_iothread(); if (g_poll_ret > 0) { for (i = 0; i < w->num; i++) { diff --git a/vl.c b/vl.c index e81152417a..5925a4b502 100644 --- a/vl.c +++ b/vl.c @@ -3058,6 +3058,7 @@ int main(int argc, char **argv, char **envp) qemu_init_cpu_list(); qemu_init_cpu_loop(); + qemu_mutex_lock_iothread(); atexit(qemu_run_exit_notifiers);