replay: introduce a central report point for sync errors

Figuring out why replay has failed is tricky at the best of times.
Let's centralise the reporting of a replay sync error and add a little
bit of extra information to help with debugging.

Reviewed-by: Richard Henderson <richard.henderson@linaro.org>
Signed-off-by: Alex Bennée <alex.bennee@linaro.org>
Message-Id: <20231211091346.14616-10-alex.bennee@linaro.org>
This commit is contained in:
Alex Bennée 2023-12-11 09:13:38 +00:00
parent 2b7a58b60a
commit dcda73211c
4 changed files with 131 additions and 1 deletions

View File

@ -175,6 +175,7 @@ void replay_fetch_data_kind(void)
if (replay_file) {
if (!replay_state.has_unread_data) {
replay_state.data_kind = replay_get_byte();
replay_state.current_event++;
if (replay_state.data_kind == EVENT_INSTRUCTION) {
replay_state.instruction_count = replay_get_dword();
}

View File

@ -25,7 +25,12 @@ typedef enum ReplayAsyncEventKind {
REPLAY_ASYNC_COUNT
} ReplayAsyncEventKind;
/* Any changes to order/number of events will need to bump REPLAY_VERSION */
/*
* Any changes to order/number of events will need to bump
* REPLAY_VERSION to prevent confusion with old logs. Also don't
* forget to update replay_event_name() to make your debugging life
* easier.
*/
enum ReplayEvents {
/* for instruction event */
EVENT_INSTRUCTION,
@ -74,6 +79,7 @@ enum ReplayEvents {
* @cached_clock: Cached clocks values
* @current_icount: number of processed instructions
* @instruction_count: number of instructions until next event
* @current_event: current event index
* @data_kind: current event
* @has_unread_data: true if event not yet processed
* @file_offset: offset into replay log at replay snapshot
@ -84,6 +90,7 @@ typedef struct ReplayState {
int64_t cached_clock[REPLAY_CLOCK_COUNT];
uint64_t current_icount;
int instruction_count;
unsigned int current_event;
unsigned int data_kind;
bool has_unread_data;
uint64_t file_offset;
@ -188,6 +195,16 @@ void replay_event_net_save(void *opaque);
/*! Reads network from the file. */
void *replay_event_net_load(void);
/* Diagnostics */
/**
 * replay_sync_error(): report sync error and exit
 * @error: description of the failure, printed at the start of the report
 *
 * When we reach an error condition we want to report it centrally so
 * we can also dump some useful information into the logs.
 *
 * This function does not return.
 */
G_NORETURN void replay_sync_error(const char *error);
/* VMState-related functions */
/* Registers replay VMState.

View File

@ -55,6 +55,7 @@ static const VMStateDescription vmstate_replay = {
VMSTATE_INT64_ARRAY(cached_clock, ReplayState, REPLAY_CLOCK_COUNT),
VMSTATE_UINT64(current_icount, ReplayState),
VMSTATE_INT32(instruction_count, ReplayState),
VMSTATE_UINT32(current_event, ReplayState),
VMSTATE_UINT32(data_kind, ReplayState),
VMSTATE_BOOL(has_unread_data, ReplayState),
VMSTATE_UINT64(file_offset, ReplayState),

View File

@ -38,6 +38,107 @@ static GSList *replay_blockers;
uint64_t replay_break_icount = -1ULL;
QEMUTimer *replay_break_timer;
/* Pretty print event names */
/*
 * Give the printable name of an asynchronous event kind.
 *
 * Every kind that has a name is spelled out as its own case; a value
 * with no case here trips g_assert_not_reached().
 */
static const char *replay_async_event_name(ReplayAsyncEventKind event)
{
    switch (event) {
    case REPLAY_ASYNC_EVENT_BH:
        return "ASYNC_EVENT_BH";
    case REPLAY_ASYNC_EVENT_BH_ONESHOT:
        return "ASYNC_EVENT_BH_ONESHOT";
    case REPLAY_ASYNC_EVENT_INPUT:
        return "ASYNC_EVENT_INPUT";
    case REPLAY_ASYNC_EVENT_INPUT_SYNC:
        return "ASYNC_EVENT_INPUT_SYNC";
    case REPLAY_ASYNC_EVENT_CHAR_READ:
        return "ASYNC_EVENT_CHAR_READ";
    case REPLAY_ASYNC_EVENT_BLOCK:
        return "ASYNC_EVENT_BLOCK";
    case REPLAY_ASYNC_EVENT_NET:
        return "ASYNC_EVENT_NET";
    default:
        g_assert_not_reached();
    }
}
/*
 * Give the printable name of a replay clock.
 *
 * A clock kind with no case here trips g_assert_not_reached().
 */
static const char *replay_clock_event_name(ReplayClockKind clock)
{
    switch (clock) {
    case REPLAY_CLOCK_HOST:
        return "CLOCK_HOST";
    case REPLAY_CLOCK_VIRTUAL_RT:
        return "CLOCK_VIRTUAL_RT";
    default:
        g_assert_not_reached();
    }
}
/*
 * Give the printable name of a shutdown cause.
 *
 * A cause with no case here trips g_assert_not_reached().
 */
static const char *replay_shutdown_event_name(ShutdownCause cause)
{
    switch (cause) {
    case SHUTDOWN_CAUSE_NONE:
        return "SHUTDOWN_CAUSE_NONE";
    case SHUTDOWN_CAUSE_HOST_ERROR:
        return "SHUTDOWN_CAUSE_HOST_ERROR";
    case SHUTDOWN_CAUSE_HOST_QMP_QUIT:
        return "SHUTDOWN_CAUSE_HOST_QMP_QUIT";
    case SHUTDOWN_CAUSE_HOST_QMP_SYSTEM_RESET:
        return "SHUTDOWN_CAUSE_HOST_QMP_SYSTEM_RESET";
    case SHUTDOWN_CAUSE_HOST_SIGNAL:
        return "SHUTDOWN_CAUSE_HOST_SIGNAL";
    case SHUTDOWN_CAUSE_HOST_UI:
        return "SHUTDOWN_CAUSE_HOST_UI";
    case SHUTDOWN_CAUSE_GUEST_SHUTDOWN:
        return "SHUTDOWN_CAUSE_GUEST_SHUTDOWN";
    case SHUTDOWN_CAUSE_GUEST_RESET:
        return "SHUTDOWN_CAUSE_GUEST_RESET";
    case SHUTDOWN_CAUSE_GUEST_PANIC:
        return "SHUTDOWN_CAUSE_GUEST_PANIC";
    case SHUTDOWN_CAUSE_SUBSYSTEM_RESET:
        return "SHUTDOWN_CAUSE_SUBSYSTEM_RESET";
    case SHUTDOWN_CAUSE_SNAPSHOT_LOAD:
        return "SHUTDOWN_CAUSE_SNAPSHOT_LOAD";
    default:
        g_assert_not_reached();
    }
}
static const char *replay_checkpoint_event_name(enum ReplayCheckpoint checkpoint)
{
switch (checkpoint) {
#define CHECKPOINT_EVENT(_x) case CHECKPOINT_ ## _x: return "CHECKPOINT_" #_x
CHECKPOINT_EVENT(CLOCK_WARP_START);
CHECKPOINT_EVENT(CLOCK_WARP_ACCOUNT);
CHECKPOINT_EVENT(RESET_REQUESTED);
CHECKPOINT_EVENT(SUSPEND_REQUESTED);
CHECKPOINT_EVENT(CLOCK_VIRTUAL);
CHECKPOINT_EVENT(CLOCK_HOST);
CHECKPOINT_EVENT(CLOCK_VIRTUAL_RT);
CHECKPOINT_EVENT(INIT);
CHECKPOINT_EVENT(RESET);
#undef CHECKPOINT_EVENT
default:
g_assert_not_reached();
}
}
/*
 * Give the printable name of a replay log event.
 *
 * Simple events are named directly. Events that fall inside the
 * async, shutdown, clock or checkpoint ranges are passed to the
 * matching helper with the base of the range subtracted.
 *
 * This is called while a sync error is being reported, which is
 * exactly when the event value may be bogus: data_kind is set to -1 by
 * replay_enable() and is otherwise a raw byte read from the log. An
 * unrecognised value therefore yields a placeholder name rather than
 * an assertion failure, so the report still gets printed.
 */
static const char *replay_event_name(enum ReplayEvents event)
{
    /* First deal with the simple ones */
    switch (event) {
#define EVENT(_x) case EVENT_ ## _x: return "EVENT_"#_x
    EVENT(INSTRUCTION);
    EVENT(INTERRUPT);
    EVENT(EXCEPTION);
    EVENT(CHAR_WRITE);
    EVENT(CHAR_READ_ALL);
    EVENT(AUDIO_OUT);
    EVENT(AUDIO_IN);
    EVENT(RANDOM);
#undef EVENT
    default:
        break;
    }

    /* Then the events that are encoded as a base plus a sub-kind */
    if (event >= EVENT_ASYNC && event <= EVENT_ASYNC_LAST) {
        return replay_async_event_name(event - EVENT_ASYNC);
    }
    if (event >= EVENT_SHUTDOWN && event <= EVENT_SHUTDOWN_LAST) {
        return replay_shutdown_event_name(event - EVENT_SHUTDOWN);
    }
    if (event >= EVENT_CLOCK && event <= EVENT_CLOCK_LAST) {
        return replay_clock_event_name(event - EVENT_CLOCK);
    }
    if (event >= EVENT_CHECKPOINT && event <= EVENT_CHECKPOINT_LAST) {
        return replay_checkpoint_event_name(event - EVENT_CHECKPOINT);
    }

    /* Do not assert here: that would hide the error being reported */
    return "EVENT_UNKNOWN";
}
bool replay_next_event_is(int event)
{
bool res = false;
@ -226,6 +327,15 @@ bool replay_has_event(void)
return res;
}
G_NORETURN void replay_sync_error(const char *error)
{
error_report("%s (insn total %"PRId64"/%d left, event %d is %s)", error,
replay_state.current_icount, replay_state.instruction_count,
replay_state.current_event,
replay_event_name(replay_state.data_kind));
abort();
}
static void replay_enable(const char *fname, int mode)
{
const char *fmode = NULL;
@ -258,6 +368,7 @@ static void replay_enable(const char *fname, int mode)
replay_state.data_kind = -1;
replay_state.instruction_count = 0;
replay_state.current_icount = 0;
replay_state.current_event = 0;
replay_state.has_unread_data = 0;
/* skip file header for RECORD and check it for PLAY */