migration: Add per vmstate downtime tracepoints

We have a bunch of savevm_section* tracepoints.  They're good for analyzing
the migration stream, but not always suitable if someone would like to
analyze the migration downtime.  Two major problems:

  - savevm_section* tracepoints dump all sections, whereas we only care
    about the sections that contribute to the downtime

  - They don't have an identifier to show the type of sections, so there is
    no easy way to filter the downtime information either.

We could add the type to those tracepoints, but instead of doing so, this
patch keeps them untouched and adds a bunch of downtime-specific
tracepoints, so one can enable "vmstate_downtime*" tracepoints and get a
full picture of how the downtime is distributed across iterative and
non-iterative vmstate save/load.

Note that here both save() and load() need to be traced, because both of
them may contribute to the downtime.  The contribution is not a simple "add
them together", though: consider when the src is doing a save() of device1
while the dest can be load()ing for device2, so they can happen
concurrently.

Tracking both sides makes sense because device load() and save() can be
imbalanced: one device can save() super fast but load() super slow, or vice
versa.  We can't figure that out without tracing both.

Signed-off-by: Peter Xu <peterx@redhat.com>
Reviewed-by: Juan Quintela <quintela@redhat.com>
Signed-off-by: Juan Quintela <quintela@redhat.com>
Message-ID: <20231030163346.765724-4-peterx@redhat.com>
This commit is contained in:
Peter Xu 2023-10-30 12:33:44 -04:00 committed by Juan Quintela
parent e22ffad03a
commit 3c80f14272
2 changed files with 47 additions and 4 deletions

View File

@ -1491,6 +1491,7 @@ void qemu_savevm_state_complete_postcopy(QEMUFile *f)
static
int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
{
int64_t start_ts_each, end_ts_each;
SaveStateEntry *se;
int ret;
@ -1507,6 +1508,8 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
continue;
}
}
start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
trace_savevm_section_start(se->idstr, se->section_id);
save_section_header(f, se, QEMU_VM_SECTION_END);
@ -1518,6 +1521,9 @@ int qemu_savevm_state_complete_precopy_iterable(QEMUFile *f, bool in_postcopy)
qemu_file_set_error(f, ret);
return -1;
}
end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
trace_vmstate_downtime_save("iterable", se->idstr, se->instance_id,
end_ts_each - start_ts_each);
}
return 0;
@ -1528,6 +1534,7 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
bool inactivate_disks)
{
MigrationState *ms = migrate_get_current();
int64_t start_ts_each, end_ts_each;
JSONWriter *vmdesc = ms->vmdesc;
int vmdesc_len;
SaveStateEntry *se;
@ -1539,11 +1546,17 @@ int qemu_savevm_state_complete_precopy_non_iterable(QEMUFile *f,
continue;
}
start_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
ret = vmstate_save(f, se, vmdesc);
if (ret) {
qemu_file_set_error(f, ret);
return ret;
}
end_ts_each = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
trace_vmstate_downtime_save("non-iterable", se->idstr, se->instance_id,
end_ts_each - start_ts_each);
}
if (inactivate_disks) {
@ -2537,9 +2550,12 @@ static bool check_section_footer(QEMUFile *f, SaveStateEntry *se)
}
static int
qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis,
uint8_t type)
{
bool trace_downtime = (type == QEMU_VM_SECTION_FULL);
uint32_t instance_id, version_id, section_id;
int64_t start_ts, end_ts;
SaveStateEntry *se;
char idstr[256];
int ret;
@ -2588,12 +2604,23 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
return -EINVAL;
}
if (trace_downtime) {
start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
}
ret = vmstate_load(f, se);
if (ret < 0) {
error_report("error while loading state for instance 0x%"PRIx32" of"
" device '%s'", instance_id, idstr);
return ret;
}
if (trace_downtime) {
end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
trace_vmstate_downtime_load("non-iterable", se->idstr,
se->instance_id, end_ts - start_ts);
}
if (!check_section_footer(f, se)) {
return -EINVAL;
}
@ -2602,8 +2629,11 @@ qemu_loadvm_section_start_full(QEMUFile *f, MigrationIncomingState *mis)
}
static int
qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis,
uint8_t type)
{
bool trace_downtime = (type == QEMU_VM_SECTION_END);
int64_t start_ts, end_ts;
uint32_t section_id;
SaveStateEntry *se;
int ret;
@ -2628,12 +2658,23 @@ qemu_loadvm_section_part_end(QEMUFile *f, MigrationIncomingState *mis)
return -EINVAL;
}
if (trace_downtime) {
start_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
}
ret = vmstate_load(f, se);
if (ret < 0) {
error_report("error while loading state section id %d(%s)",
section_id, se->idstr);
return ret;
}
if (trace_downtime) {
end_ts = qemu_clock_get_us(QEMU_CLOCK_REALTIME);
trace_vmstate_downtime_load("iterable", se->idstr,
se->instance_id, end_ts - start_ts);
}
if (!check_section_footer(f, se)) {
return -EINVAL;
}
@ -2822,14 +2863,14 @@ retry:
switch (section_type) {
case QEMU_VM_SECTION_START:
case QEMU_VM_SECTION_FULL:
ret = qemu_loadvm_section_start_full(f, mis);
ret = qemu_loadvm_section_start_full(f, mis, section_type);
if (ret < 0) {
goto out;
}
break;
case QEMU_VM_SECTION_PART:
case QEMU_VM_SECTION_END:
ret = qemu_loadvm_section_part_end(f, mis);
ret = qemu_loadvm_section_part_end(f, mis, section_type);
if (ret < 0) {
goto out;
}

View File

@ -48,6 +48,8 @@ savevm_state_cleanup(void) ""
savevm_state_complete_precopy(void) ""
vmstate_save(const char *idstr, const char *vmsd_name) "%s, %s"
vmstate_load(const char *idstr, const char *vmsd_name) "%s, %s"
# Per-entry time spent in save()/load(); type is "iterable" or "non-iterable",
# downtime is the elapsed time in microseconds (from qemu_clock_get_us()).
vmstate_downtime_save(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%"PRIu32" downtime=%"PRIi64
vmstate_downtime_load(const char *type, const char *idstr, uint32_t instance_id, int64_t downtime) "type=%s idstr=%s instance_id=%"PRIu32" downtime=%"PRIi64
postcopy_pause_incoming(void) ""
postcopy_pause_incoming_continued(void) ""
postcopy_page_req_sync(void *host_addr) "sync page req %p"