migration: Split save_live_pending() into state_pending_*

We split the function into to:

- state_pending_estimate: We estimate the remaining state size without
  stopping the machine.

- state pending_exact: We calculate the exact amount of remaining
  state.

The only "device" that implements different functions for _estimate()
and _exact() is ram.

Signed-off-by: Juan Quintela <quintela@redhat.com>
Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
Juan Quintela 2022-10-03 02:00:03 +02:00
parent 255dc7af7e
commit c8df4a7aef
13 changed files with 143 additions and 76 deletions

View File

@ -482,15 +482,17 @@ An iterative device must provide:
- A ``load_setup`` function that initialises the data structures on the
destination.
- A ``save_live_pending`` function that is called repeatedly and must
indicate how much more data the iterative data must save. The core
migration code will use this to determine when to pause the CPUs
and complete the migration.
- A ``state_pending_exact`` function that indicates how much more
data we must save. The core migration code will use this to
determine when to pause the CPUs and complete the migration.
- A ``save_live_iterate`` function (called after ``save_live_pending``
when there is significant data still to be sent). It should send
a chunk of data until the point that stream bandwidth limits tell it
to stop. Each call generates one section.
- A ``state_pending_estimate`` function that indicates how much more
data we must save. When the estimated amount is smaller than the
threshold, we call ``state_pending_exact``.
- A ``save_live_iterate`` function should send a chunk of data until
the point that stream bandwidth limits tell it to stop. Each call
generates one section.
- A ``save_live_complete_precopy`` function that must transmit the
last section for the device containing any remaining data.

View File

@ -28,7 +28,7 @@ VFIO implements the device hooks for the iterative approach as follows:
* A ``load_setup`` function that sets up the migration region on the
destination and sets _RESUMING flag in the VFIO device state.
* A ``save_live_pending`` function that reads pending_bytes from the vendor
* A ``state_pending_exact`` function that reads pending_bytes from the vendor
driver, which indicates the amount of data that the vendor driver has yet to
save for the VFIO device.
@ -114,7 +114,7 @@ Live migration save path
(RUNNING, _SETUP, _RUNNING|_SAVING)
|
(RUNNING, _ACTIVE, _RUNNING|_SAVING)
If device is active, get pending_bytes by .save_live_pending()
If device is active, get pending_bytes by .state_pending_exact()
If total pending_bytes >= threshold_size, call .save_live_iterate()
Data of VFIO device for pre-copy phase is copied
Iterate till total pending bytes converge and are less than threshold

View File

@ -182,10 +182,10 @@ static int cmma_save_setup(QEMUFile *f, void *opaque)
return 0;
}
static void cmma_save_pending(void *opaque, uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
static void cmma_state_pending(void *opaque, uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
{
S390StAttribState *sas = S390_STATTRIB(opaque);
S390StAttribClass *sac = S390_STATTRIB_GET_CLASS(sas);
@ -371,7 +371,8 @@ static SaveVMHandlers savevm_s390_stattrib_handlers = {
.save_setup = cmma_save_setup,
.save_live_iterate = cmma_save_iterate,
.save_live_complete_precopy = cmma_save_complete,
.save_live_pending = cmma_save_pending,
.state_pending_exact = cmma_state_pending,
.state_pending_estimate = cmma_state_pending,
.save_cleanup = cmma_save_cleanup,
.load_state = cmma_load,
.is_active = cmma_active,

View File

@ -456,11 +456,11 @@ static void vfio_save_cleanup(void *opaque)
trace_vfio_save_cleanup(vbasedev->name);
}
static void vfio_save_pending(void *opaque,
uint64_t threshold_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
static void vfio_state_pending(void *opaque,
uint64_t threshold_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
{
VFIODevice *vbasedev = opaque;
VFIOMigration *migration = vbasedev->migration;
@ -473,7 +473,7 @@ static void vfio_save_pending(void *opaque,
*res_precopy_only += migration->pending_bytes;
trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
trace_vfio_state_pending(vbasedev->name, *res_precopy_only,
*res_postcopy_only, *res_compatible);
}
@ -515,9 +515,9 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
}
/*
* Reset pending_bytes as .save_live_pending is not called during savevm or
* snapshot case, in such case vfio_update_pending() at the start of this
* function updates pending_bytes.
* Reset pending_bytes as state_pending* are not called during
* savevm or snapshot case, in such case vfio_update_pending() at
* the start of this function updates pending_bytes.
*/
migration->pending_bytes = 0;
trace_vfio_save_iterate(vbasedev->name, data_size);
@ -685,7 +685,8 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
static SaveVMHandlers savevm_vfio_handlers = {
.save_setup = vfio_save_setup,
.save_cleanup = vfio_save_cleanup,
.save_live_pending = vfio_save_pending,
.state_pending_exact = vfio_state_pending,
.state_pending_estimate = vfio_state_pending,
.save_live_iterate = vfio_save_iterate,
.save_live_complete_precopy = vfio_save_complete_precopy,
.save_state = vfio_save_state,

View File

@ -157,7 +157,7 @@ vfio_save_cleanup(const char *name) " (%s)"
vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64
vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64
vfio_save_device_config_state(const char *name) " (%s)"
vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
vfio_state_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
vfio_save_complete_precopy(const char *name) " (%s)"
vfio_load_device_config_state(const char *name) " (%s)"

View File

@ -46,11 +46,6 @@ typedef struct SaveVMHandlers {
/* This runs outside the iothread lock! */
int (*save_setup)(QEMUFile *f, void *opaque);
void (*save_live_pending)(void *opaque,
uint64_t threshold_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only);
/* Note for save_live_pending:
* - res_precopy_only is for data which must be migrated in precopy phase
* or in stopped state, in other words - before target vm start
@ -61,8 +56,18 @@ typedef struct SaveVMHandlers {
* Sum of res_postcopy_only, res_compatible and res_postcopy_only is the
* whole amount of pending data.
*/
/* This estimates the remaining data to transfer */
void (*state_pending_estimate)(void *opaque,
uint64_t threshold_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only);
/* This calculate the exact remaining data to transfer */
void (*state_pending_exact)(void *opaque,
uint64_t threshold_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only);
LoadStateHandler *load_state;
int (*load_setup)(QEMUFile *f, void *opaque);
int (*load_cleanup)(void *opaque);

View File

@ -762,11 +762,11 @@ static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
return 0;
}
static void dirty_bitmap_save_pending(void *opaque,
uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
static void dirty_bitmap_state_pending(void *opaque,
uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
{
DBMSaveState *s = &((DBMState *)opaque)->save;
SaveBitmapState *dbms;
@ -784,7 +784,7 @@ static void dirty_bitmap_save_pending(void *opaque,
qemu_mutex_unlock_iothread();
trace_dirty_bitmap_save_pending(pending, max_size);
trace_dirty_bitmap_state_pending(pending);
*res_postcopy_only += pending;
}
@ -1253,7 +1253,8 @@ static SaveVMHandlers savevm_dirty_bitmap_handlers = {
.save_live_complete_postcopy = dirty_bitmap_save_complete,
.save_live_complete_precopy = dirty_bitmap_save_complete,
.has_postcopy = dirty_bitmap_has_postcopy,
.save_live_pending = dirty_bitmap_save_pending,
.state_pending_exact = dirty_bitmap_state_pending,
.state_pending_estimate = dirty_bitmap_state_pending,
.save_live_iterate = dirty_bitmap_save_iterate,
.is_active_iterate = dirty_bitmap_is_active_iterate,
.load_state = dirty_bitmap_load,

View File

@ -863,10 +863,10 @@ static int block_save_complete(QEMUFile *f, void *opaque)
return 0;
}
static void block_save_pending(void *opaque, uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
static void block_state_pending(void *opaque, uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
{
/* Estimate pending number of bytes to send */
uint64_t pending;
@ -885,7 +885,7 @@ static void block_save_pending(void *opaque, uint64_t max_size,
pending = BLK_MIG_BLOCK_SIZE;
}
trace_migration_block_save_pending(pending);
trace_migration_block_state_pending(pending);
/* We don't do postcopy */
*res_precopy_only += pending;
}
@ -1020,7 +1020,8 @@ static SaveVMHandlers savevm_block_handlers = {
.save_setup = block_save_setup,
.save_live_iterate = block_save_iterate,
.save_live_complete_precopy = block_save_complete,
.save_live_pending = block_save_pending,
.state_pending_exact = block_state_pending,
.state_pending_estimate = block_state_pending,
.load_state = block_load,
.save_cleanup = block_migration_cleanup,
.is_active = block_is_active,

View File

@ -3778,15 +3778,23 @@ typedef enum {
*/
static MigIterateState migration_iteration_run(MigrationState *s)
{
uint64_t pending_size, pend_pre, pend_compat, pend_post;
uint64_t pend_pre, pend_compat, pend_post;
bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
qemu_savevm_state_pending(s->threshold_size, &pend_pre,
&pend_compat, &pend_post);
pending_size = pend_pre + pend_compat + pend_post;
qemu_savevm_state_pending_estimate(s->threshold_size, &pend_pre,
&pend_compat, &pend_post);
uint64_t pending_size = pend_pre + pend_compat + pend_post;
trace_migrate_pending(pending_size, s->threshold_size,
pend_pre, pend_compat, pend_post);
trace_migrate_pending_estimate(pending_size, s->threshold_size,
pend_pre, pend_compat, pend_post);
if (pend_pre + pend_compat <= s->threshold_size) {
qemu_savevm_state_pending_exact(s->threshold_size, &pend_pre,
&pend_compat, &pend_post);
pending_size = pend_pre + pend_compat + pend_post;
trace_migrate_pending_exact(pending_size, s->threshold_size,
pend_pre, pend_compat, pend_post);
}
if (pending_size && pending_size >= s->threshold_size) {
/* Still a significant amount to transfer */

View File

@ -3409,19 +3409,35 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
return 0;
}
static void ram_save_pending(void *opaque, uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
static void ram_state_pending_estimate(void *opaque, uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
{
RAMState **temp = opaque;
RAMState *rs = *temp;
uint64_t remaining_size;
remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
if (!migration_in_postcopy() &&
remaining_size < max_size) {
if (migrate_postcopy_ram()) {
/* We can do postcopy, and all the data is postcopiable */
*res_postcopy_only += remaining_size;
} else {
*res_precopy_only += remaining_size;
}
}
static void ram_state_pending_exact(void *opaque, uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
{
RAMState **temp = opaque;
RAMState *rs = *temp;
uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
if (!migration_in_postcopy()) {
qemu_mutex_lock_iothread();
WITH_RCU_READ_LOCK_GUARD() {
migration_bitmap_sync_precopy(rs);
@ -4577,7 +4593,8 @@ static SaveVMHandlers savevm_ram_handlers = {
.save_live_complete_postcopy = ram_save_complete,
.save_live_complete_precopy = ram_save_complete,
.has_postcopy = ram_has_postcopy,
.save_live_pending = ram_save_pending,
.state_pending_exact = ram_state_pending_exact,
.state_pending_estimate = ram_state_pending_estimate,
.load_state = ram_load,
.save_cleanup = ram_save_cleanup,
.load_setup = ram_load_setup,

View File

@ -1472,10 +1472,10 @@ flush:
* the result is split into the amount for units that can and
* for units that can't do postcopy.
*/
void qemu_savevm_state_pending(uint64_t threshold_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
void qemu_savevm_state_pending_estimate(uint64_t threshold_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
{
SaveStateEntry *se;
@ -1485,7 +1485,7 @@ void qemu_savevm_state_pending(uint64_t threshold_size,
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->save_live_pending) {
if (!se->ops || !se->ops->state_pending_exact) {
continue;
}
if (se->ops->is_active) {
@ -1493,9 +1493,35 @@ void qemu_savevm_state_pending(uint64_t threshold_size,
continue;
}
}
se->ops->save_live_pending(se->opaque, threshold_size,
res_precopy_only, res_compatible,
res_postcopy_only);
se->ops->state_pending_exact(se->opaque, threshold_size,
res_precopy_only, res_compatible,
res_postcopy_only);
}
}
void qemu_savevm_state_pending_exact(uint64_t threshold_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only)
{
SaveStateEntry *se;
*res_precopy_only = 0;
*res_compatible = 0;
*res_postcopy_only = 0;
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
if (!se->ops || !se->ops->state_pending_estimate) {
continue;
}
if (se->ops->is_active) {
if (!se->ops->is_active(se->opaque)) {
continue;
}
}
se->ops->state_pending_estimate(se->opaque, threshold_size,
res_precopy_only, res_compatible,
res_postcopy_only);
}
}

View File

@ -40,10 +40,14 @@ void qemu_savevm_state_cleanup(void);
void qemu_savevm_state_complete_postcopy(QEMUFile *f);
int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
bool inactivate_disks);
void qemu_savevm_state_pending(uint64_t max_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only);
void qemu_savevm_state_pending_exact(uint64_t threshold_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only);
void qemu_savevm_state_pending_estimate(uint64_t thershold_size,
uint64_t *res_precopy_only,
uint64_t *res_compatible,
uint64_t *res_postcopy_only);
void qemu_savevm_send_ping(QEMUFile *f, uint32_t value);
void qemu_savevm_send_open_return_path(QEMUFile *f);
int qemu_savevm_send_packaged(QEMUFile *f, const uint8_t *buf, size_t len);

View File

@ -150,7 +150,8 @@ migrate_fd_cleanup(void) ""
migrate_fd_error(const char *error_desc) "error=%s"
migrate_fd_cancel(void) ""
migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at 0x%zx len 0x%zx"
migrate_pending(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint64_t post) "pending size %" PRIu64 " max %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
migrate_pending_exact(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint64_t post) "exact pending size %" PRIu64 " max %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
migrate_pending_estimate(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint64_t post) "estimate pending size %" PRIu64 " max %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi64
migration_completion_file_err(void) ""
@ -330,7 +331,7 @@ send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uin
dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
dirty_bitmap_save_complete_enter(void) ""
dirty_bitmap_save_complete_finish(void) ""
dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
dirty_bitmap_state_pending(uint64_t pending) "pending %" PRIu64
dirty_bitmap_load_complete(void) ""
dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
dirty_bitmap_load_bits_zeroes(void) ""
@ -355,7 +356,7 @@ migration_block_save_device_dirty(int64_t sector) "Error reading sector %" PRId6
migration_block_flush_blks(const char *action, int submitted, int read_done, int transferred) "%s submitted %d read_done %d transferred %d"
migration_block_save(const char *mig_stage, int submitted, int transferred) "Enter save live %s submitted %d transferred %d"
migration_block_save_complete(void) "Block migration completed"
migration_block_save_pending(uint64_t pending) "Enter save live pending %" PRIu64
migration_block_state_pending(uint64_t pending) "Enter save live pending %" PRIu64
# page_cache.c
migration_pagecache_init(int64_t max_num_items) "Setting cache buckets to %" PRId64