migration: Split save_live_pending() into state_pending_*
We split the function into two: - state_pending_estimate: We estimate the remaining state size without stopping the machine. - state_pending_exact: We calculate the exact amount of remaining state. The only "device" that implements different functions for _estimate() and _exact() is ram. Signed-off-by: Juan Quintela <quintela@redhat.com> Reviewed-by: Dr. David Alan Gilbert <dgilbert@redhat.com>
This commit is contained in:
parent
255dc7af7e
commit
c8df4a7aef
@ -482,15 +482,17 @@ An iterative device must provide:
|
||||
- A ``load_setup`` function that initialises the data structures on the
|
||||
destination.
|
||||
|
||||
- A ``save_live_pending`` function that is called repeatedly and must
|
||||
indicate how much more data the iterative data must save. The core
|
||||
migration code will use this to determine when to pause the CPUs
|
||||
and complete the migration.
|
||||
- A ``state_pending_exact`` function that indicates how much more
|
||||
data we must save. The core migration code will use this to
|
||||
determine when to pause the CPUs and complete the migration.
|
||||
|
||||
- A ``save_live_iterate`` function (called after ``save_live_pending``
|
||||
when there is significant data still to be sent). It should send
|
||||
a chunk of data until the point that stream bandwidth limits tell it
|
||||
to stop. Each call generates one section.
|
||||
- A ``state_pending_estimate`` function that estimates how much more
|
||||
data we must save. When the estimated amount is smaller than the
|
||||
threshold, we call ``state_pending_exact``.
|
||||
|
||||
- A ``save_live_iterate`` function that should send a chunk of data until
|
||||
the point that stream bandwidth limits tell it to stop. Each call
|
||||
generates one section.
|
||||
|
||||
- A ``save_live_complete_precopy`` function that must transmit the
|
||||
last section for the device containing any remaining data.
|
||||
|
@ -28,7 +28,7 @@ VFIO implements the device hooks for the iterative approach as follows:
|
||||
* A ``load_setup`` function that sets up the migration region on the
|
||||
destination and sets _RESUMING flag in the VFIO device state.
|
||||
|
||||
* A ``save_live_pending`` function that reads pending_bytes from the vendor
|
||||
* A ``state_pending_exact`` function that reads pending_bytes from the vendor
|
||||
driver, which indicates the amount of data that the vendor driver has yet to
|
||||
save for the VFIO device.
|
||||
|
||||
@ -114,7 +114,7 @@ Live migration save path
|
||||
(RUNNING, _SETUP, _RUNNING|_SAVING)
|
||||
|
|
||||
(RUNNING, _ACTIVE, _RUNNING|_SAVING)
|
||||
If device is active, get pending_bytes by .save_live_pending()
|
||||
If device is active, get pending_bytes by .state_pending_exact()
|
||||
If total pending_bytes >= threshold_size, call .save_live_iterate()
|
||||
Data of VFIO device for pre-copy phase is copied
|
||||
Iterate till total pending bytes converge and are less than threshold
|
||||
|
@ -182,7 +182,7 @@ static int cmma_save_setup(QEMUFile *f, void *opaque)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void cmma_save_pending(void *opaque, uint64_t max_size,
|
||||
static void cmma_state_pending(void *opaque, uint64_t max_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only)
|
||||
@ -371,7 +371,8 @@ static SaveVMHandlers savevm_s390_stattrib_handlers = {
|
||||
.save_setup = cmma_save_setup,
|
||||
.save_live_iterate = cmma_save_iterate,
|
||||
.save_live_complete_precopy = cmma_save_complete,
|
||||
.save_live_pending = cmma_save_pending,
|
||||
.state_pending_exact = cmma_state_pending,
|
||||
.state_pending_estimate = cmma_state_pending,
|
||||
.save_cleanup = cmma_save_cleanup,
|
||||
.load_state = cmma_load,
|
||||
.is_active = cmma_active,
|
||||
|
@ -456,7 +456,7 @@ static void vfio_save_cleanup(void *opaque)
|
||||
trace_vfio_save_cleanup(vbasedev->name);
|
||||
}
|
||||
|
||||
static void vfio_save_pending(void *opaque,
|
||||
static void vfio_state_pending(void *opaque,
|
||||
uint64_t threshold_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
@ -473,7 +473,7 @@ static void vfio_save_pending(void *opaque,
|
||||
|
||||
*res_precopy_only += migration->pending_bytes;
|
||||
|
||||
trace_vfio_save_pending(vbasedev->name, *res_precopy_only,
|
||||
trace_vfio_state_pending(vbasedev->name, *res_precopy_only,
|
||||
*res_postcopy_only, *res_compatible);
|
||||
}
|
||||
|
||||
@ -515,9 +515,9 @@ static int vfio_save_iterate(QEMUFile *f, void *opaque)
|
||||
}
|
||||
|
||||
/*
|
||||
* Reset pending_bytes as .save_live_pending is not called during savevm or
|
||||
* snapshot case, in such case vfio_update_pending() at the start of this
|
||||
* function updates pending_bytes.
|
||||
* Reset pending_bytes as state_pending* are not called during
|
||||
* savevm or snapshot case, in such case vfio_update_pending() at
|
||||
* the start of this function updates pending_bytes.
|
||||
*/
|
||||
migration->pending_bytes = 0;
|
||||
trace_vfio_save_iterate(vbasedev->name, data_size);
|
||||
@ -685,7 +685,8 @@ static int vfio_load_state(QEMUFile *f, void *opaque, int version_id)
|
||||
static SaveVMHandlers savevm_vfio_handlers = {
|
||||
.save_setup = vfio_save_setup,
|
||||
.save_cleanup = vfio_save_cleanup,
|
||||
.save_live_pending = vfio_save_pending,
|
||||
.state_pending_exact = vfio_state_pending,
|
||||
.state_pending_estimate = vfio_state_pending,
|
||||
.save_live_iterate = vfio_save_iterate,
|
||||
.save_live_complete_precopy = vfio_save_complete_precopy,
|
||||
.save_state = vfio_save_state,
|
||||
|
@ -157,7 +157,7 @@ vfio_save_cleanup(const char *name) " (%s)"
|
||||
vfio_save_buffer(const char *name, uint64_t data_offset, uint64_t data_size, uint64_t pending) " (%s) Offset 0x%"PRIx64" size 0x%"PRIx64" pending 0x%"PRIx64
|
||||
vfio_update_pending(const char *name, uint64_t pending) " (%s) pending 0x%"PRIx64
|
||||
vfio_save_device_config_state(const char *name) " (%s)"
|
||||
vfio_save_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
|
||||
vfio_state_pending(const char *name, uint64_t precopy, uint64_t postcopy, uint64_t compatible) " (%s) precopy 0x%"PRIx64" postcopy 0x%"PRIx64" compatible 0x%"PRIx64
|
||||
vfio_save_iterate(const char *name, int data_size) " (%s) data_size %d"
|
||||
vfio_save_complete_precopy(const char *name) " (%s)"
|
||||
vfio_load_device_config_state(const char *name) " (%s)"
|
||||
|
@ -46,11 +46,6 @@ typedef struct SaveVMHandlers {
|
||||
|
||||
/* This runs outside the iothread lock! */
|
||||
int (*save_setup)(QEMUFile *f, void *opaque);
|
||||
void (*save_live_pending)(void *opaque,
|
||||
uint64_t threshold_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only);
|
||||
/* Note for state_pending_estimate and state_pending_exact:
|
||||
* - res_precopy_only is for data which must be migrated in precopy phase
|
||||
* or in stopped state, in other words - before target vm start
|
||||
@ -61,8 +56,18 @@ typedef struct SaveVMHandlers {
|
||||
* Sum of res_precopy_only, res_compatible and res_postcopy_only is the
|
||||
* whole amount of pending data.
|
||||
*/
|
||||
|
||||
|
||||
/* This estimates the remaining data to transfer */
|
||||
void (*state_pending_estimate)(void *opaque,
|
||||
uint64_t threshold_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only);
|
||||
/* This calculates the exact remaining data to transfer */
|
||||
void (*state_pending_exact)(void *opaque,
|
||||
uint64_t threshold_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only);
|
||||
LoadStateHandler *load_state;
|
||||
int (*load_setup)(QEMUFile *f, void *opaque);
|
||||
int (*load_cleanup)(void *opaque);
|
||||
|
@ -762,7 +762,7 @@ static int dirty_bitmap_save_complete(QEMUFile *f, void *opaque)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void dirty_bitmap_save_pending(void *opaque,
|
||||
static void dirty_bitmap_state_pending(void *opaque,
|
||||
uint64_t max_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
@ -784,7 +784,7 @@ static void dirty_bitmap_save_pending(void *opaque,
|
||||
|
||||
qemu_mutex_unlock_iothread();
|
||||
|
||||
trace_dirty_bitmap_save_pending(pending, max_size);
|
||||
trace_dirty_bitmap_state_pending(pending);
|
||||
|
||||
*res_postcopy_only += pending;
|
||||
}
|
||||
@ -1253,7 +1253,8 @@ static SaveVMHandlers savevm_dirty_bitmap_handlers = {
|
||||
.save_live_complete_postcopy = dirty_bitmap_save_complete,
|
||||
.save_live_complete_precopy = dirty_bitmap_save_complete,
|
||||
.has_postcopy = dirty_bitmap_has_postcopy,
|
||||
.save_live_pending = dirty_bitmap_save_pending,
|
||||
.state_pending_exact = dirty_bitmap_state_pending,
|
||||
.state_pending_estimate = dirty_bitmap_state_pending,
|
||||
.save_live_iterate = dirty_bitmap_save_iterate,
|
||||
.is_active_iterate = dirty_bitmap_is_active_iterate,
|
||||
.load_state = dirty_bitmap_load,
|
||||
|
@ -863,7 +863,7 @@ static int block_save_complete(QEMUFile *f, void *opaque)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void block_save_pending(void *opaque, uint64_t max_size,
|
||||
static void block_state_pending(void *opaque, uint64_t max_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only)
|
||||
@ -885,7 +885,7 @@ static void block_save_pending(void *opaque, uint64_t max_size,
|
||||
pending = BLK_MIG_BLOCK_SIZE;
|
||||
}
|
||||
|
||||
trace_migration_block_save_pending(pending);
|
||||
trace_migration_block_state_pending(pending);
|
||||
/* We don't do postcopy */
|
||||
*res_precopy_only += pending;
|
||||
}
|
||||
@ -1020,7 +1020,8 @@ static SaveVMHandlers savevm_block_handlers = {
|
||||
.save_setup = block_save_setup,
|
||||
.save_live_iterate = block_save_iterate,
|
||||
.save_live_complete_precopy = block_save_complete,
|
||||
.save_live_pending = block_save_pending,
|
||||
.state_pending_exact = block_state_pending,
|
||||
.state_pending_estimate = block_state_pending,
|
||||
.load_state = block_load,
|
||||
.save_cleanup = block_migration_cleanup,
|
||||
.is_active = block_is_active,
|
||||
|
@ -3778,15 +3778,23 @@ typedef enum {
|
||||
*/
|
||||
static MigIterateState migration_iteration_run(MigrationState *s)
|
||||
{
|
||||
uint64_t pending_size, pend_pre, pend_compat, pend_post;
|
||||
uint64_t pend_pre, pend_compat, pend_post;
|
||||
bool in_postcopy = s->state == MIGRATION_STATUS_POSTCOPY_ACTIVE;
|
||||
|
||||
qemu_savevm_state_pending(s->threshold_size, &pend_pre,
|
||||
qemu_savevm_state_pending_estimate(s->threshold_size, &pend_pre,
|
||||
&pend_compat, &pend_post);
|
||||
uint64_t pending_size = pend_pre + pend_compat + pend_post;
|
||||
|
||||
trace_migrate_pending_estimate(pending_size, s->threshold_size,
|
||||
pend_pre, pend_compat, pend_post);
|
||||
|
||||
if (pend_pre + pend_compat <= s->threshold_size) {
|
||||
qemu_savevm_state_pending_exact(s->threshold_size, &pend_pre,
|
||||
&pend_compat, &pend_post);
|
||||
pending_size = pend_pre + pend_compat + pend_post;
|
||||
|
||||
trace_migrate_pending(pending_size, s->threshold_size,
|
||||
trace_migrate_pending_exact(pending_size, s->threshold_size,
|
||||
pend_pre, pend_compat, pend_post);
|
||||
}
|
||||
|
||||
if (pending_size && pending_size >= s->threshold_size) {
|
||||
/* Still a significant amount to transfer */
|
||||
|
@ -3409,19 +3409,35 @@ static int ram_save_complete(QEMUFile *f, void *opaque)
|
||||
return 0;
|
||||
}
|
||||
|
||||
static void ram_save_pending(void *opaque, uint64_t max_size,
|
||||
static void ram_state_pending_estimate(void *opaque, uint64_t max_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only)
|
||||
{
|
||||
RAMState **temp = opaque;
|
||||
RAMState *rs = *temp;
|
||||
uint64_t remaining_size;
|
||||
|
||||
remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
|
||||
uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
|
||||
|
||||
if (!migration_in_postcopy() &&
|
||||
remaining_size < max_size) {
|
||||
if (migrate_postcopy_ram()) {
|
||||
/* We can do postcopy, and all the data is postcopiable */
|
||||
*res_postcopy_only += remaining_size;
|
||||
} else {
|
||||
*res_precopy_only += remaining_size;
|
||||
}
|
||||
}
|
||||
|
||||
static void ram_state_pending_exact(void *opaque, uint64_t max_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only)
|
||||
{
|
||||
RAMState **temp = opaque;
|
||||
RAMState *rs = *temp;
|
||||
|
||||
uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE;
|
||||
|
||||
if (!migration_in_postcopy()) {
|
||||
qemu_mutex_lock_iothread();
|
||||
WITH_RCU_READ_LOCK_GUARD() {
|
||||
migration_bitmap_sync_precopy(rs);
|
||||
@ -4577,7 +4593,8 @@ static SaveVMHandlers savevm_ram_handlers = {
|
||||
.save_live_complete_postcopy = ram_save_complete,
|
||||
.save_live_complete_precopy = ram_save_complete,
|
||||
.has_postcopy = ram_has_postcopy,
|
||||
.save_live_pending = ram_save_pending,
|
||||
.state_pending_exact = ram_state_pending_exact,
|
||||
.state_pending_estimate = ram_state_pending_estimate,
|
||||
.load_state = ram_load,
|
||||
.save_cleanup = ram_save_cleanup,
|
||||
.load_setup = ram_load_setup,
|
||||
|
@ -1472,7 +1472,7 @@ flush:
|
||||
* the result is split into the amount for units that can and
|
||||
* for units that can't do postcopy.
|
||||
*/
|
||||
void qemu_savevm_state_pending(uint64_t threshold_size,
|
||||
void qemu_savevm_state_pending_estimate(uint64_t threshold_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only)
|
||||
@ -1485,7 +1485,7 @@ void qemu_savevm_state_pending(uint64_t threshold_size,
|
||||
|
||||
|
||||
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
||||
if (!se->ops || !se->ops->save_live_pending) {
|
||||
if (!se->ops || !se->ops->state_pending_estimate) {
|
||||
continue;
|
||||
}
|
||||
if (se->ops->is_active) {
|
||||
@ -1493,7 +1493,33 @@ void qemu_savevm_state_pending(uint64_t threshold_size,
|
||||
continue;
|
||||
}
|
||||
}
|
||||
se->ops->save_live_pending(se->opaque, threshold_size,
|
||||
se->ops->state_pending_estimate(se->opaque, threshold_size,
|
||||
res_precopy_only, res_compatible,
|
||||
res_postcopy_only);
|
||||
}
|
||||
}
|
||||
|
||||
void qemu_savevm_state_pending_exact(uint64_t threshold_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only)
|
||||
{
|
||||
SaveStateEntry *se;
|
||||
|
||||
*res_precopy_only = 0;
|
||||
*res_compatible = 0;
|
||||
*res_postcopy_only = 0;
|
||||
|
||||
QTAILQ_FOREACH(se, &savevm_state.handlers, entry) {
|
||||
if (!se->ops || !se->ops->state_pending_exact) {
|
||||
continue;
|
||||
}
|
||||
if (se->ops->is_active) {
|
||||
if (!se->ops->is_active(se->opaque)) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
se->ops->state_pending_exact(se->opaque, threshold_size,
|
||||
res_precopy_only, res_compatible,
|
||||
res_postcopy_only);
|
||||
}
|
||||
|
@ -40,7 +40,11 @@ void qemu_savevm_state_cleanup(void);
|
||||
void qemu_savevm_state_complete_postcopy(QEMUFile *f);
|
||||
int qemu_savevm_state_complete_precopy(QEMUFile *f, bool iterable_only,
|
||||
bool inactivate_disks);
|
||||
void qemu_savevm_state_pending(uint64_t max_size,
|
||||
void qemu_savevm_state_pending_exact(uint64_t threshold_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only);
|
||||
void qemu_savevm_state_pending_estimate(uint64_t threshold_size,
|
||||
uint64_t *res_precopy_only,
|
||||
uint64_t *res_compatible,
|
||||
uint64_t *res_postcopy_only);
|
||||
|
@ -150,7 +150,8 @@ migrate_fd_cleanup(void) ""
|
||||
migrate_fd_error(const char *error_desc) "error=%s"
|
||||
migrate_fd_cancel(void) ""
|
||||
migrate_handle_rp_req_pages(const char *rbname, size_t start, size_t len) "in %s at 0x%zx len 0x%zx"
|
||||
migrate_pending(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint64_t post) "pending size %" PRIu64 " max %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
|
||||
migrate_pending_exact(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint64_t post) "exact pending size %" PRIu64 " max %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
|
||||
migrate_pending_estimate(uint64_t size, uint64_t max, uint64_t pre, uint64_t compat, uint64_t post) "estimate pending size %" PRIu64 " max %" PRIu64 " (pre = %" PRIu64 " compat=%" PRIu64 " post=%" PRIu64 ")"
|
||||
migrate_send_rp_message(int msg_type, uint16_t len) "%d: len %d"
|
||||
migrate_send_rp_recv_bitmap(char *name, int64_t size) "block '%s' size 0x%"PRIi64
|
||||
migration_completion_file_err(void) ""
|
||||
@ -330,7 +331,7 @@ send_bitmap_bits(uint32_t flags, uint64_t start_sector, uint32_t nr_sectors, uin
|
||||
dirty_bitmap_save_iterate(int in_postcopy) "in postcopy: %d"
|
||||
dirty_bitmap_save_complete_enter(void) ""
|
||||
dirty_bitmap_save_complete_finish(void) ""
|
||||
dirty_bitmap_save_pending(uint64_t pending, uint64_t max_size) "pending %" PRIu64 " max: %" PRIu64
|
||||
dirty_bitmap_state_pending(uint64_t pending) "pending %" PRIu64
|
||||
dirty_bitmap_load_complete(void) ""
|
||||
dirty_bitmap_load_bits_enter(uint64_t first_sector, uint32_t nr_sectors) "chunk: %" PRIu64 " %" PRIu32
|
||||
dirty_bitmap_load_bits_zeroes(void) ""
|
||||
@ -355,7 +356,7 @@ migration_block_save_device_dirty(int64_t sector) "Error reading sector %" PRId6
|
||||
migration_block_flush_blks(const char *action, int submitted, int read_done, int transferred) "%s submitted %d read_done %d transferred %d"
|
||||
migration_block_save(const char *mig_stage, int submitted, int transferred) "Enter save live %s submitted %d transferred %d"
|
||||
migration_block_save_complete(void) "Block migration completed"
|
||||
migration_block_save_pending(uint64_t pending) "Enter save live pending %" PRIu64
|
||||
migration_block_state_pending(uint64_t pending) "Enter save live pending %" PRIu64
|
||||
|
||||
# page_cache.c
|
||||
migration_pagecache_init(int64_t max_num_items) "Setting cache buckets to %" PRId64
|
||||
|
Loading…
Reference in New Issue
Block a user