/*
 * Review header (sits before the first diff header; not part of any hunk).
 *
 * This file is a unified diff whose line breaks have been collapsed; the
 * hunk text below is kept in that form, with context and removed lines
 * untouched.
 *
 * What the patch does, as visible in the hunks:
 *  - adds a save_live_pending callback to SaveVMHandlers, implemented by
 *    ram_save_pending() and block_save_pending(), and summed over every
 *    handler that has the callback and is active by
 *    qemu_savevm_state_pending();
 *  - removes the per-subsystem bandwidth/downtime estimates:
 *    ram_save_iterate() now returns its loop counter i and
 *    block_save_iterate() returns 0;
 *  - buffered_file_thread() derives max_size from the bytes sent during the
 *    last BUFFER_DELAY window and migrate_max_downtime(), and hands it to
 *    migrate_fd_put_ready(), which returns true once it has taken the
 *    completion branch.
 *
 * Changes made to added ('+') lines during review:
 *  - buffered_file_thread(): cast transferred_bytes to double before the
 *    division; dividing two uint64_t values first truncated the rate.
 *  - block_save_pending(): print the int64_t result with PRId64, not %ld.
 *  - migrate_fd_put_ready(): print the uint64_t sizes with PRIu64, not %lu.
 *  - migrate_fd_put_ready(): only keep iterating while something is pending;
 *    with max_size still 0, "0 >= 0" never reached the completion branch.
 */
diff --git a/arch_init.c b/arch_init.c index fad1c4099a..af1ae9f72a 100644 --- a/arch_init.c +++ b/arch_init.c @@ -582,12 +582,9 @@ static int ram_save_setup(QEMUFile *f, void *opaque) static int ram_save_iterate(QEMUFile *f, void *opaque) { - uint64_t bytes_transferred_last; - double bwidth = 0; int ret; int i; - uint64_t expected_downtime; - MigrationState *s = migrate_get_current(); + int64_t t0; qemu_mutex_lock_ramlist(); @@ -595,9 +592,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) reset_ram_globals(); } - bytes_transferred_last = bytes_transferred; - bwidth = qemu_get_clock_ns(rt_clock); - + t0 = qemu_get_clock_ns(rt_clock); i = 0; while ((ret = qemu_file_rate_limit(f)) == 0) { int bytes_sent; @@ -615,7 +610,7 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) iterations */ if ((i & 63) == 0) { - uint64_t t1 = (qemu_get_clock_ns(rt_clock) - bwidth) / 1000000; + uint64_t t1 = (qemu_get_clock_ns(rt_clock) - t0) / 1000000; if (t1 > MAX_WAIT) { DPRINTF("big wait: %" PRIu64 " milliseconds, %d iterations\n", t1, i); @@ -629,31 +624,10 @@ static int ram_save_iterate(QEMUFile *f, void *opaque) return ret; } - bwidth = qemu_get_clock_ns(rt_clock) - bwidth; - bwidth = (bytes_transferred - bytes_transferred_last) / bwidth; - - /* if we haven't transferred anything this round, force - * expected_downtime to a very high value, but without - * crashing */ - if (bwidth == 0) { - bwidth = 0.000001; - } - qemu_mutex_unlock_ramlist(); qemu_put_be64(f, RAM_SAVE_FLAG_EOS); - expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; - DPRINTF("ram_save_live: expected(%" PRIu64 ") <= max(" PRIu64 ")?\n", - expected_downtime, migrate_max_downtime()); - - if (expected_downtime <= migrate_max_downtime()) { - migration_bitmap_sync(); - expected_downtime = ram_save_remaining() * TARGET_PAGE_SIZE / bwidth; - s->expected_downtime = expected_downtime / 1000000; /* ns -> ms */ - - return expected_downtime <= migrate_max_downtime(); - } - return 0; + return 
i; } static int ram_save_complete(QEMUFile *f, void *opaque) @@ -683,6 +657,19 @@ static int ram_save_complete(QEMUFile *f, void *opaque) return 0; } +static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) +{ + uint64_t remaining_size; + + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + + if (remaining_size < max_size) { + migration_bitmap_sync(); + remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; + } + return remaining_size; +} + static int load_xbzrle(QEMUFile *f, ram_addr_t addr, void *host) { int ret, rc = 0; @@ -869,6 +856,7 @@ SaveVMHandlers savevm_ram_handlers = { .save_live_setup = ram_save_setup, .save_live_iterate = ram_save_iterate, .save_live_complete = ram_save_complete, + .save_live_pending = ram_save_pending, .load_state = ram_load, .cancel = ram_migration_cancel, }; diff --git a/block-migration.c b/block-migration.c index ca4ba3fffb..6acf3e1682 100644 --- a/block-migration.c +++ b/block-migration.c @@ -77,9 +77,7 @@ typedef struct BlkMigState { int64_t total_sector_sum; int prev_progress; int bulk_completed; - long double total_time; long double prev_time_offset; - int reads; } BlkMigState; static BlkMigState block_mig_state; @@ -132,12 +130,6 @@ uint64_t blk_mig_bytes_total(void) return sum << BDRV_SECTOR_BITS; } -static inline long double compute_read_bwidth(void) -{ - assert(block_mig_state.total_time != 0); - return (block_mig_state.reads / block_mig_state.total_time) * BLOCK_SIZE; -} - static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector) { int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; @@ -191,8 +183,6 @@ static void blk_mig_read_cb(void *opaque, int ret) blk->ret = ret; - block_mig_state.reads++; - block_mig_state.total_time += (curr_time - block_mig_state.prev_time_offset); block_mig_state.prev_time_offset = curr_time; QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry); @@ -310,8 +300,6 @@ static void init_blk_migration(QEMUFile *f) 
block_mig_state.total_sector_sum = 0; block_mig_state.prev_progress = -1; block_mig_state.bulk_completed = 0; - block_mig_state.total_time = 0; - block_mig_state.reads = 0; bdrv_iterate(init_blk_migration_it, NULL); } @@ -493,32 +481,6 @@ static int64_t get_remaining_dirty(void) return dirty * BLOCK_SIZE; } -static int is_stage2_completed(void) -{ - int64_t remaining_dirty; - long double bwidth; - - if (block_mig_state.bulk_completed == 1) { - - remaining_dirty = get_remaining_dirty(); - if (remaining_dirty == 0) { - return 1; - } - - bwidth = compute_read_bwidth(); - - if ((remaining_dirty / bwidth) <= - migrate_max_downtime()) { - /* finish stage2 because we think that we can finish remaining work - below max_downtime */ - - return 1; - } - } - - return 0; -} - static void blk_mig_cleanup(void) { BlkMigDevState *bmds; @@ -619,7 +581,7 @@ static int block_save_iterate(QEMUFile *f, void *opaque) qemu_put_be64(f, BLK_MIG_FLAG_EOS); - return is_stage2_completed(); + return 0; } static int block_save_complete(QEMUFile *f, void *opaque) @@ -659,6 +621,14 @@ static int block_save_complete(QEMUFile *f, void *opaque) return 0; } +static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) +{ + + DPRINTF("Enter save live pending %" PRId64 "\n", get_remaining_dirty()); + + return get_remaining_dirty(); +} + static int block_load(QEMUFile *f, void *opaque, int version_id) { static int banner_printed; @@ -755,6 +725,7 @@ SaveVMHandlers savevm_block_handlers = { .save_live_setup = block_save_setup, .save_live_iterate = block_save_iterate, .save_live_complete = block_save_complete, + .save_live_pending = block_save_pending, .load_state = block_load, .cancel = block_migration_cancel, .is_active = block_is_active, diff --git a/buffered_file.c b/buffered_file.c index be9424b543..fdf7efa964 100644 --- a/buffered_file.c +++ b/buffered_file.c @@ -181,13 +181,15 @@ static int64_t buffered_get_rate_limit(void *opaque) return s->xfer_limit; } -/* 10ms xfer_limit is the 
limit that we should write each 10ms */ +/* 100ms xfer_limit is the limit that we should write each 100ms */ #define BUFFER_DELAY 100 static void *buffered_file_thread(void *opaque) { QEMUFileBuffered *s = opaque; - int64_t expire_time = qemu_get_clock_ms(rt_clock) + BUFFER_DELAY; + int64_t initial_time = qemu_get_clock_ms(rt_clock); + int64_t max_size = 0; + bool last_round = false; while (true) { int64_t current_time = qemu_get_clock_ms(rt_clock); @@ -195,13 +197,22 @@ static void *buffered_file_thread(void *opaque) if (s->migration_state->complete) { break; } - if (current_time >= expire_time) { + if (current_time >= initial_time + BUFFER_DELAY) { + uint64_t transferred_bytes = s->bytes_xfer; + uint64_t time_spent = current_time - initial_time; + double bandwidth = (double)transferred_bytes / time_spent; + max_size = bandwidth * migrate_max_downtime() / 1000000; + + DPRINTF("transferred %" PRIu64 " time_spent %" PRIu64 + " bandwidth %g max_size %" PRId64 "\n", + transferred_bytes, time_spent, bandwidth, max_size); + s->bytes_xfer = 0; - expire_time = current_time + BUFFER_DELAY; + initial_time = current_time; } - if (s->bytes_xfer >= s->xfer_limit) { + if (!last_round && (s->bytes_xfer >= s->xfer_limit)) { /* usleep expects microseconds */ - g_usleep((expire_time - current_time)*1000); + g_usleep((initial_time + BUFFER_DELAY - current_time)*1000); } if (buffered_flush(s) < 0) { break; @@ -210,7 +221,7 @@ static void *buffered_file_thread(void *opaque) DPRINTF("file is ready\n"); if (s->bytes_xfer < s->xfer_limit) { DPRINTF("notifying client\n"); - migrate_fd_put_ready(s->migration_state); + last_round = migrate_fd_put_ready(s->migration_state, max_size); } } diff --git a/include/migration/migration.h b/include/migration/migration.h index 92190f2afd..9571ec532c 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -81,7 +81,7 @@ void migrate_fd_connect(MigrationState *s); ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, 
size_t size); -void migrate_fd_put_ready(MigrationState *s); +bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size); int migrate_fd_close(MigrationState *s); void add_migration_state_change_notifier(Notifier *notify); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 623af0a29a..f27276c2d8 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -35,6 +35,7 @@ typedef struct SaveVMHandlers { int (*save_live_setup)(QEMUFile *f, void *opaque); int (*save_live_iterate)(QEMUFile *f, void *opaque); int (*save_live_complete)(QEMUFile *f, void *opaque); + uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size); void (*cancel)(void *opaque); LoadStateHandler *load_state; bool (*is_active)(void *opaque); diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index 8eaa4707a0..28a783e2be 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -78,6 +78,7 @@ int qemu_savevm_state_begin(QEMUFile *f, int qemu_savevm_state_iterate(QEMUFile *f); int qemu_savevm_state_complete(QEMUFile *f); void qemu_savevm_state_cancel(QEMUFile *f); +uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size); int qemu_loadvm_state(QEMUFile *f); /* SLIRP */ diff --git a/migration.c b/migration.c index 11123bcea0..b6374ae072 100644 --- a/migration.c +++ b/migration.c @@ -316,15 +316,17 @@ ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, return ret; } -void migrate_fd_put_ready(MigrationState *s) +bool migrate_fd_put_ready(MigrationState *s, uint64_t max_size) { int ret; + uint64_t pending_size; + bool last_round = false; qemu_mutex_lock_iothread(); if (s->state != MIG_STATE_ACTIVE) { DPRINTF("put_ready returning because of non-active state\n"); qemu_mutex_unlock_iothread(); - return; + return false; } if (s->first_time) { s->first_time = false; @@ -334,15 +336,19 @@ void migrate_fd_put_ready(MigrationState *s) DPRINTF("failed, %d\n", ret); migrate_fd_error(s); 
qemu_mutex_unlock_iothread(); - return; + return false; } } DPRINTF("iterate\n"); - ret = qemu_savevm_state_iterate(s->file); - if (ret < 0) { - migrate_fd_error(s); - } else if (ret == 1) { + pending_size = qemu_savevm_state_pending(s->file, max_size); + DPRINTF("pending size %" PRIu64 " max %" PRIu64 "\n", pending_size, max_size); + if (pending_size && pending_size >= max_size) { + ret = qemu_savevm_state_iterate(s->file); + if (ret < 0) { + migrate_fd_error(s); + } + } else { int old_vm_running = runstate_is_running(); int64_t start_time, end_time; @@ -368,9 +374,11 @@ void migrate_fd_put_ready(MigrationState *s) vm_start(); } } + last_round = true; } qemu_mutex_unlock_iothread(); + return last_round; } static void migrate_fd_cancel(MigrationState *s) diff --git a/savevm.c b/savevm.c index c93d0b4fde..bcdb92ee81 100644 --- a/savevm.c +++ b/savevm.c @@ -1753,6 +1753,25 @@ int qemu_savevm_state_complete(QEMUFile *f) return qemu_file_get_error(f); } +uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size) +{ + SaveStateEntry *se; + uint64_t ret = 0; + + QTAILQ_FOREACH(se, &savevm_handlers, entry) { + if (!se->ops || !se->ops->save_live_pending) { + continue; + } + if (se->ops && se->ops->is_active) { + if (!se->ops->is_active(se->opaque)) { + continue; + } + } + ret += se->ops->save_live_pending(f, se->opaque, max_size); + } + return ret; +} + void qemu_savevm_state_cancel(QEMUFile *f) { SaveStateEntry *se;