From 5da5aad068def65b5e278a6380192d4bfe279585 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:07 +0100 Subject: [PATCH 01/46] migration: simplify while loop Unify the goto around the loop, with the exit condition at the end of it. Both can be expressed as "while (ret >= 0)". Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) diff --git a/migration.c b/migration.c index 11725ae3fc..ba8b647dce 100644 --- a/migration.c +++ b/migration.c @@ -666,14 +666,9 @@ static void *buffered_file_thread(void *opaque) qemu_mutex_lock_iothread(); DPRINTF("beginning savevm\n"); ret = qemu_savevm_state_begin(s->file, &s->params); - if (ret < 0) { - DPRINTF("failed, %d\n", ret); - qemu_mutex_unlock_iothread(); - goto out; - } qemu_mutex_unlock_iothread(); - while (true) { + while (ret >= 0) { int64_t current_time; uint64_t pending_size; @@ -754,12 +749,8 @@ static void *buffered_file_thread(void *opaque) sleep_time += qemu_get_clock_ms(rt_clock) - current_time; } ret = buffered_flush(s); - if (ret < 0) { - break; - } } -out: if (ret < 0) { migrate_fd_error(s); } From 891518abd804401978e402d588733e282be960ad Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:08 +0100 Subject: [PATCH 02/46] migration: always use vm_stop_force_state vm_stop_force_state does: if (runstate_is_running()) { vm_stop(state); } else { runstate_set(state); } migration.c does: if (runstate_is_running()) { vm_stop(state); } else { vm_stop_force_state(state); } The code run is the same even if we always use vm_stop_force_state in migration.c. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/migration.c b/migration.c index ba8b647dce..65e8583517 100644 --- a/migration.c +++ b/migration.c @@ -699,11 +699,7 @@ static void *buffered_file_thread(void *opaque) DPRINTF("done iterating\n"); start_time = qemu_get_clock_ms(rt_clock); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); - if (old_vm_running) { - vm_stop(RUN_STATE_FINISH_MIGRATE); - } else { - vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - } + vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); ret = qemu_savevm_state_complete(s->file); if (ret < 0) { qemu_mutex_unlock_iothread(); From 7a2c17216cd5ae4c22844123b8e9360d517932f8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:09 +0100 Subject: [PATCH 03/46] migration: move more error handling to migrate_fd_cleanup The next patch will add more cases where qemu_savevm_state_cancel needs to be called; prepare for that already, the function can be called twice with no ill effect. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/migration.c b/migration.c index 65e8583517..10ce9fe6a2 100644 --- a/migration.c +++ b/migration.c @@ -260,7 +260,7 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, /* shared migration helpers */ -static int migrate_fd_cleanup(MigrationState *s) +static void migrate_fd_cleanup(MigrationState *s) { int ret = 0; @@ -271,7 +271,13 @@ static int migrate_fd_cleanup(MigrationState *s) } assert(s->fd == -1); - return ret; + if (ret < 0 && s->state == MIG_STATE_ACTIVE) { + s->state = MIG_STATE_ERROR; + } + + if (s->state != MIG_STATE_ACTIVE) { + qemu_savevm_state_cancel(); + } } void migrate_fd_error(MigrationState *s) @@ -285,9 +291,8 @@ void migrate_fd_error(MigrationState *s) static void migrate_fd_completed(MigrationState *s) { DPRINTF("setting completed state\n"); - if (migrate_fd_cleanup(s) < 0) { - s->state = MIG_STATE_ERROR; - } else { + migrate_fd_cleanup(s); + if (s->state == MIG_STATE_ACTIVE) { s->state = MIG_STATE_COMPLETED; runstate_set(RUN_STATE_POSTMIGRATE); } @@ -322,7 +327,6 @@ static void migrate_fd_cancel(MigrationState *s) s->state = MIG_STATE_CANCELLED; notifier_list_notify(&migration_state_notifiers, s); - qemu_savevm_state_cancel(); migrate_fd_cleanup(s); } From 04943ebaa9e4f5f9ac080198a7b0d25c6d7ac444 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:10 +0100 Subject: [PATCH 04/46] migration: push qemu_savevm_state_cancel out of qemu_savevm_state_* This is useful, because it lets us keep the cancellation callbacks inside the big lock while pushing the others out. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- savevm.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/savevm.c b/savevm.c index a8a53efc9b..4302903e7c 100644 --- a/savevm.c +++ b/savevm.c @@ -1621,17 +1621,11 @@ int qemu_savevm_state_begin(QEMUFile *f, ret = se->ops->save_live_setup(f, se->opaque); if (ret < 0) { - qemu_savevm_state_cancel(); return ret; } } ret = qemu_file_get_error(f); - if (ret != 0) { - qemu_savevm_state_cancel(); - } - return ret; - } /* @@ -1677,9 +1671,6 @@ int qemu_savevm_state_iterate(QEMUFile *f) return ret; } ret = qemu_file_get_error(f); - if (ret != 0) { - qemu_savevm_state_cancel(); - } return ret; } @@ -1778,8 +1769,7 @@ static int qemu_savevm_state(QEMUFile *f) }; if (qemu_savevm_state_blocked(NULL)) { - ret = -EINVAL; - goto out; + return -EINVAL; } ret = qemu_savevm_state_begin(f, ¶ms); @@ -1798,6 +1788,9 @@ out: if (ret == 0) { ret = qemu_file_get_error(f); } + if (ret != 0) { + qemu_savevm_state_cancel(); + } return ret; } From d418cf57a3e699746ef0bfa772bbe8c7e17cebb5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:11 +0100 Subject: [PATCH 05/46] block-migration: remove useless calls to blk_mig_cleanup Now that the cancel callback is called consistently for all errors, we can avoid doing its work in the other callbacks. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- block-migration.c | 26 ++++++++------------------ 1 file changed, 8 insertions(+), 18 deletions(-) diff --git a/block-migration.c b/block-migration.c index 43ab2028c0..e6c917d952 100644 --- a/block-migration.c +++ b/block-migration.c @@ -524,16 +524,10 @@ static int block_save_setup(QEMUFile *f, void *opaque) set_dirty_tracking(1); ret = flush_blks(f); - if (ret) { - blk_mig_cleanup(); - return ret; - } - blk_mig_reset_dirty_cursor(); - qemu_put_be64(f, BLK_MIG_FLAG_EOS); - return 0; + return ret; } static int block_save_iterate(QEMUFile *f, void *opaque) @@ -546,7 +540,6 @@ static int block_save_iterate(QEMUFile *f, void *opaque) ret = flush_blks(f); if (ret) { - blk_mig_cleanup(); return ret; } @@ -564,20 +557,18 @@ static int block_save_iterate(QEMUFile *f, void *opaque) } } else { ret = blk_mig_save_dirty_block(f, 1); + if (ret < 0) { + return ret; + } if (ret != 0) { /* no more dirty blocks */ break; } } } - if (ret < 0) { - blk_mig_cleanup(); - return ret; - } ret = flush_blks(f); if (ret) { - blk_mig_cleanup(); return ret; } @@ -595,7 +586,6 @@ static int block_save_complete(QEMUFile *f, void *opaque) ret = flush_blks(f); if (ret) { - blk_mig_cleanup(); return ret; } @@ -607,12 +597,11 @@ static int block_save_complete(QEMUFile *f, void *opaque) do { ret = blk_mig_save_dirty_block(f, 0); + if (ret < 0) { + return ret; + } } while (ret == 0); - blk_mig_cleanup(); - if (ret < 0) { - return ret; - } /* report completion */ qemu_put_be64(f, (100 << BDRV_SECTOR_BITS) | BLK_MIG_FLAG_PROGRESS); @@ -620,6 +609,7 @@ static int block_save_complete(QEMUFile *f, void *opaque) qemu_put_be64(f, BLK_MIG_FLAG_EOS); + blk_mig_cleanup(); return 0; } From 93bf21044c38134bc7d35577b675d9f2bdcb8419 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:12 +0100 Subject: [PATCH 06/46] qemu-file: pass errno from qemu_fflush via f->last_error This is done by almost all callers of qemu_fflush, move the code directly to qemu_fflush. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- savevm.c | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/savevm.c b/savevm.c index 4302903e7c..a681177964 100644 --- a/savevm.c +++ b/savevm.c @@ -453,13 +453,13 @@ static void qemu_file_set_error(QEMUFile *f, int ret) /** Flushes QEMUFile buffer * */ -static int qemu_fflush(QEMUFile *f) +static void qemu_fflush(QEMUFile *f) { int ret = 0; - if (!f->ops->put_buffer) - return 0; - + if (!f->ops->put_buffer) { + return; + } if (f->is_write && f->buf_index > 0) { ret = f->ops->put_buffer(f->opaque, f->buf, f->buf_offset, f->buf_index); if (ret >= 0) { @@ -467,7 +467,9 @@ static int qemu_fflush(QEMUFile *f) } f->buf_index = 0; } - return ret; + if (ret < 0) { + qemu_file_set_error(f, ret); + } } static void qemu_fill_buffer(QEMUFile *f) @@ -518,7 +520,8 @@ int qemu_get_fd(QEMUFile *f) int qemu_fclose(QEMUFile *f) { int ret; - ret = qemu_fflush(f); + qemu_fflush(f); + ret = qemu_file_get_error(f); if (f->ops->close) { int ret2 = f->ops->close(f->opaque); @@ -560,9 +563,8 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size) buf += l; size -= l; if (f->buf_index >= IO_BUF_SIZE) { - int ret = qemu_fflush(f); - if (ret < 0) { - qemu_file_set_error(f, ret); + qemu_fflush(f); + if (qemu_file_get_error(f)) { break; } } @@ -584,10 +586,7 @@ void qemu_put_byte(QEMUFile *f, int v) f->buf[f->buf_index++] = v; f->is_write = 1; if (f->buf_index >= IO_BUF_SIZE) { - int ret = qemu_fflush(f); - if (ret < 0) { - qemu_file_set_error(f, ret); - } + qemu_fflush(f); } } From 47c8c17af883b5bd0f147cfcec8d7ef8ff76023b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:13 +0100 Subject: [PATCH 07/46] migration: use qemu_file_set_error to pass error codes back to qemu_savevm_state Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/sysemu/sysemu.h | 6 +++--- savevm.c | 44 +++++++++++++++++------------------------ 2 files changed, 21 insertions(+), 29 deletions(-) diff --git a/include/sysemu/sysemu.h b/include/sysemu/sysemu.h index b19ec952b4..6578782fc3 100644 --- a/include/sysemu/sysemu.h +++ b/include/sysemu/sysemu.h @@ -73,10 +73,10 @@ void do_info_snapshots(Monitor *mon, const QDict *qdict); void qemu_announce_self(void); bool qemu_savevm_state_blocked(Error **errp); -int qemu_savevm_state_begin(QEMUFile *f, - const MigrationParams *params); +void qemu_savevm_state_begin(QEMUFile *f, + const MigrationParams *params); int qemu_savevm_state_iterate(QEMUFile *f); -int qemu_savevm_state_complete(QEMUFile *f); +void qemu_savevm_state_complete(QEMUFile *f); void qemu_savevm_state_cancel(void); uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size); int qemu_loadvm_state(QEMUFile *f); diff --git a/savevm.c b/savevm.c index a681177964..a1690b4ddc 100644 --- a/savevm.c +++ b/savevm.c @@ -1579,8 +1579,8 @@ bool qemu_savevm_state_blocked(Error **errp) return false; } -int qemu_savevm_state_begin(QEMUFile *f, - const MigrationParams *params) +void qemu_savevm_state_begin(QEMUFile *f, + const MigrationParams *params) { SaveStateEntry *se; int ret; @@ -1620,11 +1620,10 @@ int qemu_savevm_state_begin(QEMUFile *f, ret = se->ops->save_live_setup(f, se->opaque); if (ret < 0) { - return ret; + qemu_file_set_error(f, ret); + break; } } - ret = qemu_file_get_error(f); - return ret; } /* @@ -1658,6 +1657,9 @@ int qemu_savevm_state_iterate(QEMUFile *f) ret = se->ops->save_live_iterate(f, se->opaque); trace_savevm_section_end(se->section_id); + if (ret < 0) { + qemu_file_set_error(f, ret); + } if (ret <= 0) { /* Do not proceed to the next vmstate before this one reported completion of the current stage. This serializes the migration @@ -1666,14 +1668,10 @@ int qemu_savevm_state_iterate(QEMUFile *f) break; } } - if (ret != 0) { - return ret; - } - ret = qemu_file_get_error(f); return ret; } -int qemu_savevm_state_complete(QEMUFile *f) +void qemu_savevm_state_complete(QEMUFile *f) { SaveStateEntry *se; int ret; @@ -1697,7 +1695,8 @@ int qemu_savevm_state_complete(QEMUFile *f) ret = se->ops->save_live_complete(f, se->opaque); trace_savevm_section_end(se->section_id); if (ret < 0) { - return ret; + qemu_file_set_error(f, ret); + return; } } @@ -1725,8 +1724,6 @@ int qemu_savevm_state_complete(QEMUFile *f) } qemu_put_byte(f, QEMU_VM_EOF); - - return qemu_file_get_error(f); } uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size) @@ -1771,26 +1768,21 @@ static int qemu_savevm_state(QEMUFile *f) return -EINVAL; } - ret = qemu_savevm_state_begin(f, ¶ms); - if (ret < 0) - goto out; + qemu_savevm_state_begin(f, ¶ms); + while (qemu_file_get_error(f) == 0) { + if (qemu_savevm_state_iterate(f) > 0) { + break; + } + } - do { - ret = qemu_savevm_state_iterate(f); - if (ret < 0) - goto out; - } while (ret == 0); - - ret = qemu_savevm_state_complete(f); - -out: + ret = qemu_file_get_error(f); if (ret == 0) { + qemu_savevm_state_complete(f); ret = qemu_file_get_error(f); } if (ret != 0) { qemu_savevm_state_cancel(); } - return ret; } From 4eb938102b3d533e142de23e255e46da1326fc5a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:14 +0100 Subject: [PATCH 08/46] qemu-file: temporarily expose qemu_file_set_error and qemu_fflush Right now, migration cannot entirely rely on QEMUFile's automatic drop of I/O after an error, because it does its "real" I/O outside the put_buffer callback. To fix this until buffering is gone, expose qemu_file_set_error which we will use in buffered_flush. Similarly, buffered_flush is not a complete flush because some data may still reside in the QEMUFile's own buffer. This somewhat complicates the process of closing the migration thread. Again, when buffering is gone buffered_flush will disappear and calling qemu_fflush will not be needed; in the meanwhile, we expose the function for use in migration.c. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/qemu-file.h | 2 ++ savevm.c | 4 ++-- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 46fc11dc99..5e0c287793 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -82,6 +82,7 @@ QEMUFile *qemu_popen_cmd(const char *command, const char *mode); int qemu_get_fd(QEMUFile *f); int qemu_fclose(QEMUFile *f); int64_t qemu_ftell(QEMUFile *f); +void qemu_fflush(QEMUFile *f); void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size); void qemu_put_byte(QEMUFile *f, int v); @@ -113,6 +114,7 @@ int qemu_file_rate_limit(QEMUFile *f); int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); int64_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_get_error(QEMUFile *f); +void qemu_file_set_error(QEMUFile *f, int ret); static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv) { diff --git a/savevm.c b/savevm.c index a1690b4ddc..e10a045df6 100644 --- a/savevm.c +++ b/savevm.c @@ -443,7 +443,7 @@ int qemu_file_get_error(QEMUFile *f) return f->last_error; } -static void qemu_file_set_error(QEMUFile *f, int ret) +void qemu_file_set_error(QEMUFile *f, int ret) { if (f->last_error == 0) { f->last_error = ret; @@ -453,7 +453,7 @@ static void qemu_file_set_error(QEMUFile *f, int ret) /** Flushes QEMUFile buffer * */ -static void qemu_fflush(QEMUFile *f) +void qemu_fflush(QEMUFile *f) { int ret = 0; From f5821518ed6d49aae9fd0aa6169d2d74bb83054c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:15 +0100 Subject: [PATCH 09/46] migration: flush all data to fd when buffered_flush is called Including data that resided in the QEMUFile's own buffer. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migration.c b/migration.c index 10ce9fe6a2..c5a7f29eb7 100644 --- a/migration.c +++ b/migration.c @@ -525,6 +525,8 @@ static ssize_t buffered_flush(MigrationState *s) DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size); + qemu_fflush(s->file); + while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) { size_t to_send = MIN(s->buffer_size - offset, s->xfer_limit - s->bytes_xfer); ret = migrate_fd_put_buffer(s, s->buffer + offset, to_send); From 63dfbd7ee03185c181a0791958ec9c8337089b55 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:16 +0100 Subject: [PATCH 10/46] migration: use qemu_file_set_error Remove the return value of buffered_flush, pass it via the error code of s->file. Once this is done, the error can be retrieved simply via migrate_fd_close's call to qemu_fclose. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 22 ++++++---------------- 1 file changed, 6 insertions(+), 16 deletions(-) diff --git a/migration.c b/migration.c index c5a7f29eb7..939c3f7640 100644 --- a/migration.c +++ b/migration.c @@ -518,7 +518,7 @@ int64_t migrate_xbzrle_cache_size(void) /* migration thread support */ -static ssize_t buffered_flush(MigrationState *s) +static void buffered_flush(MigrationState *s) { size_t offset = 0; ssize_t ret = 0; @@ -545,9 +545,8 @@ static ssize_t buffered_flush(MigrationState *s) s->buffer_size -= offset; if (ret < 0) { - return ret; + qemu_file_set_error(s->file, ret); } - return offset; } static int buffered_put_buffer(void *opaque, const uint8_t *buf, @@ -586,25 +585,15 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, static int buffered_close(void *opaque) { MigrationState *s = opaque; - ssize_t ret = 0; - int ret2; DPRINTF("closing\n"); s->xfer_limit = INT_MAX; while (!qemu_file_get_error(s->file) && s->buffer_size) { - ret = buffered_flush(s); - if (ret < 0) { - break; - } - } - - ret2 = migrate_fd_close(s); - if (ret >= 0) { - ret = ret2; + buffered_flush(s); } s->complete = true; - return ret; + return migrate_fd_close(s); } static int buffered_get_fd(void *opaque) @@ -750,7 +739,8 @@ static void *buffered_file_thread(void *opaque) g_usleep((initial_time + BUFFER_DELAY - current_time)*1000); sleep_time += qemu_get_clock_ms(rt_clock) - current_time; } - ret = buffered_flush(s); + buffered_flush(s); + ret = qemu_file_get_error(s->file); } if (ret < 0) { From dba433c03a0f5dc22a459435dd89557886298921 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:17 +0100 Subject: [PATCH 11/46] migration: simplify error handling Always use qemu_file_get_error to detect errors, since that is how QEMUFile itself drops I/O after an error occurs. There is no need to propagate and check return values all the time. Also remove the "complete" member, since we know that it is set (via migrate_fd_cleanup) only when the state changes. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/migration.h | 1 - migration.c | 46 +++++++++++------------------------ 2 files changed, 14 insertions(+), 33 deletions(-) diff --git a/include/migration/migration.h b/include/migration/migration.h index d1214097fe..3e680af7f6 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -54,7 +54,6 @@ struct MigrationState int64_t dirty_bytes_rate; bool enabled_capabilities[MIGRATION_CAPABILITY_MAX]; int64_t xbzrle_cache_size; - bool complete; }; void process_incoming_migration(QEMUFile *f); diff --git a/migration.c b/migration.c index 939c3f7640..18f97cfed3 100644 --- a/migration.c +++ b/migration.c @@ -525,6 +525,10 @@ static void buffered_flush(MigrationState *s) DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size); + if (qemu_file_get_error(s->file)) { + s->buffer_size = 0; + return; + } qemu_fflush(s->file); while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) { @@ -592,7 +596,6 @@ static int buffered_close(void *opaque) while (!qemu_file_get_error(s->file) && s->buffer_size) { buffered_flush(s); } - s->complete = true; return migrate_fd_close(s); } @@ -656,37 +659,21 @@ static void *buffered_file_thread(void *opaque) int64_t sleep_time = 0; int64_t max_size = 0; bool last_round = false; - int ret; qemu_mutex_lock_iothread(); DPRINTF("beginning savevm\n"); - ret = qemu_savevm_state_begin(s->file, &s->params); - qemu_mutex_unlock_iothread(); + qemu_savevm_state_begin(s->file, &s->params); - while (ret >= 0) { + while (s->state == MIG_STATE_ACTIVE) { int64_t current_time; uint64_t pending_size; - qemu_mutex_lock_iothread(); - if (s->state != MIG_STATE_ACTIVE) { - DPRINTF("put_ready returning because of non-active state\n"); - qemu_mutex_unlock_iothread(); - break; - } - if (s->complete) { - qemu_mutex_unlock_iothread(); - break; - } if (s->bytes_xfer < s->xfer_limit) { DPRINTF("iterate\n"); pending_size = qemu_savevm_state_pending(s->file, max_size); DPRINTF("pending size %lu max %lu\n", pending_size, max_size); if (pending_size && pending_size >= max_size) { - ret = qemu_savevm_state_iterate(s->file); - if (ret < 0) { - qemu_mutex_unlock_iothread(); - break; - } + qemu_savevm_state_iterate(s->file); } else { int old_vm_running = runstate_is_running(); int64_t start_time, end_time; @@ -695,13 +682,8 @@ static void *buffered_file_thread(void *opaque) start_time = qemu_get_clock_ms(rt_clock); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - ret = qemu_savevm_state_complete(s->file); - if (ret < 0) { - qemu_mutex_unlock_iothread(); - break; - } else { - migrate_fd_completed(s); - } + qemu_savevm_state_complete(s->file); + migrate_fd_completed(s); end_time = qemu_get_clock_ms(rt_clock); s->total_time = end_time - s->total_time; s->downtime = end_time - start_time; @@ -740,12 +722,13 @@ static void *buffered_file_thread(void *opaque) sleep_time += qemu_get_clock_ms(rt_clock) - current_time; } buffered_flush(s); - ret = qemu_file_get_error(s->file); + qemu_mutex_lock_iothread(); + if (qemu_file_get_error(s->file)) { + migrate_fd_error(s); + } } - if (ret < 0) { - migrate_fd_error(s); - } + qemu_mutex_unlock_iothread(); g_free(s->buffer); return NULL; } @@ -770,7 +753,6 @@ void migrate_fd_connect(MigrationState *s) s->expected_downtime = max_downtime/1000000; s->xfer_limit = s->bandwidth_limit / XFER_LIMIT_RATIO; - s->complete = false; s->file = qemu_fopen_ops(s, &buffered_file_ops); From a3fa1d78cbae2259491b17689812edcb643a3b30 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:18 +0100 Subject: [PATCH 12/46] migration: do not nest flushing of device data Completion of migration is currently done with a "nested" loop that invokes buffered_flush: migrate_fd_completed is called by buffered_file_thread, which calls migrate_fd_cleanup, which calls buffered_close (via qemu_fclose), which flushes the buffer. Simplify this, by reusing the buffered_flush call of buffered_file_thread. Then if qemu_savevm_state_complete was called, and the buffer is empty (including the QEMUFile buffer, for which we need the previous patch), we are done. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 55 +++++++++++++++++++++++------------------------------ 1 file changed, 24 insertions(+), 31 deletions(-) diff --git a/migration.c b/migration.c index 18f97cfed3..9a234c7b08 100644 --- a/migration.c +++ b/migration.c @@ -262,41 +262,34 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, static void migrate_fd_cleanup(MigrationState *s) { - int ret = 0; - if (s->file) { DPRINTF("closing file\n"); - ret = qemu_fclose(s->file); + qemu_fclose(s->file); s->file = NULL; } assert(s->fd == -1); - if (ret < 0 && s->state == MIG_STATE_ACTIVE) { - s->state = MIG_STATE_ERROR; - } + assert(s->state != MIG_STATE_ACTIVE); - if (s->state != MIG_STATE_ACTIVE) { + if (s->state != MIG_STATE_COMPLETED) { qemu_savevm_state_cancel(); } + + notifier_list_notify(&migration_state_notifiers, s); } void migrate_fd_error(MigrationState *s) { DPRINTF("setting error state\n"); s->state = MIG_STATE_ERROR; - notifier_list_notify(&migration_state_notifiers, s); migrate_fd_cleanup(s); } static void migrate_fd_completed(MigrationState *s) { DPRINTF("setting completed state\n"); + s->state = MIG_STATE_COMPLETED; migrate_fd_cleanup(s); - if (s->state == MIG_STATE_ACTIVE) { - s->state = MIG_STATE_COMPLETED; - runstate_set(RUN_STATE_POSTMIGRATE); - } - notifier_list_notify(&migration_state_notifiers, s); } static ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, @@ -326,8 +319,6 @@ static void migrate_fd_cancel(MigrationState *s) DPRINTF("cancelling migration\n"); s->state = MIG_STATE_CANCELLED; - notifier_list_notify(&migration_state_notifiers, s); - migrate_fd_cleanup(s); } @@ -592,10 +583,6 @@ static int buffered_close(void *opaque) DPRINTF("closing\n"); - s->xfer_limit = INT_MAX; - while (!qemu_file_get_error(s->file) && s->buffer_size) { - buffered_flush(s); - } return migrate_fd_close(s); } @@ -658,6 +645,8 @@ static void *buffered_file_thread(void *opaque) int64_t initial_time = qemu_get_clock_ms(rt_clock); int64_t sleep_time = 0; int64_t max_size = 0; + int64_t start_time = initial_time; + bool old_vm_running = false; bool last_round = false; qemu_mutex_lock_iothread(); @@ -675,23 +664,13 @@ static void *buffered_file_thread(void *opaque) if (pending_size && pending_size >= max_size) { qemu_savevm_state_iterate(s->file); } else { - int old_vm_running = runstate_is_running(); - int64_t start_time, end_time; - DPRINTF("done iterating\n"); start_time = qemu_get_clock_ms(rt_clock); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); + old_vm_running = runstate_is_running(); vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); + s->xfer_limit = INT_MAX; qemu_savevm_state_complete(s->file); - migrate_fd_completed(s); - end_time = qemu_get_clock_ms(rt_clock); - s->total_time = end_time - s->total_time; - s->downtime = end_time - start_time; - if (s->state != MIG_STATE_COMPLETED) { - if (old_vm_running) { - vm_start(); - } - } last_round = true; } } @@ -725,6 +704,20 @@ static void *buffered_file_thread(void *opaque) qemu_mutex_lock_iothread(); if (qemu_file_get_error(s->file)) { migrate_fd_error(s); + } else if (last_round && s->buffer_size == 0) { + migrate_fd_completed(s); + } + } + + if (s->state == MIG_STATE_COMPLETED) { + int64_t end_time = qemu_get_clock_ms(rt_clock); + s->total_time = end_time - s->total_time; + s->downtime = end_time - start_time; + runstate_set(RUN_STATE_POSTMIGRATE); + } else { + if (old_vm_running) { + assert(last_round); + vm_start(); } } From c09e5bb1d88ef38986bac7c6ed59dbd732cc4771 Mon Sep 17 00:00:00 2001 From: Kazuya Saito Date: Fri, 22 Feb 2013 17:36:19 +0100 Subject: [PATCH 13/46] migration: add migrate_set_state tracepoint Signed-off-by: Kazuya Saito Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 9 ++++++++- trace-events | 3 +++ 2 files changed, 11 insertions(+), 1 deletion(-) diff --git a/migration.c b/migration.c index 9a234c7b08..b091532152 100644 --- a/migration.c +++ b/migration.c @@ -23,6 +23,7 @@ #include "migration/block.h" #include "qemu/thread.h" #include "qmp-commands.h" +#include "trace.h" //#define DEBUG_MIGRATION @@ -282,6 +283,7 @@ void migrate_fd_error(MigrationState *s) { DPRINTF("setting error state\n"); s->state = MIG_STATE_ERROR; + trace_migrate_set_state(MIG_STATE_ERROR); migrate_fd_cleanup(s); } @@ -289,6 +291,7 @@ static void migrate_fd_completed(MigrationState *s) { DPRINTF("setting completed state\n"); s->state = MIG_STATE_COMPLETED; + trace_migrate_set_state(MIG_STATE_COMPLETED); migrate_fd_cleanup(s); } @@ -319,6 +322,7 @@ static void migrate_fd_cancel(MigrationState *s) DPRINTF("cancelling migration\n"); s->state = MIG_STATE_CANCELLED; + trace_migrate_set_state(MIG_STATE_CANCELLED); migrate_fd_cleanup(s); } @@ -377,8 +381,9 @@ static MigrationState *migrate_init(const MigrationParams *params) s->bandwidth_limit = bandwidth_limit; s->state = MIG_STATE_SETUP; - s->total_time = qemu_get_clock_ms(rt_clock); + trace_migrate_set_state(MIG_STATE_SETUP); + s->total_time = qemu_get_clock_ms(rt_clock); return s; } @@ -738,6 +743,8 @@ static const QEMUFileOps buffered_file_ops = { void migrate_fd_connect(MigrationState *s) { s->state = MIG_STATE_ACTIVE; + trace_migrate_set_state(MIG_STATE_ACTIVE); + s->bytes_xfer = 0; s->buffer = NULL; s->buffer_size = 0; diff --git a/trace-events b/trace-events index 3064fc7767..8389d83568 100644 --- a/trace-events +++ b/trace-events @@ -1091,3 +1091,6 @@ css_io_interrupt(int cssid, int ssid, int schid, uint32_t intparm, uint8_t isc, # hw/s390x/virtio-ccw.c virtio_ccw_interpret_ccw(int cssid, int ssid, int schid, int cmd_code) "VIRTIO-CCW: %x.%x.%04x: interpret command %x" virtio_ccw_new_device(int cssid, int ssid, int schid, int devno, const char *devno_mode) "VIRTIO-CCW: add subchannel %x.%x.%04x, devno %04x (%s)" + +# migration.c +migrate_set_state(int new_state) "new state %d" From f4410a5d9926886c36d9fa9fdd969d0469d62724 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:20 +0100 Subject: [PATCH 14/46] migration: prepare to access s->state outside critical sections Accessing s->state outside the big QEMU lock will simplify a bit the locking/unlocking of the iothread lock. We need to keep the lock in migrate_fd_error and migrate_fd_completed, however, because they call migrate_fd_cleanup. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 33 +++++++++++++++++++++------------ 1 file changed, 21 insertions(+), 12 deletions(-) diff --git a/migration.c b/migration.c index b091532152..b40755f8ad 100644 --- a/migration.c +++ b/migration.c @@ -279,19 +279,25 @@ static void migrate_fd_cleanup(MigrationState *s) notifier_list_notify(&migration_state_notifiers, s); } +static void migrate_finish_set_state(MigrationState *s, int new_state) +{ + if (__sync_val_compare_and_swap(&s->state, MIG_STATE_ACTIVE, + new_state) == new_state) { + trace_migrate_set_state(new_state); + } +} + void migrate_fd_error(MigrationState *s) { DPRINTF("setting error state\n"); - s->state = MIG_STATE_ERROR; - trace_migrate_set_state(MIG_STATE_ERROR); + migrate_finish_set_state(s, MIG_STATE_ERROR); migrate_fd_cleanup(s); } static void migrate_fd_completed(MigrationState *s) { DPRINTF("setting completed state\n"); - s->state = MIG_STATE_COMPLETED; - trace_migrate_set_state(MIG_STATE_COMPLETED); + migrate_finish_set_state(s, MIG_STATE_COMPLETED); migrate_fd_cleanup(s); } @@ -316,13 +322,9 @@ static ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, static void migrate_fd_cancel(MigrationState *s) { - if (s->state != MIG_STATE_ACTIVE) - return; - DPRINTF("cancelling migration\n"); - s->state = MIG_STATE_CANCELLED; - trace_migrate_set_state(MIG_STATE_CANCELLED); + migrate_finish_set_state(s, MIG_STATE_CANCELLED); migrate_fd_cleanup(s); } @@ -657,12 +659,14 @@ static void *buffered_file_thread(void *opaque) qemu_mutex_lock_iothread(); DPRINTF("beginning savevm\n"); qemu_savevm_state_begin(s->file, &s->params); + qemu_mutex_unlock_iothread(); while (s->state == MIG_STATE_ACTIVE) { int64_t current_time; uint64_t pending_size; if (s->bytes_xfer < s->xfer_limit) { + qemu_mutex_lock_iothread(); DPRINTF("iterate\n"); pending_size = qemu_savevm_state_pending(s->file, max_size); DPRINTF("pending size %lu max %lu\n", pending_size, max_size); @@ -678,8 +682,9 @@ static void *buffered_file_thread(void *opaque) qemu_savevm_state_complete(s->file); last_round = true; } + qemu_mutex_unlock_iothread(); } - qemu_mutex_unlock_iothread(); + current_time = qemu_get_clock_ms(rt_clock); if (current_time >= initial_time + BUFFER_DELAY) { uint64_t transferred_bytes = s->bytes_xfer; @@ -706,14 +711,18 @@ static void *buffered_file_thread(void *opaque) sleep_time += qemu_get_clock_ms(rt_clock) - current_time; } buffered_flush(s); - qemu_mutex_lock_iothread(); if (qemu_file_get_error(s->file)) { + qemu_mutex_lock_iothread(); migrate_fd_error(s); + qemu_mutex_unlock_iothread(); } else if (last_round && s->buffer_size == 0) { + qemu_mutex_lock_iothread(); migrate_fd_completed(s); + qemu_mutex_unlock_iothread(); } } + qemu_mutex_lock_iothread(); if (s->state == MIG_STATE_COMPLETED) { int64_t end_time = qemu_get_clock_ms(rt_clock); s->total_time = end_time - s->total_time; @@ -725,8 +734,8 @@ static void *buffered_file_thread(void *opaque) vm_start(); } } - qemu_mutex_unlock_iothread(); + g_free(s->buffer); return NULL; } From bb1fadc444ff967554c41d96cb9dde110e8aece9 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:21 +0100 Subject: [PATCH 15/46] migration: cleanup migration (including thread) in the iothread Perform final cleanup in a bottom half, and add joining the thread to the series of cleanup actions. migrate_fd_error remains for connection error, but it doesn't need to cleanup anything anymore. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/migration.h | 1 + migration.c | 38 ++++++++++++++++++----------------- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/include/migration/migration.h b/include/migration/migration.h index 3e680af7f6..ed20bed03c 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -38,6 +38,7 @@ struct MigrationState size_t buffer_size; size_t buffer_capacity; QemuThread thread; + QEMUBH *cleanup_bh; QEMUFile *file; int fd; diff --git a/migration.c b/migration.c index b40755f8ad..729578b730 100644 --- a/migration.c +++ b/migration.c @@ -261,8 +261,13 @@ void qmp_migrate_set_capabilities(MigrationCapabilityStatusList *params, /* shared migration helpers */ -static void migrate_fd_cleanup(MigrationState *s) +static void migrate_fd_cleanup(void *opaque) { + MigrationState *s = opaque; + + qemu_bh_delete(s->cleanup_bh); + s->cleanup_bh = NULL; + if (s->file) { DPRINTF("closing file\n"); qemu_fclose(s->file); @@ -290,15 +295,10 @@ static void migrate_finish_set_state(MigrationState *s, int new_state) void migrate_fd_error(MigrationState *s) { DPRINTF("setting error state\n"); - migrate_finish_set_state(s, MIG_STATE_ERROR); - migrate_fd_cleanup(s); -} - -static void migrate_fd_completed(MigrationState *s) -{ - DPRINTF("setting completed state\n"); - migrate_finish_set_state(s, MIG_STATE_COMPLETED); - migrate_fd_cleanup(s); + assert(s->file == NULL); + s->state = MIG_STATE_ERROR; + trace_migrate_set_state(MIG_STATE_ERROR); + notifier_list_notify(&migration_state_notifiers, s); } static ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, @@ -325,7 +325,6 @@ static void migrate_fd_cancel(MigrationState *s) DPRINTF("cancelling migration\n"); migrate_finish_set_state(s, MIG_STATE_CANCELLED); - migrate_fd_cleanup(s); } int migrate_fd_close(MigrationState *s) @@ -590,6 +589,11 @@ static int buffered_close(void *opaque) DPRINTF("closing\n"); + qemu_mutex_unlock_iothread(); + qemu_thread_join(&s->thread); + qemu_mutex_lock_iothread(); + assert(s->state != MIG_STATE_ACTIVE); + return migrate_fd_close(s); } @@ -712,13 +716,9 @@ static void *buffered_file_thread(void *opaque) } buffered_flush(s); if (qemu_file_get_error(s->file)) { - qemu_mutex_lock_iothread(); - migrate_fd_error(s); - qemu_mutex_unlock_iothread(); + migrate_finish_set_state(s, MIG_STATE_ERROR); } else if (last_round && s->buffer_size == 0) { - qemu_mutex_lock_iothread(); - migrate_fd_completed(s); - qemu_mutex_unlock_iothread(); + migrate_finish_set_state(s, MIG_STATE_COMPLETED); } } @@ -734,6 +734,7 @@ static void *buffered_file_thread(void *opaque) vm_start(); } } + qemu_bh_schedule(s->cleanup_bh); qemu_mutex_unlock_iothread(); g_free(s->buffer); @@ -763,9 +764,10 @@ void migrate_fd_connect(MigrationState *s) s->xfer_limit = s->bandwidth_limit / XFER_LIMIT_RATIO; + s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); s->file = qemu_fopen_ops(s, &buffered_file_ops); qemu_thread_create(&s->thread, buffered_file_thread, s, - QEMU_THREAD_DETACHED); + QEMU_THREAD_JOINABLE); notifier_list_notify(&migration_state_notifiers, s); } From a55ce1c851b5802569fb00b2a645a73c03fd7c86 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:22 +0100 Subject: [PATCH 16/46] block-migration: remove variables that are never read Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- block-migration.c | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/block-migration.c b/block-migration.c index e6c917d952..e72f322bc7 100644 --- a/block-migration.c +++ b/block-migration.c @@ -50,7 +50,6 @@ typedef struct BlkMigDevState { int64_t cur_dirty; int64_t completed_sectors; int64_t total_sectors; - int64_t dirty; QSIMPLEQ_ENTRY(BlkMigDevState) entry; unsigned long *aio_bitmap; } BlkMigDevState; @@ -78,7 +77,6 @@ typedef struct BlkMigState { int64_t total_sector_sum; int prev_progress; int bulk_completed; - long double prev_time_offset; } BlkMigState; static BlkMigState block_mig_state; @@ -179,13 +177,10 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds) static void blk_mig_read_cb(void *opaque, int ret) { - long double curr_time = qemu_get_clock_ns(rt_clock); BlkMigBlock *blk = opaque; blk->ret = ret; - block_mig_state.prev_time_offset = curr_time; - QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry); bmds_set_aio_inflight(blk->bmds, blk->sector, blk->nr_sectors, 0); @@ -236,10 +231,6 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); - if (block_mig_state.submitted == 0) { - block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock); - } - blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); block_mig_state.submitted++; @@ -382,10 +373,6 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); - if (block_mig_state.submitted == 0) { - block_mig_state.prev_time_offset = qemu_get_clock_ns(rt_clock); - } - blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); block_mig_state.submitted++; From 13197e3cbaba0ba693dd2855a32182ca584fa97e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:23 +0100 Subject: [PATCH 17/46] block-migration: small preparatory changes for locking Some small changes that will simplify the positioning of lock/unlock primitives. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- block-migration.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) diff --git a/block-migration.c b/block-migration.c index e72f322bc7..9a40edd457 100644 --- a/block-migration.c +++ b/block-migration.c @@ -231,9 +231,10 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); + block_mig_state.submitted++; + blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); - block_mig_state.submitted++; bdrv_reset_dirty(bs, cur_sector, nr_sectors); bmds->cur_sector = cur_sector + nr_sectors; @@ -440,9 +441,10 @@ static int flush_blks(QEMUFile *f) ret = blk->ret; break; } - blk_send(f, blk); QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry); + blk_send(f, blk); + g_free(blk->buf); g_free(blk); @@ -542,15 +544,16 @@ static int block_save_iterate(QEMUFile *f, void *opaque) /* finished saving bulk on all devices */ block_mig_state.bulk_completed = 1; } + ret = 0; } else { ret = blk_mig_save_dirty_block(f, 1); - if (ret < 0) { - return ret; - } - if (ret != 0) { - /* no more dirty blocks */ - break; - } + } + if (ret < 0) { + return ret; + } + if (ret != 0) { + /* no more dirty blocks */ + break; } } @@ -560,7 +563,6 @@ static int block_save_iterate(QEMUFile *f, void *opaque) } qemu_put_be64(f, BLK_MIG_FLAG_EOS); - return qemu_ftell(f) - last_ftell; } @@ -603,7 +605,9 @@ static int block_save_complete(QEMUFile *f, void *opaque) static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) { /* Estimate pending number of bytes to send */ - uint64_t pending = get_remaining_dirty() + + uint64_t pending; + + pending = get_remaining_dirty() + block_mig_state.submitted * BLOCK_SIZE + block_mig_state.read_done * BLOCK_SIZE; From 323920c4eac01de74cf2b5e941c97ca9b2d36b7f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:24 +0100 Subject: [PATCH 18/46] block-migration: document usage of state across threads Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- block-migration.c | 23 ++++++++++++++++++----- 1 file changed, 18 insertions(+), 5 deletions(-) diff --git a/block-migration.c b/block-migration.c index 9a40edd457..d62a8b80ba 100644 --- a/block-migration.c +++ b/block-migration.c @@ -43,18 +43,24 @@ #endif typedef struct BlkMigDevState { + /* Written during setup phase. Can be read without a lock. */ BlockDriverState *bs; - int bulk_completed; int shared_base; - int64_t cur_sector; - int64_t cur_dirty; - int64_t completed_sectors; int64_t total_sectors; QSIMPLEQ_ENTRY(BlkMigDevState) entry; + + /* Only used by migration thread. Does not need a lock. */ + int bulk_completed; + int64_t cur_sector; + int64_t cur_dirty; + + /* Protected by iothread lock. */ unsigned long *aio_bitmap; + int64_t completed_sectors; } BlkMigDevState; typedef struct BlkMigBlock { + /* Only used by migration thread. */ uint8_t *buf; BlkMigDevState *bmds; int64_t sector; @@ -62,19 +68,26 @@ typedef struct BlkMigBlock { struct iovec iov; QEMUIOVector qiov; BlockDriverAIOCB *aiocb; + + /* Protected by iothread lock. */ int ret; QSIMPLEQ_ENTRY(BlkMigBlock) entry; } BlkMigBlock; typedef struct BlkMigState { + /* Written during setup phase. Can be read without a lock. */ int blk_enable; int shared_base; QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list; + int64_t total_sector_sum; + + /* Protected by iothread lock. */ QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list; int submitted; int read_done; + + /* Only used by migration thread. Does not need a lock. */ int transferred; - int64_t total_sector_sum; int prev_progress; int bulk_completed; } BlkMigState; From 52e850dea988585c3d693fd9cd4a4c38968d89b8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:25 +0100 Subject: [PATCH 19/46] block-migration: add lock Some state is shared between the block migration code and its AIO callbacks. Once block migration will run outside the iothread, the block migration code and the AIO callbacks will be able to run concurrently. Protect the critical sections with a separate lock. Do the same for completed_sectors, which can be used from the monitor. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- block-migration.c | 54 ++++++++++++++++++++++++++++++++++++++++--- include/qemu/atomic.h | 1 + 2 files changed, 52 insertions(+), 3 deletions(-) diff --git a/block-migration.c b/block-migration.c index d62a8b80ba..b726c6c002 100644 --- a/block-migration.c +++ b/block-migration.c @@ -54,7 +54,7 @@ typedef struct BlkMigDevState { int64_t cur_sector; int64_t cur_dirty; - /* Protected by iothread lock. */ + /* Protected by block migration lock. */ unsigned long *aio_bitmap; int64_t completed_sectors; } BlkMigDevState; @@ -69,7 +69,7 @@ typedef struct BlkMigBlock { QEMUIOVector qiov; BlockDriverAIOCB *aiocb; - /* Protected by iothread lock. */ + /* Protected by block migration lock. */ int ret; QSIMPLEQ_ENTRY(BlkMigBlock) entry; } BlkMigBlock; @@ -81,7 +81,7 @@ typedef struct BlkMigState { QSIMPLEQ_HEAD(bmds_list, BlkMigDevState) bmds_list; int64_t total_sector_sum; - /* Protected by iothread lock. */ + /* Protected by lock. */ QSIMPLEQ_HEAD(blk_list, BlkMigBlock) blk_list; int submitted; int read_done; @@ -90,10 +90,23 @@ typedef struct BlkMigState { int transferred; int prev_progress; int bulk_completed; + + /* Lock must be taken _inside_ the iothread lock. */ + QemuMutex lock; } BlkMigState; static BlkMigState block_mig_state; +static void blk_mig_lock(void) +{ + qemu_mutex_lock(&block_mig_state.lock); +} + +static void blk_mig_unlock(void) +{ + qemu_mutex_unlock(&block_mig_state.lock); +} + static void blk_send(QEMUFile *f, BlkMigBlock * blk) { int len; @@ -120,9 +133,11 @@ uint64_t blk_mig_bytes_transferred(void) BlkMigDevState *bmds; uint64_t sum = 0; + blk_mig_lock(); QSIMPLEQ_FOREACH(bmds, &block_mig_state.bmds_list, entry) { sum += bmds->completed_sectors; } + blk_mig_unlock(); return sum << BDRV_SECTOR_BITS; } @@ -142,6 +157,9 @@ uint64_t blk_mig_bytes_total(void) return sum << BDRV_SECTOR_BITS; } + +/* Called with migration lock held. */ + static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector) { int64_t chunk = sector / (int64_t)BDRV_SECTORS_PER_DIRTY_CHUNK; @@ -154,6 +172,8 @@ static int bmds_aio_inflight(BlkMigDevState *bmds, int64_t sector) } } +/* Called with migration lock held. */ + static void bmds_set_aio_inflight(BlkMigDevState *bmds, int64_t sector_num, int nb_sectors, int set) { @@ -188,10 +208,13 @@ static void alloc_aio_bitmap(BlkMigDevState *bmds) bmds->aio_bitmap = g_malloc0(bitmap_size); } +/* Never hold migration lock when yielding to the main loop! */ + static void blk_mig_read_cb(void *opaque, int ret) { BlkMigBlock *blk = opaque; + blk_mig_lock(); blk->ret = ret; QSIMPLEQ_INSERT_TAIL(&block_mig_state.blk_list, blk, entry); @@ -200,6 +223,7 @@ static void blk_mig_read_cb(void *opaque, int ret) block_mig_state.submitted--; block_mig_state.read_done++; assert(block_mig_state.submitted >= 0); + blk_mig_unlock(); } static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) @@ -244,7 +268,9 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) blk->iov.iov_len = nr_sectors * BDRV_SECTOR_SIZE; qemu_iovec_init_external(&blk->qiov, &blk->iov, 1); + blk_mig_lock(); block_mig_state.submitted++; + blk_mig_unlock(); blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); @@ -366,8 +392,12 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, int ret = -EIO; for (sector = bmds->cur_dirty; sector < bmds->total_sectors;) { + blk_mig_lock(); if (bmds_aio_inflight(bmds, sector)) { + blk_mig_unlock(); bdrv_drain_all(); + } else { + blk_mig_unlock(); } if (bdrv_get_dirty(bmds->bs, sector)) { @@ -389,8 +419,11 @@ static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, blk->aiocb = bdrv_aio_readv(bmds->bs, sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); + + blk_mig_lock(); block_mig_state.submitted++; bmds_set_aio_inflight(bmds, sector, nr_sectors, 1); + blk_mig_unlock(); } else { ret = bdrv_read(bmds->bs, sector, blk->buf, nr_sectors); if (ret < 0) { @@ -446,6 +479,7 @@ static int flush_blks(QEMUFile *f) __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done, block_mig_state.transferred); + blk_mig_lock(); while ((blk = QSIMPLEQ_FIRST(&block_mig_state.blk_list)) != NULL) { if (qemu_file_rate_limit(f)) { break; @@ -456,7 +490,9 @@ static int flush_blks(QEMUFile *f) } QSIMPLEQ_REMOVE_HEAD(&block_mig_state.blk_list, entry); + blk_mig_unlock(); blk_send(f, blk); + blk_mig_lock(); g_free(blk->buf); g_free(blk); @@ -465,6 +501,7 @@ static int flush_blks(QEMUFile *f) block_mig_state.transferred++; assert(block_mig_state.read_done >= 0); } + blk_mig_unlock(); DPRINTF("%s Exit submitted %d read_done %d transferred %d\n", __FUNCTION__, block_mig_state.submitted, block_mig_state.read_done, @@ -493,6 +530,7 @@ static void blk_mig_cleanup(void) set_dirty_tracking(0); + blk_mig_lock(); while ((bmds = QSIMPLEQ_FIRST(&block_mig_state.bmds_list)) != NULL) { QSIMPLEQ_REMOVE_HEAD(&block_mig_state.bmds_list, entry); bdrv_set_in_use(bmds->bs, 0); @@ -506,6 +544,7 @@ static void blk_mig_cleanup(void) g_free(blk->buf); g_free(blk); } + blk_mig_unlock(); } static void block_migration_cancel(void *opaque) @@ -548,9 +587,11 @@ static int block_save_iterate(QEMUFile *f, void *opaque) blk_mig_reset_dirty_cursor(); /* control the rate of transfer */ + blk_mig_lock(); while ((block_mig_state.submitted + block_mig_state.read_done) * BLOCK_SIZE < qemu_file_get_rate_limit(f)) { + blk_mig_unlock(); if (block_mig_state.bulk_completed == 0) { /* first finish the bulk phase */ if (blk_mig_save_bulked_block(f) == 0) { @@ -564,11 +605,13 @@ static int block_save_iterate(QEMUFile *f, void *opaque) if (ret < 0) { return ret; } + blk_mig_lock(); if (ret != 0) { /* no more dirty blocks */ break; } } + blk_mig_unlock(); ret = flush_blks(f); if (ret) { @@ -595,7 +638,9 @@ static int block_save_complete(QEMUFile *f, void *opaque) /* we know for sure that save bulk is completed and all async read completed */ + blk_mig_lock(); assert(block_mig_state.submitted == 0); + blk_mig_unlock(); do { ret = blk_mig_save_dirty_block(f, 0); @@ -620,6 +665,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) /* Estimate pending number of bytes to send */ uint64_t pending; + blk_mig_lock(); pending = get_remaining_dirty() + block_mig_state.submitted * BLOCK_SIZE + block_mig_state.read_done * BLOCK_SIZE; @@ -628,6 +674,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) if (pending == 0 && !block_mig_state.bulk_completed) { pending = BLOCK_SIZE; } + blk_mig_unlock(); DPRINTF("Enter save live pending %" PRIu64 "\n", pending); return pending; @@ -739,6 +786,7 @@ void blk_mig_init(void) { QSIMPLEQ_INIT(&block_mig_state.bmds_list); QSIMPLEQ_INIT(&block_mig_state.blk_list); + qemu_mutex_init(&block_mig_state.lock); register_savevm_live(NULL, "block", 0, 1, &savevm_block_handlers, &block_mig_state); diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 96a194bbee..10becb6101 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -16,6 +16,7 @@ */ #define smp_wmb() barrier() #define smp_rmb() barrier() + /* * We use GCC builtin if it's available, as that can use * mfence on 32 bit as well, e.g. if built with -march=pentium-m. From 8c8de19d93444536d3291e6ab83e2bcf61dd2d0c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:26 +0100 Subject: [PATCH 20/46] migration: reorder SaveVMHandlers members This groups together the callbacks that later will have similar locking rules. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/vmstate.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 94a409b708..fdf4e651ad 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -34,13 +34,15 @@ typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id); typedef struct SaveVMHandlers { void (*set_params)(const MigrationParams *params, void * opaque); SaveStateHandler *save_state; + int (*save_live_setup)(QEMUFile *f, void *opaque); - int (*save_live_iterate)(QEMUFile *f, void *opaque); - int (*save_live_complete)(QEMUFile *f, void *opaque); - uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size); void (*cancel)(void *opaque); - LoadStateHandler *load_state; + int (*save_live_complete)(QEMUFile *f, void *opaque); bool (*is_active)(void *opaque); + int (*save_live_iterate)(QEMUFile *f, void *opaque); + uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size); + + LoadStateHandler *load_state; } SaveVMHandlers; int register_savevm(DeviceState *dev, From 32c835ba3984728c22d4e73cdb595090a60f437e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:27 +0100 Subject: [PATCH 21/46] migration: run pending/iterate callbacks out of big lock This makes it possible to do blocking writes directly to the socket, with no buffer in the middle. For RAM, only the migration_bitmap_sync() call needs the iothread lock. For block migration, it is needed by the block layer (including bdrv_drain_all and dirty bitmap access), but because some code is shared between iterate and complete, all of mig_save_device_dirty is run with the lock taken. In the savevm case, the iterate callback runs within the big lock. This is annoying because it complicates the rules. Luckily we do not need to do anything about it: the RAM iterate callback does not need the iothread lock, and block migration never runs during savevm. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- arch_init.c | 4 ++++ block-migration.c | 37 +++++++++++++++++++++++++++++++++++-- include/migration/vmstate.h | 11 +++++++++++ migration.c | 4 ++-- 4 files changed, 52 insertions(+), 4 deletions(-) diff --git a/arch_init.c b/arch_init.c index 8daeafaf5c..32b437897c 100644 --- a/arch_init.c +++ b/arch_init.c @@ -379,6 +379,8 @@ static inline bool migration_bitmap_set_dirty(MemoryRegion *mr, return ret; } +/* Needs iothread lock! */ + static void migration_bitmap_sync(void) { RAMBlock *block; @@ -690,7 +692,9 @@ static uint64_t ram_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; if (remaining_size < max_size) { + qemu_mutex_lock_iothread(); migration_bitmap_sync(); + qemu_mutex_unlock_iothread(); remaining_size = ram_save_remaining() * TARGET_PAGE_SIZE; } return remaining_size; diff --git a/block-migration.c b/block-migration.c index b726c6c002..8da5f868af 100644 --- a/block-migration.c +++ b/block-migration.c @@ -107,6 +107,10 @@ static void blk_mig_unlock(void) qemu_mutex_unlock(&block_mig_state.lock); } +/* Must run outside of the iothread lock during the bulk phase, + * or the VM will stall. + */ + static void blk_send(QEMUFile *f, BlkMigBlock * blk) { int len; @@ -226,6 +230,8 @@ static void blk_mig_read_cb(void *opaque, int ret) blk_mig_unlock(); } +/* Called with no lock taken. */ + static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) { int64_t total_sectors = bmds->total_sectors; @@ -235,11 +241,13 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) int nr_sectors; if (bmds->shared_base) { + qemu_mutex_lock_iothread(); while (cur_sector < total_sectors && !bdrv_is_allocated(bs, cur_sector, MAX_IS_ALLOCATED_SEARCH, &nr_sectors)) { cur_sector += nr_sectors; } + qemu_mutex_unlock_iothread(); } if (cur_sector >= total_sectors) { @@ -272,15 +280,19 @@ static int mig_save_device_bulk(QEMUFile *f, BlkMigDevState *bmds) block_mig_state.submitted++; blk_mig_unlock(); + qemu_mutex_lock_iothread(); blk->aiocb = bdrv_aio_readv(bs, cur_sector, &blk->qiov, nr_sectors, blk_mig_read_cb, blk); bdrv_reset_dirty(bs, cur_sector, nr_sectors); - bmds->cur_sector = cur_sector + nr_sectors; + qemu_mutex_unlock_iothread(); + bmds->cur_sector = cur_sector + nr_sectors; return (bmds->cur_sector >= total_sectors); } +/* Called with iothread lock taken. */ + static void set_dirty_tracking(int enable) { BlkMigDevState *bmds; @@ -336,6 +348,8 @@ static void init_blk_migration(QEMUFile *f) bdrv_iterate(init_blk_migration_it, NULL); } +/* Called with no lock taken. */ + static int blk_mig_save_bulked_block(QEMUFile *f) { int64_t completed_sector_sum = 0; @@ -382,6 +396,8 @@ static void blk_mig_reset_dirty_cursor(void) } } +/* Called with iothread lock taken. */ + static int mig_save_device_dirty(QEMUFile *f, BlkMigDevState *bmds, int is_async) { @@ -451,7 +467,9 @@ error: return ret; } -/* return value: +/* Called with iothread lock taken. + * + * return value: * 0: too much data for max_downtime * 1: few enough data for max_downtime */ @@ -470,6 +488,8 @@ static int blk_mig_save_dirty_block(QEMUFile *f, int is_async) return ret; } +/* Called with no locks taken. */ + static int flush_blks(QEMUFile *f) { BlkMigBlock *blk; @@ -509,6 +529,8 @@ static int flush_blks(QEMUFile *f) return ret; } +/* Called with iothread lock taken. */ + static int64_t get_remaining_dirty(void) { BlkMigDevState *bmds; @@ -521,6 +543,8 @@ static int64_t get_remaining_dirty(void) return dirty << BDRV_SECTOR_BITS; } +/* Called with iothread lock taken. */ + static void blk_mig_cleanup(void) { BlkMigDevState *bmds; @@ -600,7 +624,12 @@ static int block_save_iterate(QEMUFile *f, void *opaque) } ret = 0; } else { + /* Always called with iothread lock taken for + * simplicity, block_save_complete also calls it. + */ + qemu_mutex_lock_iothread(); ret = blk_mig_save_dirty_block(f, 1); + qemu_mutex_unlock_iothread(); } if (ret < 0) { return ret; @@ -622,6 +651,8 @@ static int block_save_iterate(QEMUFile *f, void *opaque) return qemu_ftell(f) - last_ftell; } +/* Called with iothread lock taken. */ + static int block_save_complete(QEMUFile *f, void *opaque) { int ret; @@ -665,6 +696,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) /* Estimate pending number of bytes to send */ uint64_t pending; + qemu_mutex_lock_iothread(); blk_mig_lock(); pending = get_remaining_dirty() + block_mig_state.submitted * BLOCK_SIZE + @@ -675,6 +707,7 @@ static uint64_t block_save_pending(QEMUFile *f, void *opaque, uint64_t max_size) pending = BLOCK_SIZE; } blk_mig_unlock(); + qemu_mutex_unlock_iothread(); DPRINTF("Enter save live pending %" PRIu64 "\n", pending); return pending; diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index fdf4e651ad..a816ac3243 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -32,14 +32,25 @@ typedef void SaveStateHandler(QEMUFile *f, void *opaque); typedef int LoadStateHandler(QEMUFile *f, void *opaque, int version_id); typedef struct SaveVMHandlers { + /* This runs inside the iothread lock. */ void (*set_params)(const MigrationParams *params, void * opaque); SaveStateHandler *save_state; int (*save_live_setup)(QEMUFile *f, void *opaque); void (*cancel)(void *opaque); int (*save_live_complete)(QEMUFile *f, void *opaque); + + /* This runs both outside and inside the iothread lock. */ bool (*is_active)(void *opaque); + + /* This runs outside the iothread lock in the migration case, and + * within the lock in the savevm case. The callback had better only + * use data that is local to the migration thread or protected + * by other locks. + */ int (*save_live_iterate)(QEMUFile *f, void *opaque); + + /* This runs outside the iothread lock! */ uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size); LoadStateHandler *load_state; diff --git a/migration.c b/migration.c index 729578b730..92a7152d67 100644 --- a/migration.c +++ b/migration.c @@ -670,7 +670,6 @@ static void *buffered_file_thread(void *opaque) uint64_t pending_size; if (s->bytes_xfer < s->xfer_limit) { - qemu_mutex_lock_iothread(); DPRINTF("iterate\n"); pending_size = qemu_savevm_state_pending(s->file, max_size); DPRINTF("pending size %lu max %lu\n", pending_size, max_size); @@ -678,6 +677,7 @@ static void *buffered_file_thread(void *opaque) qemu_savevm_state_iterate(s->file); } else { DPRINTF("done iterating\n"); + qemu_mutex_lock_iothread(); start_time = qemu_get_clock_ms(rt_clock); qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); old_vm_running = runstate_is_running(); @@ -685,8 +685,8 @@ static void *buffered_file_thread(void *opaque) s->xfer_limit = INT_MAX; qemu_savevm_state_complete(s->file); last_round = true; + qemu_mutex_unlock_iothread(); } - qemu_mutex_unlock_iothread(); } current_time = qemu_get_clock_ms(rt_clock); From 9b0950375277467fd74a9075624477ae43b9bb22 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:28 +0100 Subject: [PATCH 22/46] migration: run setup callbacks out of big lock Only the migration_bitmap_sync() call needs the iothread lock. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- arch_init.c | 10 ++++++---- block-migration.c | 2 ++ include/migration/vmstate.h | 2 +- migration.c | 2 -- savevm.c | 3 +++ 5 files changed, 12 insertions(+), 7 deletions(-) diff --git a/arch_init.c b/arch_init.c index 32b437897c..6089c53386 100644 --- a/arch_init.c +++ b/arch_init.c @@ -570,10 +570,6 @@ static int ram_save_setup(QEMUFile *f, void *opaque) bitmap_set(migration_bitmap, 0, ram_pages); migration_dirty_pages = ram_pages; - qemu_mutex_lock_ramlist(); - bytes_transferred = 0; - reset_ram_globals(); - if (migrate_use_xbzrle()) { XBZRLE.cache = cache_init(migrate_xbzrle_cache_size() / TARGET_PAGE_SIZE, @@ -587,8 +583,14 @@ static int ram_save_setup(QEMUFile *f, void *opaque) acct_clear(); } + qemu_mutex_lock_iothread(); + qemu_mutex_lock_ramlist(); + bytes_transferred = 0; + reset_ram_globals(); + memory_global_dirty_log_start(); migration_bitmap_sync(); + qemu_mutex_unlock_iothread(); qemu_put_be64(f, ram_bytes_total() | RAM_SAVE_FLAG_MEM_SIZE); diff --git a/block-migration.c b/block-migration.c index 8da5f868af..2fd7699794 100644 --- a/block-migration.c +++ b/block-migration.c @@ -583,10 +583,12 @@ static int block_save_setup(QEMUFile *f, void *opaque) DPRINTF("Enter save live setup submitted %d transferred %d\n", block_mig_state.submitted, block_mig_state.transferred); + qemu_mutex_lock_iothread(); init_blk_migration(f); /* start track dirty blocks */ set_dirty_tracking(1); + qemu_mutex_unlock_iothread(); ret = flush_blks(f); blk_mig_reset_dirty_cursor(); diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index a816ac3243..a64db941bc 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -36,7 +36,6 @@ typedef struct SaveVMHandlers { void (*set_params)(const MigrationParams *params, void * opaque); SaveStateHandler *save_state; - int (*save_live_setup)(QEMUFile *f, void *opaque); void (*cancel)(void *opaque); int (*save_live_complete)(QEMUFile *f, void *opaque); @@ -51,6 +50,7 @@ typedef struct SaveVMHandlers { int (*save_live_iterate)(QEMUFile *f, void *opaque); /* This runs outside the iothread lock! */ + int (*save_live_setup)(QEMUFile *f, void *opaque); uint64_t (*save_live_pending)(QEMUFile *f, void *opaque, uint64_t max_size); LoadStateHandler *load_state; diff --git a/migration.c b/migration.c index 92a7152d67..e64c92d75b 100644 --- a/migration.c +++ b/migration.c @@ -660,10 +660,8 @@ static void *buffered_file_thread(void *opaque) bool old_vm_running = false; bool last_round = false; - qemu_mutex_lock_iothread(); DPRINTF("beginning savevm\n"); qemu_savevm_state_begin(s->file, &s->params); - qemu_mutex_unlock_iothread(); while (s->state == MIG_STATE_ACTIVE) { int64_t current_time; diff --git a/savevm.c b/savevm.c index e10a045df6..7c7774e932 100644 --- a/savevm.c +++ b/savevm.c @@ -1768,7 +1768,10 @@ static int qemu_savevm_state(QEMUFile *f) return -EINVAL; } + qemu_mutex_unlock_iothread(); qemu_savevm_state_begin(f, ¶ms); + qemu_mutex_lock_iothread(); + while (qemu_file_get_error(f) == 0) { if (qemu_savevm_state_iterate(f) > 0) { break; From edaae611f6df0d66a8b5a90c84123b72980c7a22 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:29 +0100 Subject: [PATCH 23/46] migration: yay, buffering is gone Buffering was needed because blocking writes could take a long time and starve other threads seeking to grab the big QEMU mutex. Now that all writes (except within _complete callbacks) are done outside the big QEMU mutex, we do not need buffering at all. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/migration.h | 3 -- migration.c | 79 ++++++++++------------------------- savevm.c | 1 + 3 files changed, 22 insertions(+), 61 deletions(-) diff --git a/include/migration/migration.h b/include/migration/migration.h index ed20bed03c..cec8643870 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -34,9 +34,6 @@ struct MigrationState int64_t bandwidth_limit; size_t bytes_xfer; size_t xfer_limit; - uint8_t *buffer; - size_t buffer_size; - size_t buffer_capacity; QemuThread thread; QEMUBH *cleanup_bh; diff --git a/migration.c b/migration.c index e64c92d75b..4c8d576701 100644 --- a/migration.c +++ b/migration.c @@ -514,73 +514,41 @@ int64_t migrate_xbzrle_cache_size(void) /* migration thread support */ - -static void buffered_flush(MigrationState *s) -{ - size_t offset = 0; - ssize_t ret = 0; - - DPRINTF("flushing %zu byte(s) of data\n", s->buffer_size); - - if (qemu_file_get_error(s->file)) { - s->buffer_size = 0; - return; - } - qemu_fflush(s->file); - - while (s->bytes_xfer < s->xfer_limit && offset < s->buffer_size) { - size_t to_send = MIN(s->buffer_size - offset, s->xfer_limit - s->bytes_xfer); - ret = migrate_fd_put_buffer(s, s->buffer + offset, to_send); - if (ret <= 0) { - DPRINTF("error flushing data, %zd\n", ret); - break; - } else { - DPRINTF("flushed %zd byte(s)\n", ret); - offset += ret; - s->bytes_xfer += ret; - } - } - - DPRINTF("flushed %zu of %zu byte(s)\n", offset, s->buffer_size); - memmove(s->buffer, s->buffer + offset, s->buffer_size - offset); - s->buffer_size -= offset; - - if (ret < 0) { - qemu_file_set_error(s->file, ret); - } -} - static int buffered_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size) { MigrationState *s = opaque; - ssize_t error; + ssize_t ret; + size_t sent; DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos); - error = qemu_file_get_error(s->file); - if (error) { - DPRINTF("flush when error, bailing: %s\n", strerror(-error)); - return error; + ret = qemu_file_get_error(s->file); + if (ret) { + DPRINTF("flush when error, bailing: %s\n", strerror(-ret)); + return ret; } if (size <= 0) { return size; } - if (size > (s->buffer_capacity - s->buffer_size)) { - DPRINTF("increasing buffer capacity from %zu by %zu\n", - s->buffer_capacity, size + 1024); - - s->buffer_capacity += size + 1024; - - s->buffer = g_realloc(s->buffer, s->buffer_capacity); + sent = 0; + while (size) { + ret = migrate_fd_put_buffer(s, buf, size); + if (ret <= 0) { + DPRINTF("error flushing data, %zd\n", ret); + return ret; + } else { + DPRINTF("flushed %zd byte(s)\n", ret); + sent += ret; + buf += ret; + size -= ret; + s->bytes_xfer += ret; + } } - memcpy(s->buffer + s->buffer_size, buf, size); - s->buffer_size += size; - - return size; + return sent; } static int buffered_close(void *opaque) @@ -712,10 +680,9 @@ static void *buffered_file_thread(void *opaque) g_usleep((initial_time + BUFFER_DELAY - current_time)*1000); sleep_time += qemu_get_clock_ms(rt_clock) - current_time; } - buffered_flush(s); if (qemu_file_get_error(s->file)) { migrate_finish_set_state(s, MIG_STATE_ERROR); - } else if (last_round && s->buffer_size == 0) { + } else if (last_round) { migrate_finish_set_state(s, MIG_STATE_COMPLETED); } } @@ -735,7 +702,6 @@ static void *buffered_file_thread(void *opaque) qemu_bh_schedule(s->cleanup_bh); qemu_mutex_unlock_iothread(); - g_free(s->buffer); return NULL; } @@ -754,9 +720,6 @@ void migrate_fd_connect(MigrationState *s) trace_migrate_set_state(MIG_STATE_ACTIVE); s->bytes_xfer = 0; - s->buffer = NULL; - s->buffer_size = 0; - s->buffer_capacity = 0; /* This is a best 1st approximation. ns to ms */ s->expected_downtime = max_downtime/1000000; diff --git a/savevm.c b/savevm.c index 7c7774e932..ce10295f5c 100644 --- a/savevm.c +++ b/savevm.c @@ -1724,6 +1724,7 @@ void qemu_savevm_state_complete(QEMUFile *f) } qemu_put_byte(f, QEMU_VM_EOF); + qemu_fflush(f); } uint64_t qemu_savevm_state_pending(QEMUFile *f, uint64_t max_size) From 5f496a1be3d15f192be1ab1fed3a3278fd5a91a1 Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Fri, 22 Feb 2013 17:36:30 +0100 Subject: [PATCH 24/46] Rename buffered_ to migration_ This is consistent once that we have moved everything to migration.c Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Juan Quintela --- migration.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/migration.c b/migration.c index 4c8d576701..861b1245b0 100644 --- a/migration.c +++ b/migration.c @@ -514,7 +514,7 @@ int64_t migrate_xbzrle_cache_size(void) /* migration thread support */ -static int buffered_put_buffer(void *opaque, const uint8_t *buf, +static int migration_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size) { MigrationState *s = opaque; @@ -551,7 +551,7 @@ static int buffered_put_buffer(void *opaque, const uint8_t *buf, return sent; } -static int buffered_close(void *opaque) +static int migration_close(void *opaque) { MigrationState *s = opaque; @@ -565,7 +565,7 @@ static int buffered_close(void *opaque) return migrate_fd_close(s); } -static int buffered_get_fd(void *opaque) +static int migration_get_fd(void *opaque) { MigrationState *s = opaque; @@ -578,7 +578,7 @@ static int buffered_get_fd(void *opaque) * 1: Time to stop * negative: There has been an error */ -static int buffered_rate_limit(void *opaque) +static int migration_rate_limit(void *opaque) { MigrationState *s = opaque; int ret; @@ -595,7 +595,7 @@ static int buffered_rate_limit(void *opaque) return 0; } -static int64_t buffered_set_rate_limit(void *opaque, int64_t new_rate) +static int64_t migration_set_rate_limit(void *opaque, int64_t new_rate) { MigrationState *s = opaque; if (qemu_file_get_error(s->file)) { @@ -611,14 +611,14 @@ out: return s->xfer_limit; } -static int64_t buffered_get_rate_limit(void *opaque) +static int64_t migration_get_rate_limit(void *opaque) { MigrationState *s = opaque; return s->xfer_limit; } -static void *buffered_file_thread(void *opaque) +static void *migration_thread(void *opaque) { MigrationState *s = opaque; int64_t initial_time = qemu_get_clock_ms(rt_clock); @@ -705,13 +705,13 @@ static void *buffered_file_thread(void *opaque) return NULL; } -static const QEMUFileOps buffered_file_ops = { - .get_fd = buffered_get_fd, - .put_buffer = buffered_put_buffer, - .close = buffered_close, - .rate_limit = buffered_rate_limit, - .get_rate_limit = buffered_get_rate_limit, - .set_rate_limit = buffered_set_rate_limit, +static const QEMUFileOps migration_file_ops = { + .get_fd = migration_get_fd, + .put_buffer = migration_put_buffer, + .close = migration_close, + .rate_limit = migration_rate_limit, + .get_rate_limit = migration_get_rate_limit, + .set_rate_limit = migration_set_rate_limit, }; void migrate_fd_connect(MigrationState *s) @@ -726,9 +726,9 @@ void migrate_fd_connect(MigrationState *s) s->xfer_limit = s->bandwidth_limit / XFER_LIMIT_RATIO; s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); - s->file = qemu_fopen_ops(s, &buffered_file_ops); + s->file = qemu_fopen_ops(s, &migration_file_ops); - qemu_thread_create(&s->thread, buffered_file_thread, s, + qemu_thread_create(&s->thread, migration_thread, s, QEMU_THREAD_JOINABLE); notifier_list_notify(&migration_state_notifiers, s); } From 05f28b837c6bd6124abab2496ce15c07a334a5ad Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:31 +0100 Subject: [PATCH 25/46] qemu-file: make qemu_fflush and qemu_file_set_error private again Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/qemu-file.h | 2 -- savevm.c | 4 ++-- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 5e0c287793..46fc11dc99 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -82,7 +82,6 @@ QEMUFile *qemu_popen_cmd(const char *command, const char *mode); int qemu_get_fd(QEMUFile *f); int qemu_fclose(QEMUFile *f); int64_t qemu_ftell(QEMUFile *f); -void qemu_fflush(QEMUFile *f); void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size); void qemu_put_byte(QEMUFile *f, int v); @@ -114,7 +113,6 @@ int qemu_file_rate_limit(QEMUFile *f); int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); int64_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_get_error(QEMUFile *f); -void qemu_file_set_error(QEMUFile *f, int ret); static inline void qemu_put_be64s(QEMUFile *f, const uint64_t *pv) { diff --git a/savevm.c b/savevm.c index ce10295f5c..fef2ab9beb 100644 --- a/savevm.c +++ b/savevm.c @@ -443,7 +443,7 @@ int qemu_file_get_error(QEMUFile *f) return f->last_error; } -void qemu_file_set_error(QEMUFile *f, int ret) +static void qemu_file_set_error(QEMUFile *f, int ret) { if (f->last_error == 0) { f->last_error = ret; @@ -453,7 +453,7 @@ void qemu_file_set_error(QEMUFile *f, int ret) /** Flushes QEMUFile buffer * */ -void qemu_fflush(QEMUFile *f) +static void qemu_fflush(QEMUFile *f) { int ret = 0; From 059f896cefb2776181e39d9ba69345bd9d07d52b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:32 +0100 Subject: [PATCH 26/46] migration: eliminate last_round We will go around the loop exactly once after setting last_round. Eliminate the variable altogether. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/migration.c b/migration.c index 861b1245b0..612ffa7438 100644 --- a/migration.c +++ b/migration.c @@ -626,7 +626,6 @@ static void *migration_thread(void *opaque) int64_t max_size = 0; int64_t start_time = initial_time; bool old_vm_running = false; - bool last_round = false; DPRINTF("beginning savevm\n"); qemu_savevm_state_begin(s->file, &s->params); @@ -650,8 +649,11 @@ static void *migration_thread(void *opaque) vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); s->xfer_limit = INT_MAX; qemu_savevm_state_complete(s->file); - last_round = true; qemu_mutex_unlock_iothread(); + if (!qemu_file_get_error(s->file)) { + migrate_finish_set_state(s, MIG_STATE_COMPLETED); + break; + } } } @@ -675,15 +677,13 @@ static void *migration_thread(void *opaque) sleep_time = 0; initial_time = current_time; } - if (!last_round && (s->bytes_xfer >= s->xfer_limit)) { + if (s->bytes_xfer >= s->xfer_limit) { /* usleep expects microseconds */ g_usleep((initial_time + BUFFER_DELAY - current_time)*1000); sleep_time += qemu_get_clock_ms(rt_clock) - current_time; } if (qemu_file_get_error(s->file)) { migrate_finish_set_state(s, MIG_STATE_ERROR); - } else if (last_round) { - migrate_finish_set_state(s, MIG_STATE_COMPLETED); } } @@ -695,7 +695,6 @@ static void *migration_thread(void *opaque) runstate_set(RUN_STATE_POSTMIGRATE); } else { if (old_vm_running) { - assert(last_round); vm_start(); } } From fd45ee2c643bb3d55de5c54b50c23859ca631a9f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:33 +0100 Subject: [PATCH 27/46] migration: detect error before sleeping Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/migration.c b/migration.c index 612ffa7438..414e0f9023 100644 --- a/migration.c +++ b/migration.c @@ -657,6 +657,10 @@ static void *migration_thread(void *opaque) } } + if (qemu_file_get_error(s->file)) { + migrate_finish_set_state(s, MIG_STATE_ERROR); + break; + } current_time = qemu_get_clock_ms(rt_clock); if (current_time >= initial_time + BUFFER_DELAY) { uint64_t transferred_bytes = s->bytes_xfer; @@ -682,9 +686,6 @@ static void *migration_thread(void *opaque) g_usleep((initial_time + BUFFER_DELAY - current_time)*1000); sleep_time += qemu_get_clock_ms(rt_clock) - current_time; } - if (qemu_file_get_error(s->file)) { - migrate_finish_set_state(s, MIG_STATE_ERROR); - } } qemu_mutex_lock_iothread(); From db2f25309af1af0f27e0ddec4acc3b66837fa668 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:34 +0100 Subject: [PATCH 28/46] migration: remove useless qemu_file_get_error check migration_put_buffer is never called if there has been an error. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/migration.c b/migration.c index 414e0f9023..5d99999e47 100644 --- a/migration.c +++ b/migration.c @@ -523,12 +523,6 @@ static int migration_put_buffer(void *opaque, const uint8_t *buf, DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos); - ret = qemu_file_get_error(s->file); - if (ret) { - DPRINTF("flush when error, bailing: %s\n", strerror(-ret)); - return ret; - } - if (size <= 0) { return size; } From a0ff044b8ea81908cd8fe5819ce33780f53f58ee Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:35 +0100 Subject: [PATCH 29/46] migration: use qemu_file_rate_limit consistently Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/migration.c b/migration.c index 5d99999e47..f35728da8d 100644 --- a/migration.c +++ b/migration.c @@ -628,7 +628,7 @@ static void *migration_thread(void *opaque) int64_t current_time; uint64_t pending_size; - if (s->bytes_xfer < s->xfer_limit) { + if (!qemu_file_rate_limit(s->file)) { DPRINTF("iterate\n"); pending_size = qemu_savevm_state_pending(s->file, max_size); DPRINTF("pending size %lu max %lu\n", pending_size, max_size); @@ -675,7 +675,7 @@ static void *migration_thread(void *opaque) sleep_time = 0; initial_time = current_time; } - if (s->bytes_xfer >= s->xfer_limit) { + if (qemu_file_rate_limit(s->file)) { /* usleep expects microseconds */ g_usleep((initial_time + BUFFER_DELAY - current_time)*1000); sleep_time += qemu_get_clock_ms(rt_clock) - current_time; From 817b9ed5eb300dbb434d752da416441028539a96 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:36 +0100 Subject: [PATCH 30/46] migration: merge qemu_popen_cmd with qemu_popen There is no reason for outgoing exec migration to do popen manually anymore (the reason used to be that we needed the FILE* to make it non-blocking). Use qemu_popen_cmd. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/qemu-file.h | 1 - migration-exec.c | 10 ++++------ savevm.c | 22 ++++++++-------------- 3 files changed, 12 insertions(+), 21 deletions(-) diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 46fc11dc99..987e719173 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -77,7 +77,6 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops); QEMUFile *qemu_fopen(const char *filename, const char *mode); QEMUFile *qemu_fdopen(int fd, const char *mode); QEMUFile *qemu_fopen_socket(int fd); -QEMUFile *qemu_popen(FILE *popen_file, const char *mode); QEMUFile *qemu_popen_cmd(const char *command, const char *mode); int qemu_get_fd(QEMUFile *f); int qemu_fclose(QEMUFile *f); diff --git a/migration-exec.c b/migration-exec.c index a051a6e668..5dc73139a4 100644 --- a/migration-exec.c +++ b/migration-exec.c @@ -59,19 +59,17 @@ static int exec_close(MigrationState *s) void exec_start_outgoing_migration(MigrationState *s, const char *command, Error **errp) { - FILE *f; - - f = popen(command, "w"); + QEMUFile *f; + f = qemu_popen_cmd(command, "w"); if (f == NULL) { error_setg_errno(errp, errno, "failed to popen the migration target"); return; } - s->fd = fileno(f); + s->opaque = f; + s->fd = qemu_get_fd(f); assert(s->fd != -1); - s->opaque = qemu_popen(f, "w"); - s->close = exec_close; s->get_error = file_errno; s->write = file_write; diff --git a/savevm.c b/savevm.c index fef2ab9beb..38699de4a3 100644 --- a/savevm.c +++ b/savevm.c @@ -275,11 +275,17 @@ static const QEMUFileOps stdio_pipe_write_ops = { .close = stdio_pclose }; -QEMUFile *qemu_popen(FILE *stdio_file, const char *mode) +QEMUFile *qemu_popen_cmd(const char *command, const char *mode) { + FILE *stdio_file; QEMUFileStdio *s; - if (stdio_file == NULL || mode == NULL || (mode[0] != 'r' && mode[0] != 'w') || mode[1] != 0) { + stdio_file = popen(command, mode); + if (stdio_file == NULL) { + return NULL; + } + + if (mode == NULL || (mode[0] != 'r' && mode[0] != 'w') || mode[1] != 0) { fprintf(stderr, "qemu_popen: Argument validity check failed\n"); return NULL; } @@ -296,18 +302,6 @@ QEMUFile *qemu_popen(FILE *stdio_file, const char *mode) return s->file; } -QEMUFile *qemu_popen_cmd(const char *command, const char *mode) -{ - FILE *popen_file; - - popen_file = popen(command, mode); - if(popen_file == NULL) { - return NULL; - } - - return qemu_popen(popen_file, mode); -} - static const QEMUFileOps stdio_file_read_ops = { .get_fd = stdio_get_fd, .get_buffer = stdio_get_buffer, From ce39ee3184a02eca7f9529cc19b1582f6f704c70 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:37 +0100 Subject: [PATCH 31/46] qemu-file: fsync a writable stdio QEMUFile This is what fd_close does. Prepare for switching to a QEMUFile. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- savevm.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/savevm.c b/savevm.c index 38699de4a3..1d49fde68b 100644 --- a/savevm.c +++ b/savevm.c @@ -256,6 +256,24 @@ static int stdio_fclose(void *opaque) { QEMUFileStdio *s = opaque; int ret = 0; + + if (s->file->ops->put_buffer) { + int fd = fileno(s->stdio_file); + struct stat st; + + ret = fstat(fd, &st); + if (ret == 0 && S_ISREG(st.st_mode)) { + /* + * If the file handle is a regular file make sure the + * data is flushed to disk before signaling success. + */ + ret = fsync(fd); + if (ret != 0) { + ret = -errno; + return ret; + } + } + } if (fclose(s->stdio_file) == EOF) { ret = -errno; } From 13c7b2da073ec83cb47f9582149c8d28bb038e73 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:38 +0100 Subject: [PATCH 32/46] qemu-file: check exit status when closing a pipe QEMUFile This is what exec_close does. Move this to the underlying QEMUFile. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/qemu/osdep.h | 7 +++++++ migration-exec.c | 4 ---- savevm.c | 3 +++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/include/qemu/osdep.h b/include/qemu/osdep.h index 87d3b9cfa8..df244006c7 100644 --- a/include/qemu/osdep.h +++ b/include/qemu/osdep.h @@ -9,6 +9,13 @@ #include #endif +#ifndef _WIN32 +#include +#else +#define WIFEXITED(x) 1 +#define WEXITSTATUS(x) (x) +#endif + #include #if defined(CONFIG_SOLARIS) && CONFIG_SOLARIS_VERSION < 10 diff --git a/migration-exec.c b/migration-exec.c index 5dc73139a4..a2b5f8d729 100644 --- a/migration-exec.c +++ b/migration-exec.c @@ -50,10 +50,6 @@ static int exec_close(MigrationState *s) ret = qemu_fclose(s->opaque); s->opaque = NULL; s->fd = -1; - if (ret >= 0 && !(WIFEXITED(ret) && WEXITSTATUS(ret) == 0)) { - /* close succeeded, but non-zero exit code: */ - ret = -EIO; /* fake errno value */ - } return ret; } diff --git a/savevm.c b/savevm.c index 1d49fde68b..6d6f1f1ca6 100644 --- a/savevm.c +++ b/savevm.c @@ -247,6 +247,9 @@ static int stdio_pclose(void *opaque) ret = pclose(s->stdio_file); if (ret == -1) { ret = -errno; + } else if (!WIFEXITED(ret) || WEXITSTATUS(ret) != 0) { + /* close succeeded, but non-zero exit code: */ + ret = -EIO; /* fake errno value */ } g_free(s); return ret; From 0cc3f3ccc9d29acc94b995430518bda1c7c01bef Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:39 +0100 Subject: [PATCH 33/46] qemu-file: add writable socket QEMUFile Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/qemu-file.h | 2 +- migration-tcp.c | 2 +- migration-unix.c | 2 +- savevm.c | 33 +++++++++++++++++++++++++++++++-- 4 files changed, 34 insertions(+), 5 deletions(-) diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 987e719173..25e84613fa 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -76,7 +76,7 @@ typedef struct QEMUFileOps { QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops); QEMUFile *qemu_fopen(const char *filename, const char *mode); QEMUFile *qemu_fdopen(int fd, const char *mode); -QEMUFile *qemu_fopen_socket(int fd); +QEMUFile *qemu_fopen_socket(int fd, const char *mode); QEMUFile *qemu_popen_cmd(const char *command, const char *mode); int qemu_get_fd(QEMUFile *f); int qemu_fclose(QEMUFile *f); diff --git a/migration-tcp.c b/migration-tcp.c index e78a296137..7d975b5e80 100644 --- a/migration-tcp.c +++ b/migration-tcp.c @@ -95,7 +95,7 @@ static void tcp_accept_incoming_migration(void *opaque) goto out; } - f = qemu_fopen_socket(c); + f = qemu_fopen_socket(c, "rb"); if (f == NULL) { fprintf(stderr, "could not qemu_fopen socket\n"); goto out; diff --git a/migration-unix.c b/migration-unix.c index 218835a7a4..4693b43d90 100644 --- a/migration-unix.c +++ b/migration-unix.c @@ -95,7 +95,7 @@ static void unix_accept_incoming_migration(void *opaque) goto out; } - f = qemu_fopen_socket(c); + f = qemu_fopen_socket(c, "rb"); if (f == NULL) { fprintf(stderr, "could not qemu_fopen socket\n"); goto out; diff --git a/savevm.c b/savevm.c index 6d6f1f1ca6..76c88c7960 100644 --- a/savevm.c +++ b/savevm.c @@ -198,6 +198,18 @@ static int socket_get_buffer(void *opaque, uint8_t *buf, int64_t pos, int size) return len; } +static int socket_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size) +{ + QEMUFileSocket *s = opaque; + ssize_t len; + + len = qemu_send_full(s->fd, buf, size, 0); + if (len < size) { + len = -socket_error(); + } + return len; +} + static int socket_close(void *opaque) { QEMUFileSocket *s = opaque; @@ -369,12 +381,29 @@ static const QEMUFileOps socket_read_ops = { .close = socket_close }; -QEMUFile *qemu_fopen_socket(int fd) +static const QEMUFileOps socket_write_ops = { + .get_fd = socket_get_fd, + .put_buffer = socket_put_buffer, + .close = socket_close +}; + +QEMUFile *qemu_fopen_socket(int fd, const char *mode) { QEMUFileSocket *s = g_malloc0(sizeof(QEMUFileSocket)); + if (mode == NULL || + (mode[0] != 'r' && mode[0] != 'w') || + mode[1] != 'b' || mode[2] != 0) { + fprintf(stderr, "qemu_fopen: Argument validity check failed\n"); + return NULL; + } + s->fd = fd; - s->file = qemu_fopen_ops(s, &socket_read_ops); + if (mode[0] == 'w') { + s->file = qemu_fopen_ops(s, &socket_write_ops); + } else { + s->file = qemu_fopen_ops(s, &socket_read_ops); + } return s->file; } From 3f2d38faab97f4d676c41868a8243997b2aab7cb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:40 +0100 Subject: [PATCH 34/46] qemu-file: simplify and export qemu_ftell Force a flush when qemu_ftell is called. This simplifies the buffer magic (it also breaks qemu_ftell for input QEMUFiles, but we never use it). Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- savevm.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) diff --git a/savevm.c b/savevm.c index 76c88c7960..c60ace3218 100644 --- a/savevm.c +++ b/savevm.c @@ -119,8 +119,8 @@ struct QEMUFile { void *opaque; int is_write; - int64_t buf_offset; /* start of buffer when writing, end of buffer - when reading */ + int64_t pos; /* start of buffer when writing, end of buffer + when reading */ int buf_index; int buf_size; /* 0 when writing */ uint8_t buf[IO_BUF_SIZE]; @@ -505,9 +505,9 @@ static void qemu_fflush(QEMUFile *f) return; } if (f->is_write && f->buf_index > 0) { - ret = f->ops->put_buffer(f->opaque, f->buf, f->buf_offset, f->buf_index); + ret = f->ops->put_buffer(f->opaque, f->buf, f->pos, f->buf_index); if (ret >= 0) { - f->buf_offset += f->buf_index; + f->pos += f->buf_index; } f->buf_index = 0; } @@ -534,11 +534,11 @@ static void qemu_fill_buffer(QEMUFile *f) f->buf_index = 0; f->buf_size = pending; - len = f->ops->get_buffer(f->opaque, f->buf + pending, f->buf_offset, + len = f->ops->get_buffer(f->opaque, f->buf + pending, f->pos, IO_BUF_SIZE - pending); if (len > 0) { f->buf_size += len; - f->buf_offset += len; + f->pos += len; } else if (len == 0) { qemu_file_set_error(f, -EIO); } else if (len != -EAGAIN) @@ -718,12 +718,8 @@ int qemu_get_byte(QEMUFile *f) int64_t qemu_ftell(QEMUFile *f) { - /* buf_offset excludes buffer for writing but includes it for reading */ - if (f->is_write) { - return f->buf_offset + f->buf_index; - } else { - return f->buf_offset - f->buf_size + f->buf_index; - } + qemu_fflush(f); + return f->pos; } int qemu_file_rate_limit(QEMUFile *f) From f8bbc1286337a8506162b5785babe6f2a7de2476 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:41 +0100 Subject: [PATCH 35/46] migration: use QEMUFile for migration channel lifetime As a start, use QEMUFile to store the destination and close it. qemu_get_fd gets a file descriptor that will be used by the write callbacks. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/migration.h | 7 ++++--- migration-exec.c | 21 ++------------------- migration-fd.c | 35 +++-------------------------------- migration-tcp.c | 19 +++---------------- migration-unix.c | 19 +++---------------- migration.c | 8 +++++--- savevm.c | 1 + 7 files changed, 21 insertions(+), 89 deletions(-) diff --git a/include/migration/migration.h b/include/migration/migration.h index cec8643870..1f8f305ac9 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -38,12 +38,13 @@ struct MigrationState QEMUBH *cleanup_bh; QEMUFile *file; + QEMUFile *migration_file; + int fd; - int state; int (*get_error)(MigrationState *s); - int (*close)(MigrationState *s); int (*write)(MigrationState *s, const void *buff, size_t size); - void *opaque; + + int state; MigrationParams params; int64_t total_time; int64_t downtime; diff --git a/migration-exec.c b/migration-exec.c index a2b5f8d729..8c3f72050a 100644 --- a/migration-exec.c +++ b/migration-exec.c @@ -43,33 +43,16 @@ static int file_write(MigrationState *s, const void * buf, size_t size) return write(s->fd, buf, size); } -static int exec_close(MigrationState *s) -{ - int ret = 0; - DPRINTF("exec_close\n"); - ret = qemu_fclose(s->opaque); - s->opaque = NULL; - s->fd = -1; - return ret; -} - void exec_start_outgoing_migration(MigrationState *s, const char *command, Error **errp) { - QEMUFile *f; - f = qemu_popen_cmd(command, "w"); - if (f == NULL) { + s->migration_file = qemu_popen_cmd(command, "w"); + if (s->migration_file == NULL) { error_setg_errno(errp, errno, "failed to popen the migration target"); return; } - s->opaque = f; - s->fd = qemu_get_fd(f); - assert(s->fd != -1); - - s->close = exec_close; s->get_error = file_errno; s->write = file_write; - migrate_fd_connect(s); } diff --git a/migration-fd.c b/migration-fd.c index a99e0e3971..463645794c 100644 --- a/migration-fd.c +++ b/migration-fd.c @@ -40,45 +40,16 @@ static int fd_write(MigrationState *s, const void * buf, size_t size) return write(s->fd, buf, size); } -static int fd_close(MigrationState *s) -{ - struct stat st; - int ret; - - DPRINTF("fd_close\n"); - ret = fstat(s->fd, &st); - if (ret == 0 && S_ISREG(st.st_mode)) { - /* - * If the file handle is a regular file make sure the - * data is flushed to disk before signaling success. - */ - ret = fsync(s->fd); - if (ret != 0) { - ret = -errno; - perror("migration-fd: fsync"); - return ret; - } - } - ret = close(s->fd); - s->fd = -1; - if (ret != 0) { - ret = -errno; - perror("migration-fd: close"); - } - return ret; -} - void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp) { - s->fd = monitor_get_fd(cur_mon, fdname, errp); - if (s->fd == -1) { + int fd = monitor_get_fd(cur_mon, fdname, errp); + if (fd == -1) { return; } + s->migration_file = qemu_fdopen(fd, "wb"); s->get_error = fd_errno; s->write = fd_write; - s->close = fd_close; - migrate_fd_connect(s); } diff --git a/migration-tcp.c b/migration-tcp.c index 7d975b5e80..1e8e00411b 100644 --- a/migration-tcp.c +++ b/migration-tcp.c @@ -39,28 +39,17 @@ static int socket_write(MigrationState *s, const void * buf, size_t size) return send(s->fd, buf, size, 0); } -static int tcp_close(MigrationState *s) -{ - int r = 0; - DPRINTF("tcp_close\n"); - if (closesocket(s->fd) < 0) { - r = -socket_error(); - } - return r; -} - static void tcp_wait_for_connect(int fd, void *opaque) { MigrationState *s = opaque; if (fd < 0) { DPRINTF("migrate connect error\n"); - s->fd = -1; + s->migration_file = NULL; migrate_fd_error(s); } else { DPRINTF("migrate connect success\n"); - s->fd = fd; - socket_set_block(s->fd); + s->migration_file = qemu_fopen_socket(fd, "wb"); migrate_fd_connect(s); } } @@ -69,9 +58,7 @@ void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Erro { s->get_error = socket_errno; s->write = socket_write; - s->close = tcp_close; - - s->fd = inet_nonblocking_connect(host_port, tcp_wait_for_connect, s, errp); + inet_nonblocking_connect(host_port, tcp_wait_for_connect, s, errp); } static void tcp_accept_incoming_migration(void *opaque) diff --git a/migration-unix.c b/migration-unix.c index 4693b43d90..11917f4f2a 100644 --- a/migration-unix.c +++ b/migration-unix.c @@ -39,28 +39,17 @@ static int unix_write(MigrationState *s, const void * buf, size_t size) return write(s->fd, buf, size); } -static int unix_close(MigrationState *s) -{ - int r = 0; - DPRINTF("unix_close\n"); - if (close(s->fd) < 0) { - r = -errno; - } - return r; -} - static void unix_wait_for_connect(int fd, void *opaque) { MigrationState *s = opaque; if (fd < 0) { DPRINTF("migrate connect error\n"); - s->fd = -1; + s->migration_file = NULL; migrate_fd_error(s); } else { DPRINTF("migrate connect success\n"); - s->fd = fd; - socket_set_block(s->fd); + s->migration_file = qemu_fopen_socket(fd, "wb"); migrate_fd_connect(s); } } @@ -69,9 +58,7 @@ void unix_start_outgoing_migration(MigrationState *s, const char *path, Error ** { s->get_error = unix_errno; s->write = unix_write; - s->close = unix_close; - - s->fd = unix_nonblocking_connect(path, unix_wait_for_connect, s, errp); + unix_nonblocking_connect(path, unix_wait_for_connect, s, errp); } static void unix_accept_incoming_migration(void *opaque) diff --git a/migration.c b/migration.c index f35728da8d..52126d86cc 100644 --- a/migration.c +++ b/migration.c @@ -274,7 +274,7 @@ static void migrate_fd_cleanup(void *opaque) s->file = NULL; } - assert(s->fd == -1); + assert(s->migration_file == NULL); assert(s->state != MIG_STATE_ACTIVE); if (s->state != MIG_STATE_COMPLETED) { @@ -330,8 +330,9 @@ static void migrate_fd_cancel(MigrationState *s) int migrate_fd_close(MigrationState *s) { int rc = 0; - if (s->fd != -1) { - rc = s->close(s); + if (s->migration_file != NULL) { + rc = qemu_fclose(s->migration_file); + s->migration_file = NULL; s->fd = -1; } return rc; @@ -720,6 +721,7 @@ void migrate_fd_connect(MigrationState *s) s->xfer_limit = s->bandwidth_limit / XFER_LIMIT_RATIO; s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); + s->fd = qemu_get_fd(s->migration_file); s->file = qemu_fopen_ops(s, &migration_file_ops); qemu_thread_create(&s->thread, migration_thread, s, diff --git a/savevm.c b/savevm.c index c60ace3218..1414f08c1a 100644 --- a/savevm.c +++ b/savevm.c @@ -400,6 +400,7 @@ QEMUFile *qemu_fopen_socket(int fd, const char *mode) s->fd = fd; if (mode[0] == 'w') { + socket_set_block(s->fd); s->file = qemu_fopen_ops(s, &socket_write_ops); } else { s->file = qemu_fopen_ops(s, &socket_read_ops); From e6a1cf21328802f3a83e84e893b8cb8a468141cc Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:42 +0100 Subject: [PATCH 36/46] migration: use QEMUFile for writing outgoing migration data Second, drop the file descriptor indirection, and write directly to the QEMUFile. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/migration.h | 4 --- migration-exec.c | 12 --------- migration-fd.c | 12 --------- migration-tcp.c | 12 --------- migration-unix.c | 12 --------- migration.c | 46 ++++++----------------------------- 6 files changed, 8 insertions(+), 90 deletions(-) diff --git a/include/migration/migration.h b/include/migration/migration.h index 1f8f305ac9..ae94706bdf 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -40,10 +40,6 @@ struct MigrationState QEMUFile *file; QEMUFile *migration_file; - int fd; - int (*get_error)(MigrationState *s); - int (*write)(MigrationState *s, const void *buff, size_t size); - int state; MigrationParams params; int64_t total_time; diff --git a/migration-exec.c b/migration-exec.c index 8c3f72050a..1c539de931 100644 --- a/migration-exec.c +++ b/migration-exec.c @@ -33,16 +33,6 @@ do { } while (0) #endif -static int file_errno(MigrationState *s) -{ - return errno; -} - -static int file_write(MigrationState *s, const void * buf, size_t size) -{ - return write(s->fd, buf, size); -} - void exec_start_outgoing_migration(MigrationState *s, const char *command, Error **errp) { s->migration_file = qemu_popen_cmd(command, "w"); @@ -51,8 +41,6 @@ void exec_start_outgoing_migration(MigrationState *s, const char *command, Error return; } - s->get_error = file_errno; - s->write = file_write; migrate_fd_connect(s); } diff --git a/migration-fd.c b/migration-fd.c index 463645794c..07c758ab6b 100644 --- a/migration-fd.c +++ b/migration-fd.c @@ -30,16 +30,6 @@ do { } while (0) #endif -static int fd_errno(MigrationState *s) -{ - return errno; -} - -static int fd_write(MigrationState *s, const void * buf, size_t size) -{ - return write(s->fd, buf, size); -} - void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error **errp) { int fd = monitor_get_fd(cur_mon, fdname, errp); @@ -48,8 +38,6 @@ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error ** } s->migration_file = qemu_fdopen(fd, "wb"); - s->get_error = fd_errno; - s->write = fd_write; migrate_fd_connect(s); } diff --git a/migration-tcp.c b/migration-tcp.c index 1e8e00411b..5ea4f3d2b6 100644 --- a/migration-tcp.c +++ b/migration-tcp.c @@ -29,16 +29,6 @@ do { } while (0) #endif -static int socket_errno(MigrationState *s) -{ - return socket_error(); -} - -static int socket_write(MigrationState *s, const void * buf, size_t size) -{ - return send(s->fd, buf, size, 0); -} - static void tcp_wait_for_connect(int fd, void *opaque) { MigrationState *s = opaque; @@ -56,8 +46,6 @@ static void tcp_wait_for_connect(int fd, void *opaque) void tcp_start_outgoing_migration(MigrationState *s, const char *host_port, Error **errp) { - s->get_error = socket_errno; - s->write = socket_write; inet_nonblocking_connect(host_port, tcp_wait_for_connect, s, errp); } diff --git a/migration-unix.c b/migration-unix.c index 11917f4f2a..64bfa31e35 100644 --- a/migration-unix.c +++ b/migration-unix.c @@ -29,16 +29,6 @@ do { } while (0) #endif -static int unix_errno(MigrationState *s) -{ - return errno; -} - -static int unix_write(MigrationState *s, const void * buf, size_t size) -{ - return write(s->fd, buf, size); -} - static void unix_wait_for_connect(int fd, void *opaque) { MigrationState *s = opaque; @@ -56,8 +46,6 @@ static void unix_wait_for_connect(int fd, void *opaque) void unix_start_outgoing_migration(MigrationState *s, const char *path, Error **errp) { - s->get_error = unix_errno; - s->write = unix_write; unix_nonblocking_connect(path, unix_wait_for_connect, s, errp); } diff --git a/migration.c b/migration.c index 52126d86cc..681a459812 100644 --- a/migration.c +++ b/migration.c @@ -301,25 +301,6 @@ void migrate_fd_error(MigrationState *s) notifier_list_notify(&migration_state_notifiers, s); } -static ssize_t migrate_fd_put_buffer(MigrationState *s, const void *data, - size_t size) -{ - ssize_t ret; - - if (s->state != MIG_STATE_ACTIVE) { - return -EIO; - } - - do { - ret = s->write(s, data, size); - } while (ret == -1 && ((s->get_error(s)) == EINTR)); - - if (ret == -1) - ret = -(s->get_error(s)); - - return ret; -} - static void migrate_fd_cancel(MigrationState *s) { DPRINTF("cancelling migration\n"); @@ -333,7 +314,6 @@ int migrate_fd_close(MigrationState *s) if (s->migration_file != NULL) { rc = qemu_fclose(s->migration_file); s->migration_file = NULL; - s->fd = -1; } return rc; } @@ -519,8 +499,7 @@ static int migration_put_buffer(void *opaque, const uint8_t *buf, int64_t pos, int size) { MigrationState *s = opaque; - ssize_t ret; - size_t sent; + int ret; DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos); @@ -528,22 +507,14 @@ static int migration_put_buffer(void *opaque, const uint8_t *buf, return size; } - sent = 0; - while (size) { - ret = migrate_fd_put_buffer(s, buf, size); - if (ret <= 0) { - DPRINTF("error flushing data, %zd\n", ret); - return ret; - } else { - DPRINTF("flushed %zd byte(s)\n", ret); - sent += ret; - buf += ret; - size -= ret; - s->bytes_xfer += ret; - } + qemu_put_buffer(s->migration_file, buf, size); + ret = qemu_file_get_error(s->migration_file); + if (ret) { + return ret; } - return sent; + s->bytes_xfer += size; + return size; } static int migration_close(void *opaque) @@ -564,7 +535,7 @@ static int migration_get_fd(void *opaque) { MigrationState *s = opaque; - return s->fd; + return qemu_get_fd(s->migration_file); } /* @@ -721,7 +692,6 @@ void migrate_fd_connect(MigrationState *s) s->xfer_limit = s->bandwidth_limit / XFER_LIMIT_RATIO; s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); - s->fd = qemu_get_fd(s->migration_file); s->file = qemu_fopen_ops(s, &migration_file_ops); qemu_thread_create(&s->thread, migration_thread, s, From be7172e22a9c3bc448894e57f6c2d1af6ffd47fd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:43 +0100 Subject: [PATCH 37/46] migration: use qemu_ftell to compute bandwidth Prepare for when s->bytes_xfer will be removed. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/migration.c b/migration.c index 681a459812..6da17461f3 100644 --- a/migration.c +++ b/migration.c @@ -589,6 +589,7 @@ static void *migration_thread(void *opaque) MigrationState *s = opaque; int64_t initial_time = qemu_get_clock_ms(rt_clock); int64_t sleep_time = 0; + int64_t initial_bytes = 0; int64_t max_size = 0; int64_t start_time = initial_time; bool old_vm_running = false; @@ -629,7 +630,7 @@ static void *migration_thread(void *opaque) } current_time = qemu_get_clock_ms(rt_clock); if (current_time >= initial_time + BUFFER_DELAY) { - uint64_t transferred_bytes = s->bytes_xfer; + uint64_t transferred_bytes = qemu_ftell(s->file) - initial_bytes; uint64_t time_spent = current_time - initial_time - sleep_time; double bandwidth = transferred_bytes / time_spent; max_size = bandwidth * migrate_max_downtime() / 1000000; @@ -646,6 +647,7 @@ static void *migration_thread(void *opaque) s->bytes_xfer = 0; sleep_time = 0; initial_time = current_time; + initial_bytes = qemu_ftell(s->file); } if (qemu_file_rate_limit(s->file)) { /* usleep expects microseconds */ From 442773cef15092b5927851237850760345d2cf16 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:44 +0100 Subject: [PATCH 38/46] migration: small changes around rate-limiting This patch extracts a few small changes from the next patch, which are unrelated to adding generic rate-limiting functionality to QEMUFile. Make migration_set_rate_limit a simple accessor, and use qemu_file_set_rate_limit consistently. Also fix a typo where INT_MAX should have been SIZE_MAX. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/migration.c b/migration.c index 6da17461f3..1f58d77f76 100644 --- a/migration.c +++ b/migration.c @@ -462,10 +462,15 @@ void qmp_migrate_set_speed(int64_t value, Error **errp) if (value < 0) { value = 0; } + if (value > SIZE_MAX) { + value = SIZE_MAX; + } s = migrate_get_current(); s->bandwidth_limit = value; - qemu_file_set_rate_limit(s->file, s->bandwidth_limit); + if (s->file) { + qemu_file_set_rate_limit(s->file, s->bandwidth_limit / XFER_LIMIT_RATIO); + } } void qmp_migrate_set_downtime(double value, Error **errp) @@ -567,11 +572,8 @@ static int64_t migration_set_rate_limit(void *opaque, int64_t new_rate) if (qemu_file_get_error(s->file)) { goto out; } - if (new_rate > SIZE_MAX) { - new_rate = SIZE_MAX; - } - s->xfer_limit = new_rate / XFER_LIMIT_RATIO; + s->xfer_limit = new_rate; out: return s->xfer_limit; @@ -614,7 +616,7 @@ static void *migration_thread(void *opaque) qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER); old_vm_running = runstate_is_running(); vm_stop_force_state(RUN_STATE_FINISH_MIGRATE); - s->xfer_limit = INT_MAX; + qemu_file_set_rate_limit(s->file, INT_MAX); qemu_savevm_state_complete(s->file); qemu_mutex_unlock_iothread(); if (!qemu_file_get_error(s->file)) { @@ -691,11 +693,12 @@ void migrate_fd_connect(MigrationState *s) /* This is a best 1st approximation. ns to ms */ s->expected_downtime = max_downtime/1000000; - s->xfer_limit = s->bandwidth_limit / XFER_LIMIT_RATIO; - s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); s->file = qemu_fopen_ops(s, &migration_file_ops); + qemu_file_set_rate_limit(s->file, + s->bandwidth_limit / XFER_LIMIT_RATIO); + qemu_thread_create(&s->thread, migration_thread, s, QEMU_THREAD_JOINABLE); notifier_list_notify(&migration_state_notifiers, s); From 1964a397063967acc5ce71a2a24ed26e74824ee1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:45 +0100 Subject: [PATCH 39/46] migration: move rate limiting to QEMUFile Rate limiting is now simply a byte counter; client call qemu_file_rate_limit() manually to determine if they have to exit. So it is possible and simple to move the functionality to QEMUFile. This makes the remaining functionality of s->file redundant; in the next patch we can remove it and write directly to s->migration_file. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- docs/migration.txt | 20 +------------- include/migration/qemu-file.h | 18 ++----------- migration.c | 51 +---------------------------------- savevm.c | 31 +++++++++++---------- 4 files changed, 21 insertions(+), 99 deletions(-) diff --git a/docs/migration.txt b/docs/migration.txt index f3ddd2f1a8..0719a55002 100644 --- a/docs/migration.txt +++ b/docs/migration.txt @@ -55,10 +55,7 @@ QEMUFile with: QEMUFile *qemu_fopen_ops(void *opaque, QEMUFilePutBufferFunc *put_buffer, QEMUFileGetBufferFunc *get_buffer, - QEMUFileCloseFunc *close, - QEMUFileRateLimit *rate_limit, - QEMUFileSetRateLimit *set_rate_limit, - QEMUFileGetRateLimit *get_rate_limit); + QEMUFileCloseFunc *close); The functions have the following functionality: @@ -80,24 +77,9 @@ Close a file and return an error code. typedef int (QEMUFileCloseFunc)(void *opaque); -Called to determine if the file has exceeded its bandwidth allocation. The -bandwidth capping is a soft limit, not a hard limit. - -typedef int (QEMUFileRateLimit)(void *opaque); - -Called to change the current bandwidth allocation. This function must return -the new actual bandwidth. It should be new_rate if everything goes OK, and -the old rate otherwise. - -typedef size_t (QEMUFileSetRateLimit)(void *opaque, size_t new_rate); -typedef size_t (QEMUFileGetRateLimit)(void *opaque); - You can use any internal state that you need using the opaque void * pointer that is passed to all functions. -The rate limiting functions are used to limit the bandwidth used by -QEMU migration. - The important functions for us are put_buffer()/get_buffer() that allow to write/read a buffer into the QEMUFile. diff --git a/include/migration/qemu-file.h b/include/migration/qemu-file.h index 25e84613fa..df812617f8 100644 --- a/include/migration/qemu-file.h +++ b/include/migration/qemu-file.h @@ -51,26 +51,11 @@ typedef int (QEMUFileCloseFunc)(void *opaque); */ typedef int (QEMUFileGetFD)(void *opaque); -/* Called to determine if the file has exceeded its bandwidth allocation. The - * bandwidth capping is a soft limit, not a hard limit. - */ -typedef int (QEMUFileRateLimit)(void *opaque); - -/* Called to change the current bandwidth allocation. This function must return - * the new actual bandwidth. It should be new_rate if everything goes ok, and - * the old rate otherwise - */ -typedef int64_t (QEMUFileSetRateLimit)(void *opaque, int64_t new_rate); -typedef int64_t (QEMUFileGetRateLimit)(void *opaque); - typedef struct QEMUFileOps { QEMUFilePutBufferFunc *put_buffer; QEMUFileGetBufferFunc *get_buffer; QEMUFileCloseFunc *close; QEMUFileGetFD *get_fd; - QEMUFileRateLimit *rate_limit; - QEMUFileSetRateLimit *set_rate_limit; - QEMUFileGetRateLimit *get_rate_limit; } QEMUFileOps; QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops); @@ -109,7 +94,8 @@ unsigned int qemu_get_be32(QEMUFile *f); uint64_t qemu_get_be64(QEMUFile *f); int qemu_file_rate_limit(QEMUFile *f); -int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); +void qemu_file_reset_rate_limit(QEMUFile *f); +void qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate); int64_t qemu_file_get_rate_limit(QEMUFile *f); int qemu_file_get_error(QEMUFile *f); diff --git a/migration.c b/migration.c index 1f58d77f76..4b04d50d58 100644 --- a/migration.c +++ b/migration.c @@ -518,7 +518,6 @@ static int migration_put_buffer(void *opaque, const uint8_t *buf, return ret; } - s->bytes_xfer += size; return size; } @@ -543,49 +542,6 @@ static int migration_get_fd(void *opaque) return qemu_get_fd(s->migration_file); } -/* - * The meaning of the return values is: - * 0: We can continue sending - * 1: Time to stop - * negative: There has been an error - */ -static int migration_rate_limit(void *opaque) -{ - MigrationState *s = opaque; - int ret; - - ret = qemu_file_get_error(s->file); - if (ret) { - return ret; - } - - if (s->bytes_xfer >= s->xfer_limit) { - return 1; - } - - return 0; -} - -static int64_t migration_set_rate_limit(void *opaque, int64_t new_rate) -{ - MigrationState *s = opaque; - if (qemu_file_get_error(s->file)) { - goto out; - } - - s->xfer_limit = new_rate; - -out: - return s->xfer_limit; -} - -static int64_t migration_get_rate_limit(void *opaque) -{ - MigrationState *s = opaque; - - return s->xfer_limit; -} - static void *migration_thread(void *opaque) { MigrationState *s = opaque; @@ -646,7 +602,7 @@ static void *migration_thread(void *opaque) s->expected_downtime = s->dirty_bytes_rate / bandwidth; } - s->bytes_xfer = 0; + qemu_file_reset_rate_limit(s->file); sleep_time = 0; initial_time = current_time; initial_bytes = qemu_ftell(s->file); @@ -679,9 +635,6 @@ static const QEMUFileOps migration_file_ops = { .get_fd = migration_get_fd, .put_buffer = migration_put_buffer, .close = migration_close, - .rate_limit = migration_rate_limit, - .get_rate_limit = migration_get_rate_limit, - .set_rate_limit = migration_set_rate_limit, }; void migrate_fd_connect(MigrationState *s) @@ -689,10 +642,8 @@ void migrate_fd_connect(MigrationState *s) s->state = MIG_STATE_ACTIVE; trace_migrate_set_state(MIG_STATE_ACTIVE); - s->bytes_xfer = 0; /* This is a best 1st approximation. ns to ms */ s->expected_downtime = max_downtime/1000000; - s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); s->file = qemu_fopen_ops(s, &migration_file_ops); diff --git a/savevm.c b/savevm.c index 1414f08c1a..147e2d232e 100644 --- a/savevm.c +++ b/savevm.c @@ -119,6 +119,9 @@ struct QEMUFile { void *opaque; int is_write; + int64_t bytes_xfer; + int64_t xfer_limit; + int64_t pos; /* start of buffer when writing, end of buffer when reading */ int buf_index; @@ -479,7 +482,6 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) f->opaque = opaque; f->ops = ops; f->is_write = 0; - return f; } @@ -605,6 +607,7 @@ void qemu_put_buffer(QEMUFile *f, const uint8_t *buf, int size) memcpy(f->buf + f->buf_index, buf, l); f->is_write = 1; f->buf_index += l; + f->bytes_xfer += l; buf += l; size -= l; if (f->buf_index >= IO_BUF_SIZE) { @@ -725,28 +728,28 @@ int64_t qemu_ftell(QEMUFile *f) int qemu_file_rate_limit(QEMUFile *f) { - if (f->ops->rate_limit) - return f->ops->rate_limit(f->opaque); - + if (qemu_file_get_error(f)) { + return 1; + } + if (f->xfer_limit > 0 && f->bytes_xfer > f->xfer_limit) { + return 1; + } return 0; } int64_t qemu_file_get_rate_limit(QEMUFile *f) { - if (f->ops->get_rate_limit) - return f->ops->get_rate_limit(f->opaque); - - return 0; + return f->xfer_limit; } -int64_t qemu_file_set_rate_limit(QEMUFile *f, int64_t new_rate) +void qemu_file_set_rate_limit(QEMUFile *f, int64_t limit) { - /* any failed or completed migration keeps its state to allow probing of - * migration data, but has no associated file anymore */ - if (f && f->ops->set_rate_limit) - return f->ops->set_rate_limit(f->opaque, new_rate); + f->xfer_limit = limit; +} - return 0; +void qemu_file_reset_rate_limit(QEMUFile *f) +{ + f->bytes_xfer = 0; } void qemu_put_be16(QEMUFile *f, unsigned int v) From 404a7c05bcc20c51fe1a9bf2deaeb4d6b658d3a3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:46 +0100 Subject: [PATCH 40/46] migration: move contents of migration_close to migrate_fd_cleanup With this patch, the migration_file is not needed anymore. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/migration.c b/migration.c index 4b04d50d58..64d8e4644e 100644 --- a/migration.c +++ b/migration.c @@ -272,6 +272,12 @@ static void migrate_fd_cleanup(void *opaque) DPRINTF("closing file\n"); qemu_fclose(s->file); s->file = NULL; + + qemu_mutex_unlock_iothread(); + qemu_thread_join(&s->thread); + qemu_mutex_lock_iothread(); + + migrate_fd_close(s); } assert(s->migration_file == NULL); @@ -523,16 +529,7 @@ static int migration_put_buffer(void *opaque, const uint8_t *buf, static int migration_close(void *opaque) { - MigrationState *s = opaque; - - DPRINTF("closing\n"); - - qemu_mutex_unlock_iothread(); - qemu_thread_join(&s->thread); - qemu_mutex_lock_iothread(); - assert(s->state != MIG_STATE_ACTIVE); - - return migrate_fd_close(s); + return 0; } static int migration_get_fd(void *opaque) From b352365f5abec075dede0222f1bc37674d64117c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:47 +0100 Subject: [PATCH 41/46] migration: eliminate s->migration_file The indirection is useless now. Backends can open s->file directly. Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- include/migration/migration.h | 2 -- migration-exec.c | 4 +-- migration-fd.c | 2 +- migration-tcp.c | 4 +-- migration-unix.c | 4 +-- migration.c | 51 +++-------------------------------- 6 files changed, 11 insertions(+), 56 deletions(-) diff --git a/include/migration/migration.h b/include/migration/migration.h index ae94706bdf..bb617fdacf 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -36,9 +36,7 @@ struct MigrationState size_t xfer_limit; QemuThread thread; QEMUBH *cleanup_bh; - QEMUFile *file; - QEMUFile *migration_file; int state; MigrationParams params; diff --git a/migration-exec.c b/migration-exec.c index 1c539de931..deab4e378e 100644 --- a/migration-exec.c +++ b/migration-exec.c @@ -35,8 +35,8 @@ void exec_start_outgoing_migration(MigrationState *s, const char *command, Error **errp) { - s->migration_file = qemu_popen_cmd(command, "w"); - if (s->migration_file == NULL) { + s->file = qemu_popen_cmd(command, "w"); + if (s->file == NULL) { error_setg_errno(errp, errno, "failed to popen the migration target"); return; } diff --git a/migration-fd.c b/migration-fd.c index 07c758ab6b..3d4613cbaf 100644 --- a/migration-fd.c +++ b/migration-fd.c @@ -36,7 +36,7 @@ void fd_start_outgoing_migration(MigrationState *s, const char *fdname, Error ** if (fd == -1) { return; } - s->migration_file = qemu_fdopen(fd, "wb"); + s->file = qemu_fdopen(fd, "wb"); migrate_fd_connect(s); } diff --git a/migration-tcp.c b/migration-tcp.c index 5ea4f3d2b6..b20ee58f55 100644 --- a/migration-tcp.c +++ b/migration-tcp.c @@ -35,11 +35,11 @@ static void tcp_wait_for_connect(int fd, void *opaque) if (fd < 0) { DPRINTF("migrate connect error\n"); - s->migration_file = NULL; + s->file = NULL; migrate_fd_error(s); } else { DPRINTF("migrate connect success\n"); - s->migration_file = qemu_fopen_socket(fd, "wb"); + s->file = qemu_fopen_socket(fd, "wb"); migrate_fd_connect(s); } } diff --git a/migration-unix.c b/migration-unix.c index 64bfa31e35..94b7022fc8 100644 --- a/migration-unix.c +++ b/migration-unix.c @@ -35,11 +35,11 @@ static void unix_wait_for_connect(int fd, void *opaque) if (fd < 0) { DPRINTF("migrate connect error\n"); - s->migration_file = NULL; + s->file = NULL; migrate_fd_error(s); } else { DPRINTF("migrate connect success\n"); - s->migration_file = qemu_fopen_socket(fd, "wb"); + s->file = qemu_fopen_socket(fd, "wb"); migrate_fd_connect(s); } } diff --git a/migration.c b/migration.c index 64d8e4644e..5d048ef74c 100644 --- a/migration.c +++ b/migration.c @@ -270,9 +270,6 @@ static void migrate_fd_cleanup(void *opaque) if (s->file) { DPRINTF("closing file\n"); - qemu_fclose(s->file); - s->file = NULL; - qemu_mutex_unlock_iothread(); qemu_thread_join(&s->thread); qemu_mutex_lock_iothread(); @@ -280,7 +277,7 @@ static void migrate_fd_cleanup(void *opaque) migrate_fd_close(s); } - assert(s->migration_file == NULL); + assert(s->file == NULL); assert(s->state != MIG_STATE_ACTIVE); if (s->state != MIG_STATE_COMPLETED) { @@ -317,9 +314,9 @@ static void migrate_fd_cancel(MigrationState *s) int migrate_fd_close(MigrationState *s) { int rc = 0; - if (s->migration_file != NULL) { - rc = qemu_fclose(s->migration_file); - s->migration_file = NULL; + if (s->file != NULL) { + rc = qemu_fclose(s->file); + s->file = NULL; } return rc; } @@ -506,39 +503,6 @@ int64_t migrate_xbzrle_cache_size(void) /* migration thread support */ -static int migration_put_buffer(void *opaque, const uint8_t *buf, - int64_t pos, int size) -{ - MigrationState *s = opaque; - int ret; - - DPRINTF("putting %d bytes at %" PRId64 "\n", size, pos); - - if (size <= 0) { - return size; - } - - qemu_put_buffer(s->migration_file, buf, size); - ret = qemu_file_get_error(s->migration_file); - if (ret) { - return ret; - } - - return size; -} - -static int migration_close(void *opaque) -{ - return 0; -} - -static int migration_get_fd(void *opaque) -{ - MigrationState *s = opaque; - - return qemu_get_fd(s->migration_file); -} - static void *migration_thread(void *opaque) { MigrationState *s = opaque; @@ -628,12 +592,6 @@ static void *migration_thread(void *opaque) return NULL; } -static const QEMUFileOps migration_file_ops = { - .get_fd = migration_get_fd, - .put_buffer = migration_put_buffer, - .close = migration_close, -}; - void migrate_fd_connect(MigrationState *s) { s->state = MIG_STATE_ACTIVE; @@ -642,7 +600,6 @@ void migrate_fd_connect(MigrationState *s) /* This is a best 1st approximation. ns to ms */ s->expected_downtime = max_downtime/1000000; s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s); - s->file = qemu_fopen_ops(s, &migration_file_ops); qemu_file_set_rate_limit(s->file, s->bandwidth_limit / XFER_LIMIT_RATIO); From 6f190a0641f5b06a462b62955c15c77b8fb3990c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 22 Feb 2013 17:36:48 +0100 Subject: [PATCH 42/46] migration: inline migrate_fd_close Reviewed-by: Orit Wasserman Reviewed-by: Juan Quintela Signed-off-by: Paolo Bonzini Signed-off-by: Juan Quintela --- migration.c | 14 ++------------ 1 file changed, 2 insertions(+), 12 deletions(-) diff --git a/migration.c b/migration.c index 5d048ef74c..185d11260d 100644 --- a/migration.c +++ b/migration.c @@ -274,10 +274,10 @@ static void migrate_fd_cleanup(void *opaque) qemu_thread_join(&s->thread); qemu_mutex_lock_iothread(); - migrate_fd_close(s); + qemu_fclose(s->file); + s->file = NULL; } - assert(s->file == NULL); assert(s->state != MIG_STATE_ACTIVE); if (s->state != MIG_STATE_COMPLETED) { @@ -311,16 +311,6 @@ static void migrate_fd_cancel(MigrationState *s) migrate_finish_set_state(s, MIG_STATE_CANCELLED); } -int migrate_fd_close(MigrationState *s) -{ - int rc = 0; - if (s->file != NULL) { - rc = qemu_fclose(s->file); - s->file = NULL; - } - return rc; -} - void add_migration_state_change_notifier(Notifier *notify) { notifier_list_add(&migration_state_notifiers, notify); From 0db65d624e0211a43c011579d6607a50d8f06082 Mon Sep 17 00:00:00 2001 From: Orit Wasserman Date: Mon, 25 Feb 2013 19:12:01 +0200 Subject: [PATCH 43/46] Fix page_cache leak in cache_resize Signed-off-by: Orit Wasserman Reviewed-by: Peter Maydell Signed-off-by: Juan Quintela --- page_cache.c | 1 + 1 file changed, 1 insertion(+) diff --git a/page_cache.c b/page_cache.c index ba5640bd73..748957bc42 100644 --- a/page_cache.c +++ b/page_cache.c @@ -208,6 +208,7 @@ int64_t cache_resize(PageCache *cache, int64_t new_num_pages) } } + g_free(cache->page_cache); cache->page_cache = new_cache->page_cache; cache->max_num_items = new_cache->max_num_items; cache->num_items = new_cache->num_items; From a0ee2031dbf5f0183412d4b20a30cbfd404616a8 Mon Sep 17 00:00:00 2001 From: Orit Wasserman Date: Mon, 25 Feb 2013 19:12:02 +0200 Subject: [PATCH 44/46] Fix cache_resize to keep old entry age Instead of using cache_insert do the update itself Signed-off-by: Orit Wasserman Reviewed-by: Peter Maydell Signed-off-by: Juan Quintela --- page_cache.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) diff --git a/page_cache.c b/page_cache.c index 748957bc42..e5717d53c9 100644 --- a/page_cache.c +++ b/page_cache.c @@ -192,18 +192,17 @@ int64_t cache_resize(PageCache *cache, int64_t new_num_pages) if (old_it->it_addr != -1) { /* check for collision, if there is, keep MRU page */ new_it = cache_get_by_addr(new_cache, old_it->it_addr); - if (new_it->it_data) { + if (new_it->it_data && new_it->it_age >= old_it->it_age) { /* keep the MRU page */ - if (new_it->it_age >= old_it->it_age) { - g_free(old_it->it_data); - } else { - g_free(new_it->it_data); - new_it->it_data = old_it->it_data; - new_it->it_age = old_it->it_age; - new_it->it_addr = old_it->it_addr; - } + g_free(old_it->it_data); } else { - cache_insert(new_cache, old_it->it_addr, old_it->it_data); + if (!new_it->it_data) { + new_cache->num_items++; + } + g_free(new_it->it_data); + new_it->it_data = old_it->it_data; + new_it->it_age = old_it->it_age; + new_it->it_addr = old_it->it_addr; } } } From 32a1c08b60a8ac0e63b54a5793a26b5e32b36618 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Mon, 25 Feb 2013 19:12:03 +0200 Subject: [PATCH 45/46] page_cache: fix memory leak XBZRLE encoded migration introduced a MRU page cache meachnism. Unfortunately, cached items where never freed in case of a collision in the page cache on cache_insert(). This lead to out of memory conditions during XBZRLE migration if the page cache was small and there where a lot of collisions in the cache. Signed-off-by: Peter Lieven Signed-off-by: Orit Wasserman Reviewed-by: Peter Maydell Signed-off-by: Juan Quintela --- page_cache.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/page_cache.c b/page_cache.c index e5717d53c9..809dadc7eb 100644 --- a/page_cache.c +++ b/page_cache.c @@ -152,6 +152,9 @@ void cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata) /* actual update of entry */ it = cache_get_by_addr(cache, addr); + /* free old cached data if any */ + g_free(it->it_data); + if (!it->it_data) { cache->num_items++; } From ee0b44aa9d9450e873a761ca2030b2fa3ec52eb0 Mon Sep 17 00:00:00 2001 From: Peter Lieven Date: Mon, 25 Feb 2013 19:12:04 +0200 Subject: [PATCH 46/46] page_cache: dup memory on insert The page cache frees all data on finish, on resize and if there is collision on insert. So it should be the caches responsibility to dup the data that is stored in the cache. Signed-off-by: Peter Lieven Signed-off-by: Orit Wasserman Reviewed-by: Peter Maydell Signed-off-by: Juan Quintela --- arch_init.c | 3 +-- include/migration/page_cache.h | 3 ++- page_cache.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/arch_init.c b/arch_init.c index 6089c53386..98e2bc6f55 100644 --- a/arch_init.c +++ b/arch_init.c @@ -293,8 +293,7 @@ static int save_xbzrle_page(QEMUFile *f, uint8_t *current_data, if (!cache_is_cached(XBZRLE.cache, current_addr)) { if (!last_stage) { - cache_insert(XBZRLE.cache, current_addr, - g_memdup(current_data, TARGET_PAGE_SIZE)); + cache_insert(XBZRLE.cache, current_addr, current_data); } acct_info.xbzrle_cache_miss++; return -1; diff --git a/include/migration/page_cache.h b/include/migration/page_cache.h index 3839ac7726..87894fea9f 100644 --- a/include/migration/page_cache.h +++ b/include/migration/page_cache.h @@ -57,7 +57,8 @@ bool cache_is_cached(const PageCache *cache, uint64_t addr); uint8_t *get_cached_data(const PageCache *cache, uint64_t addr); /** - * cache_insert: insert the page into the cache. the previous value will be overwritten + * cache_insert: insert the page into the cache. the page cache + * will dup the data on insert. the previous value will be overwritten * * @cache pointer to the PageCache struct * @addr: page address diff --git a/page_cache.c b/page_cache.c index 809dadc7eb..938a79c9ea 100644 --- a/page_cache.c +++ b/page_cache.c @@ -159,7 +159,7 @@ void cache_insert(PageCache *cache, uint64_t addr, uint8_t *pdata) cache->num_items++; } - it->it_data = pdata; + it->it_data = g_memdup(pdata, cache->page_size); it->it_age = ++cache->max_item_age; it->it_addr = addr; }