From 5e32ffd346429b2e92545c425de96d92e88a7498 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Mon, 19 Jul 2021 19:52:17 +0100 Subject: [PATCH 1/7] tests/qtest/migration-test.c: use 127.0.0.1 instead of 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit OpenBSD doesn't like :0 as an address, switch to using 127.0.0.1 in baddest; it's really testing the :0 port number that isn't allowed on anything. (The test doesn't currently run anyway because of the userfault problem that Peter noticed, but this gets us closer to being able to reenable it) Signed-off-by: Dr. David Alan Gilbert Message-Id: <20210719185217.122105-1-dgilbert@redhat.com> Acked-by: Peter Xu Reviewed-by: Daniel P. Berrangé Reviewed-by: Juan Quintela Reviewed-by: Philippe Mathieu-Daude Signed-off-by: Dr. David Alan Gilbert --- tests/qtest/migration-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 328d6dbe97..1e8b7784ef 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -787,10 +787,10 @@ static void test_baddest(void) args->hide_stderr = true; - if (test_migrate_start(&from, &to, "tcp:0:0", args)) { + if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", args)) { return; } - migrate_qmp(from, "tcp:0:0", "{}"); + migrate_qmp(from, "tcp:127.0.0.1:0", "{}"); wait_for_migration_fail(from, false); test_migrate_end(from, to, false); } From 53021ea1659b8a9074c6f5eb6c65a4e5dddddaec Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 22 Jul 2021 13:58:37 -0400 Subject: [PATCH 2/7] migration: Fix missing join() of rp_thread It's possible that the migration thread skip the join() of the rp_thread in below race and crash on src right at finishing migration: migration_thread rp_thread ---------------- --------- migration_completion() (before rp_thread quits) from_dst_file=NULL [thread got scheduled out] s->rp_state.from_dst_file==NULL (skip join() of rp_thread) migrate_fd_cleanup() qemu_fclose(s->to_dst_file) yank_unregister_instance() assert(yank_find_entry()) <------- crash It could mostly happen with postcopy, but that shouldn't be required, e.g., I think it could also trigger with MIGRATION_CAPABILITY_RETURN_PATH set. It's suspected that above race could be the root cause of a recent (but rare) migration-test break reported by either Dave or PMM: https://lore.kernel.org/qemu-devel/YPamXAHwan%2FPPXLf@work-vm/ The issue is: from_dst_file is reset in the rp_thread, so if the thread reset it to NULL fast enough then the migration thread will assume there's no rp_thread at all. This could potentially cause more severe issue (e.g. crash) after the yank code. Fix it by using a boolean to keep "whether we've created rp_thread". Cc: Dr. David Alan Gilbert Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Message-Id: <20210722175841.938739-2-peterx@redhat.com> Reviewed-by: Lukas Straub Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 4 +++- migration/migration.h | 7 +++++++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 2d306582eb..21b94f75a3 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -2867,6 +2867,7 @@ static int open_return_path_on_source(MigrationState *ms, qemu_thread_create(&ms->rp_state.rp_thread, "return path", source_return_path_thread, ms, QEMU_THREAD_JOINABLE); + ms->rp_state.rp_thread_created = true; trace_open_return_path_on_source_continue(); @@ -2891,6 +2892,7 @@ static int await_return_path_close_on_source(MigrationState *ms) } trace_await_return_path_close_on_source_joining(); qemu_thread_join(&ms->rp_state.rp_thread); + ms->rp_state.rp_thread_created = false; trace_await_return_path_close_on_source_close(); return ms->rp_state.error; } @@ -3170,7 +3172,7 @@ static void migration_completion(MigrationState *s) * it will wait for the destination to send it's status in * a SHUT command). */ - if (s->rp_state.from_dst_file) { + if (s->rp_state.rp_thread_created) { int rp_error; trace_migration_return_path_end_before(); rp_error = await_return_path_close_on_source(s); diff --git a/migration/migration.h b/migration/migration.h index 2ebb740dfa..c302879fad 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -195,6 +195,13 @@ struct MigrationState { QEMUFile *from_dst_file; QemuThread rp_thread; bool error; + /* + * We can also check non-zero of rp_thread, but there's no "official" + * way to do this, so this bool makes it slightly more elegant. + * Checking from_dst_file for this is racy because from_dst_file will + * be cleared in the rp_thread! + */ + bool rp_thread_created; QemuSemaphore rp_sem; } rp_state; From 43044ac0ee5758d92b639843c045123c2de578d1 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 22 Jul 2021 13:58:38 -0400 Subject: [PATCH 3/7] migration: Make from_dst_file accesses thread-safe Accessing from_dst_file is potentially racy in current code base like below: if (s->from_dst_file) do_something(s->from_dst_file); Because from_dst_file can be reset right after the check in another thread (rp_thread). One example is migrate_fd_cancel(). Use the same qemu_file_lock to protect it too, just like to_dst_file. When it's safe to access without lock, comment it. There's one special reference in migration_thread() that can be replaced by the newly introduced rp_thread_created flag. Reported-by: Dr. David Alan Gilbert Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Reviewed-by: Lukas Straub Message-Id: <20210722175841.938739-3-peterx@redhat.com> Signed-off-by: Dr. David Alan Gilbert with Peter's fixup --- migration/migration.c | 39 +++++++++++++++++++++++++++++---------- migration/migration.h | 8 +++++--- migration/ram.c | 1 + 3 files changed, 35 insertions(+), 13 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 21b94f75a3..62dbcb7d52 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1879,9 +1879,11 @@ static void migrate_fd_cancel(MigrationState *s) QEMUFile *f = migrate_get_current()->to_dst_file; trace_migrate_fd_cancel(); - if (s->rp_state.from_dst_file) { - /* shutdown the rp socket, so causing the rp thread to shutdown */ - qemu_file_shutdown(s->rp_state.from_dst_file); + WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { + if (s->rp_state.from_dst_file) { + /* shutdown the rp socket, so causing the rp thread to shutdown */ + qemu_file_shutdown(s->rp_state.from_dst_file); + } } do { @@ -2686,6 +2688,23 @@ static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) return 0; } +/* Release ms->rp_state.from_dst_file in a safe way */ +static void migration_release_from_dst_file(MigrationState *ms) +{ + QEMUFile *file; + + WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { + /* + * Reset the from_dst_file pointer first before releasing it, as we + * can't block within lock section + */ + file = ms->rp_state.from_dst_file; + ms->rp_state.from_dst_file = NULL; + } + + qemu_fclose(file); +} + /* * Handles messages sent on the return path towards the source VM * @@ -2827,11 +2846,13 @@ out: * Maybe there is something we can do: it looks like a * network down issue, and we pause for a recovery. */ - qemu_fclose(rp); - ms->rp_state.from_dst_file = NULL; + migration_release_from_dst_file(ms); rp = NULL; if (postcopy_pause_return_path_thread(ms)) { - /* Reload rp, reset the rest */ + /* + * Reload rp, reset the rest. Referencing it is safe since + * it's reset only by us above, or when migration completes + */ rp = ms->rp_state.from_dst_file; ms->rp_state.error = false; goto retry; @@ -2843,8 +2864,7 @@ out: } trace_source_return_path_thread_end(); - ms->rp_state.from_dst_file = NULL; - qemu_fclose(rp); + migration_release_from_dst_file(ms); rcu_unregister_thread(); return NULL; } @@ -2852,7 +2872,6 @@ out: static int open_return_path_on_source(MigrationState *ms, bool create_thread) { - ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); if (!ms->rp_state.from_dst_file) { return -1; @@ -3746,7 +3765,7 @@ static void *migration_thread(void *opaque) * If we opened the return path, we need to make sure dst has it * opened as well. */ - if (s->rp_state.from_dst_file) { + if (s->rp_state.rp_thread_created) { /* Now tell the dest that it should open its end so it can reply */ qemu_savevm_send_open_return_path(s->to_dst_file); diff --git a/migration/migration.h b/migration/migration.h index c302879fad..7a5aa8c2fd 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -154,12 +154,13 @@ struct MigrationState { QemuThread thread; QEMUBH *vm_start_bh; QEMUBH *cleanup_bh; + /* Protected by qemu_file_lock */ QEMUFile *to_dst_file; QIOChannelBuffer *bioc; /* - * Protects to_dst_file pointer. We need to make sure we won't - * yield or hang during the critical section, since this lock will - * be used in OOB command handler. + * Protects to_dst_file/from_dst_file pointers. We need to make sure we + * won't yield or hang during the critical section, since this lock will be + * used in OOB command handler. */ QemuMutex qemu_file_lock; @@ -192,6 +193,7 @@ struct MigrationState { /* State related to return path */ struct { + /* Protected by qemu_file_lock */ QEMUFile *from_dst_file; QemuThread rp_thread; bool error; diff --git a/migration/ram.c b/migration/ram.c index b5fc454b2f..f728f5072f 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -4012,6 +4012,7 @@ static void ram_dirty_bitmap_reload_notify(MigrationState *s) int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) { int ret = -EINVAL; + /* from_dst_file is always valid because we're within rp_thread */ QEMUFile *file = s->rp_state.from_dst_file; unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS; uint64_t local_size = DIV_ROUND_UP(nbits, 8); From 18711405b506e7ca3822ed19830f1c562e0247f9 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 22 Jul 2021 13:58:39 -0400 Subject: [PATCH 4/7] migration: Introduce migration_ioc_[un]register_yank() There're plenty of places in migration/* that checks against either socket or tls typed ioc for yank operations. Provide two helpers to hide all these information. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Message-Id: <20210722175841.938739-4-peterx@redhat.com> Reviewed-by: Lukas Straub Signed-off-by: Dr. David Alan Gilbert --- migration/channel.c | 15 ++------------- migration/multifd.c | 8 ++------ migration/qemu-file-channel.c | 8 ++------ migration/yank_functions.c | 28 ++++++++++++++++++++++++++++ migration/yank_functions.h | 2 ++ 5 files changed, 36 insertions(+), 25 deletions(-) diff --git a/migration/channel.c b/migration/channel.c index 01275a9162..c4fc000a1a 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -44,13 +44,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) TYPE_QIO_CHANNEL_TLS)) { migration_tls_channel_process_incoming(s, ioc, &local_err); } else { - if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { - yank_register_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(ioc)); - } - + migration_ioc_register_yank(ioc); migration_ioc_process_incoming(ioc, &local_err); } @@ -94,12 +88,7 @@ void migration_channel_connect(MigrationState *s, } else { QEMUFile *f = qemu_fopen_channel_output(ioc); - if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { - yank_register_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(ioc)); - } + migration_ioc_register_yank(ioc); qemu_mutex_lock(&s->qemu_file_lock); s->to_dst_file = f; diff --git a/migration/multifd.c b/migration/multifd.c index ab41590e71..377da78f5b 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -987,12 +987,8 @@ int multifd_load_cleanup(Error **errp) for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDRecvParams *p = &multifd_recv_state->params[i]; - if ((object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_TLS)) - && OBJECT(p->c)->ref == 1) { - yank_unregister_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(p->c)); + if (OBJECT(p->c)->ref == 1) { + migration_ioc_unregister_yank(p->c); } object_unref(OBJECT(p->c)); diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c index fad340ea7a..867a5ed0c3 100644 --- a/migration/qemu-file-channel.c +++ b/migration/qemu-file-channel.c @@ -107,12 +107,8 @@ static int channel_close(void *opaque, Error **errp) int ret; QIOChannel *ioc = QIO_CHANNEL(opaque); ret = qio_channel_close(ioc, errp); - if ((object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) - && OBJECT(ioc)->ref == 1) { - yank_unregister_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(ioc)); + if (OBJECT(ioc)->ref == 1) { + migration_ioc_unregister_yank(ioc); } object_unref(OBJECT(ioc)); return ret; diff --git a/migration/yank_functions.c b/migration/yank_functions.c index 96c90e17dc..23697173ae 100644 --- a/migration/yank_functions.c +++ b/migration/yank_functions.c @@ -11,6 +11,9 @@ #include "qapi/error.h" #include "io/channel.h" #include "yank_functions.h" +#include "qemu/yank.h" +#include "io/channel-socket.h" +#include "io/channel-tls.h" void migration_yank_iochannel(void *opaque) { @@ -18,3 +21,28 @@ void migration_yank_iochannel(void *opaque) qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); } + +/* Return whether yank is supported on this ioc */ +static bool migration_ioc_yank_supported(QIOChannel *ioc) +{ + return object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || + object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS); +} + +void migration_ioc_register_yank(QIOChannel *ioc) +{ + if (migration_ioc_yank_supported(ioc)) { + yank_register_function(MIGRATION_YANK_INSTANCE, + migration_yank_iochannel, + QIO_CHANNEL(ioc)); + } +} + +void migration_ioc_unregister_yank(QIOChannel *ioc) +{ + if (migration_ioc_yank_supported(ioc)) { + yank_unregister_function(MIGRATION_YANK_INSTANCE, + migration_yank_iochannel, + QIO_CHANNEL(ioc)); + } +} diff --git a/migration/yank_functions.h b/migration/yank_functions.h index 055ea22523..74c7f18c91 100644 --- a/migration/yank_functions.h +++ b/migration/yank_functions.h @@ -15,3 +15,5 @@ * @opaque: QIOChannel to shutdown */ void migration_yank_iochannel(void *opaque); +void migration_ioc_register_yank(QIOChannel *ioc); +void migration_ioc_unregister_yank(QIOChannel *ioc); From c6ad5be7ae5b8c6bbbd0c592bbf18d4a78540516 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 22 Jul 2021 13:58:40 -0400 Subject: [PATCH 5/7] migration: Teach QEMUFile to be QIOChannel-aware migration uses QIOChannel typed qemufiles. In follow up patches, we'll need the capability to identify this fact, so that we can get the backing QIOChannel from a QEMUFile. We can also define types for QEMUFile but so far since we only need to be able to identify QIOChannel, introduce a boolean which is simpler. Introduce another helper qemu_file_get_ioc() to return the ioc backend of a qemufile if has_ioc is set. No functional change. Reviewed-by: Dr. David Alan Gilbert Signed-off-by: Peter Xu Message-Id: <20210722175841.938739-5-peterx@redhat.com> Reviewed-by: Lukas Straub Signed-off-by: Dr. David Alan Gilbert --- migration/qemu-file-channel.c | 4 ++-- migration/qemu-file.c | 17 ++++++++++++++++- migration/qemu-file.h | 4 +++- migration/ram.c | 2 +- migration/savevm.c | 4 ++-- 5 files changed, 24 insertions(+), 7 deletions(-) diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c index 867a5ed0c3..2f8b1fcd46 100644 --- a/migration/qemu-file-channel.c +++ b/migration/qemu-file-channel.c @@ -187,11 +187,11 @@ static const QEMUFileOps channel_output_ops = { QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc) { object_ref(OBJECT(ioc)); - return qemu_fopen_ops(ioc, &channel_input_ops); + return qemu_fopen_ops(ioc, &channel_input_ops, true); } QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc) { object_ref(OBJECT(ioc)); - return qemu_fopen_ops(ioc, &channel_output_ops); + return qemu_fopen_ops(ioc, &channel_output_ops, true); } diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 1eacf9e831..6338d8e2ff 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -55,6 +55,8 @@ struct QEMUFile { Error *last_error_obj; /* has the file has been shutdown */ bool shutdown; + /* Whether opaque points to a QIOChannel */ + bool has_ioc; }; /* @@ -101,7 +103,7 @@ bool qemu_file_mode_is_not_valid(const char *mode) return false; } -QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) +QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc) { QEMUFile *f; @@ -109,6 +111,7 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) f->opaque = opaque; f->ops = ops; + f->has_ioc = has_ioc; return f; } @@ -851,3 +854,15 @@ void qemu_file_set_blocking(QEMUFile *f, bool block) f->ops->set_blocking(f->opaque, block, NULL); } } + +/* + * Return the ioc object if it's a migration channel. Note: it can return NULL + * for callers passing in a non-migration qemufile. E.g. see qemu_fopen_bdrv() + * and its usage in e.g. load_snapshot(). So we need to check against NULL + * before using it. If without the check, migration_incoming_state_destroy() + * could fail for load_snapshot(). + */ +QIOChannel *qemu_file_get_ioc(QEMUFile *file) +{ + return file->has_ioc ? QIO_CHANNEL(file->opaque) : NULL; +} diff --git a/migration/qemu-file.h b/migration/qemu-file.h index a9b6d6ccb7..3f36d4dc8c 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -27,6 +27,7 @@ #include #include "exec/cpu-common.h" +#include "io/channel.h" /* Read a chunk of data from a file at the given position. The pos argument * can be ignored if the file is only be used for streaming. The number of @@ -119,7 +120,7 @@ typedef struct QEMUFileHooks { QEMURamSaveFunc *save_page; } QEMUFileHooks; -QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops); +QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc); void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks); int qemu_get_fd(QEMUFile *f); int qemu_fclose(QEMUFile *f); @@ -179,5 +180,6 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data); size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size, uint64_t *bytes_sent); +QIOChannel *qemu_file_get_ioc(QEMUFile *file); #endif diff --git a/migration/ram.c b/migration/ram.c index f728f5072f..08b3cb7a4a 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -550,7 +550,7 @@ static int compress_threads_save_setup(void) /* comp_param[i].file is just used as a dummy buffer to save data, * set its ops to empty. */ - comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops); + comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false); comp_param[i].done = true; comp_param[i].quit = false; qemu_mutex_init(&comp_param[i].mutex); diff --git a/migration/savevm.c b/migration/savevm.c index 72848b946c..96b5e5d639 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -168,9 +168,9 @@ static const QEMUFileOps bdrv_write_ops = { static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable) { if (is_writable) { - return qemu_fopen_ops(bs, &bdrv_write_ops); + return qemu_fopen_ops(bs, &bdrv_write_ops, false); } - return qemu_fopen_ops(bs, &bdrv_read_ops); + return qemu_fopen_ops(bs, &bdrv_read_ops, false); } From 39675ffffb3394d44b880d083a214c5e44786170 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Thu, 22 Jul 2021 13:58:41 -0400 Subject: [PATCH 6/7] migration: Move the yank unregister of channel_close out It's efficient, but hackish to call yank unregister calls in channel_close(), especially it'll be hard to debug when qemu crashed with some yank function leaked. Remove that hack, but instead explicitly unregister yank functions at the places where needed, they are: (on src) - migrate_fd_cleanup - postcopy_pause (on dst) - migration_incoming_state_destroy - postcopy_pause_incoming Signed-off-by: Peter Xu Message-Id: <20210722175841.938739-6-peterx@redhat.com> Reviewed-by: Lukas Straub Signed-off-by: Dr. David Alan Gilbert --- migration/migration.c | 14 +++++++++++++- migration/qemu-file-channel.c | 3 --- migration/savevm.c | 7 +++++++ migration/yank_functions.c | 14 ++++++++++++++ migration/yank_functions.h | 1 + 5 files changed, 35 insertions(+), 4 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 62dbcb7d52..041b8451a6 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -59,6 +59,7 @@ #include "multifd.h" #include "qemu/yank.h" #include "sysemu/cpus.h" +#include "yank_functions.h" #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ @@ -273,6 +274,7 @@ void migration_incoming_state_destroy(void) } if (mis->from_src_file) { + migration_ioc_unregister_yank_from_file(mis->from_src_file); qemu_fclose(mis->from_src_file); mis->from_src_file = NULL; } @@ -1811,6 +1813,7 @@ static void migrate_fd_cleanup(MigrationState *s) * Close the file handle without the lock to make sure the * critical section won't block for long. */ + migration_ioc_unregister_yank_from_file(tmp); qemu_fclose(tmp); } @@ -3351,8 +3354,17 @@ static MigThrError postcopy_pause(MigrationState *s) while (true) { QEMUFile *file; - /* Current channel is possibly broken. Release it. */ + /* + * Current channel is possibly broken. Release it. Note that this is + * guaranteed even without lock because to_dst_file should only be + * modified by the migration thread. That also guarantees that the + * unregister of yank is safe too without the lock. It should be safe + * even to be within the qemu_file_lock, but we didn't do that to avoid + * taking more mutex (yank_lock) within qemu_file_lock. TL;DR: we make + * the qemu_file_lock critical section as small as possible. + */ assert(s->to_dst_file); + migration_ioc_unregister_yank_from_file(s->to_dst_file); qemu_mutex_lock(&s->qemu_file_lock); file = s->to_dst_file; s->to_dst_file = NULL; diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c index 2f8b1fcd46..bb5a5752df 100644 --- a/migration/qemu-file-channel.c +++ b/migration/qemu-file-channel.c @@ -107,9 +107,6 @@ static int channel_close(void *opaque, Error **errp) int ret; QIOChannel *ioc = QIO_CHANNEL(opaque); ret = qio_channel_close(ioc, errp); - if (OBJECT(ioc)->ref == 1) { - migration_ioc_unregister_yank(ioc); - } object_unref(OBJECT(ioc)); return ret; } diff --git a/migration/savevm.c b/migration/savevm.c index 96b5e5d639..7b7b64bd13 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -65,6 +65,7 @@ #include "qemu/bitmap.h" #include "net/announce.h" #include "qemu/yank.h" +#include "yank_functions.h" const unsigned int postcopy_ram_discard_version; @@ -2568,6 +2569,12 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) /* Clear the triggered bit to allow one recovery */ mis->postcopy_recover_triggered = false; + /* + * Unregister yank with either from/to src would work, since ioc behind it + * is the same + */ + migration_ioc_unregister_yank_from_file(mis->from_src_file); + assert(mis->from_src_file); qemu_file_shutdown(mis->from_src_file); qemu_fclose(mis->from_src_file); diff --git a/migration/yank_functions.c b/migration/yank_functions.c index 23697173ae..8c08aef14a 100644 --- a/migration/yank_functions.c +++ b/migration/yank_functions.c @@ -14,6 +14,7 @@ #include "qemu/yank.h" #include "io/channel-socket.h" #include "io/channel-tls.h" +#include "qemu-file.h" void migration_yank_iochannel(void *opaque) { @@ -46,3 +47,16 @@ void migration_ioc_unregister_yank(QIOChannel *ioc) QIO_CHANNEL(ioc)); } } + +void migration_ioc_unregister_yank_from_file(QEMUFile *file) +{ + QIOChannel *ioc = qemu_file_get_ioc(file); + + if (ioc) { + /* + * For migration qemufiles, we'll always reach here. Though we'll skip + * calls from e.g. savevm/loadvm as they don't use yank. + */ + migration_ioc_unregister_yank(ioc); + } +} diff --git a/migration/yank_functions.h b/migration/yank_functions.h index 74c7f18c91..a7577955ed 100644 --- a/migration/yank_functions.h +++ b/migration/yank_functions.h @@ -17,3 +17,4 @@ void migration_yank_iochannel(void *opaque); void migration_ioc_register_yank(QIOChannel *ioc); void migration_ioc_unregister_yank(QIOChannel *ioc); +void migration_ioc_unregister_yank_from_file(QEMUFile *file); From 3143577d6a3f363514219c03d936e653ede44f32 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 22 Jul 2021 04:30:55 -0400 Subject: [PATCH 7/7] migration: clear the memory region dirty bitmap when skipping free pages When skipping free pages to send, their corresponding dirty bits in the memory region dirty bitmap need to be cleared. Otherwise the skipped pages will be sent in the next round after the migration thread syncs dirty bits from the memory region dirty bitmap. Cc: David Hildenbrand Cc: Peter Xu Cc: Michael S. Tsirkin Reported-by: David Hildenbrand Signed-off-by: Wei Wang Message-Id: <20210722083055.23352-1-wei.w.wang@intel.com> Reviewed-by: David Hildenbrand Signed-off-by: Dr. David Alan Gilbert --- migration/ram.c | 74 +++++++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 18 deletions(-) diff --git a/migration/ram.c b/migration/ram.c index 08b3cb7a4a..7a43bfd7af 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -789,6 +789,53 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, return find_next_bit(bitmap, size, start); } +static void migration_clear_memory_region_dirty_bitmap(RAMState *rs, + RAMBlock *rb, + unsigned long page) +{ + uint8_t shift; + hwaddr size, start; + + if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) { + return; + } + + shift = rb->clear_bmap_shift; + /* + * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this + * can make things easier sometimes since then start address + * of the small chunk will always be 64 pages aligned so the + * bitmap will always be aligned to unsigned long. We should + * even be able to remove this restriction but I'm simply + * keeping it. + */ + assert(shift >= 6); + + size = 1ULL << (TARGET_PAGE_BITS + shift); + start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size); + trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page); + memory_region_clear_dirty_bitmap(rb->mr, start, size); +} + +static void +migration_clear_memory_region_dirty_bitmap_range(RAMState *rs, + RAMBlock *rb, + unsigned long start, + unsigned long npages) +{ + unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift; + unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages); + unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages); + + /* + * Clear pages from start to start + npages - 1, so the end boundary is + * exclusive. + */ + for (i = chunk_start; i < chunk_end; i += chunk_pages) { + migration_clear_memory_region_dirty_bitmap(rs, rb, i); + } +} + static inline bool migration_bitmap_clear_dirty(RAMState *rs, RAMBlock *rb, unsigned long page) @@ -803,26 +850,9 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, * the page in the chunk we clear the remote dirty bitmap for all. * Clearing it earlier won't be a problem, but too late will. */ - if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) { - uint8_t shift = rb->clear_bmap_shift; - hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift); - hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size); - - /* - * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this - * can make things easier sometimes since then start address - * of the small chunk will always be 64 pages aligned so the - * bitmap will always be aligned to unsigned long. We should - * even be able to remove this restriction but I'm simply - * keeping it. - */ - assert(shift >= 6); - trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page); - memory_region_clear_dirty_bitmap(rb->mr, start, size); - } + migration_clear_memory_region_dirty_bitmap(rs, rb, page); ret = test_and_clear_bit(page, rb->bmap); - if (ret) { rs->migration_dirty_pages--; } @@ -2741,6 +2771,14 @@ void qemu_guest_free_page_hint(void *addr, size_t len) npages = used_len >> TARGET_PAGE_BITS; qemu_mutex_lock(&ram_state->bitmap_mutex); + /* + * The skipped free pages are equavalent to be sent from clear_bmap's + * perspective, so clear the bits from the memory region bitmap which + * are initially set. Otherwise those skipped pages will be sent in + * the next round after syncing from the memory region bitmap. + */ + migration_clear_memory_region_dirty_bitmap_range(ram_state, block, + start, npages); ram_state->migration_dirty_pages -= bitmap_count_one_with_offset(block->bmap, start, npages); bitmap_clear(block->bmap, start, npages);