diff --git a/migration/channel.c b/migration/channel.c index 01275a9162..c4fc000a1a 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -44,13 +44,7 @@ void migration_channel_process_incoming(QIOChannel *ioc) TYPE_QIO_CHANNEL_TLS)) { migration_tls_channel_process_incoming(s, ioc, &local_err); } else { - if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { - yank_register_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(ioc)); - } - + migration_ioc_register_yank(ioc); migration_ioc_process_incoming(ioc, &local_err); } @@ -94,12 +88,7 @@ void migration_channel_connect(MigrationState *s, } else { QEMUFile *f = qemu_fopen_channel_output(ioc); - if (object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) { - yank_register_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(ioc)); - } + migration_ioc_register_yank(ioc); qemu_mutex_lock(&s->qemu_file_lock); s->to_dst_file = f; diff --git a/migration/migration.c b/migration/migration.c index 2d306582eb..041b8451a6 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -59,6 +59,7 @@ #include "multifd.h" #include "qemu/yank.h" #include "sysemu/cpus.h" +#include "yank_functions.h" #define MAX_THROTTLE (128 << 20) /* Migration transfer speed throttling */ @@ -273,6 +274,7 @@ void migration_incoming_state_destroy(void) } if (mis->from_src_file) { + migration_ioc_unregister_yank_from_file(mis->from_src_file); qemu_fclose(mis->from_src_file); mis->from_src_file = NULL; } @@ -1811,6 +1813,7 @@ static void migrate_fd_cleanup(MigrationState *s) * Close the file handle without the lock to make sure the * critical section won't block for long. */ + migration_ioc_unregister_yank_from_file(tmp); qemu_fclose(tmp); } @@ -1879,9 +1882,11 @@ static void migrate_fd_cancel(MigrationState *s) QEMUFile *f = migrate_get_current()->to_dst_file; trace_migrate_fd_cancel(); - if (s->rp_state.from_dst_file) { - /* shutdown the rp socket, so causing the rp thread to shutdown */ - qemu_file_shutdown(s->rp_state.from_dst_file); + WITH_QEMU_LOCK_GUARD(&s->qemu_file_lock) { + if (s->rp_state.from_dst_file) { + /* shutdown the rp socket, so causing the rp thread to shutdown */ + qemu_file_shutdown(s->rp_state.from_dst_file); + } } do { @@ -2686,6 +2691,23 @@ static int migrate_handle_rp_resume_ack(MigrationState *s, uint32_t value) return 0; } +/* Release ms->rp_state.from_dst_file in a safe way */ +static void migration_release_from_dst_file(MigrationState *ms) +{ + QEMUFile *file; + + WITH_QEMU_LOCK_GUARD(&ms->qemu_file_lock) { + /* + * Reset the from_dst_file pointer first before releasing it, as we + * can't block within lock section + */ + file = ms->rp_state.from_dst_file; + ms->rp_state.from_dst_file = NULL; + } + + qemu_fclose(file); +} + /* * Handles messages sent on the return path towards the source VM * @@ -2827,11 +2849,13 @@ out: * Maybe there is something we can do: it looks like a * network down issue, and we pause for a recovery. */ - qemu_fclose(rp); - ms->rp_state.from_dst_file = NULL; + migration_release_from_dst_file(ms); rp = NULL; if (postcopy_pause_return_path_thread(ms)) { - /* Reload rp, reset the rest */ + /* + * Reload rp, reset the rest. Referencing it is safe since + * it's reset only by us above, or when migration completes + */ rp = ms->rp_state.from_dst_file; ms->rp_state.error = false; goto retry; @@ -2843,8 +2867,7 @@ out: } trace_source_return_path_thread_end(); - ms->rp_state.from_dst_file = NULL; - qemu_fclose(rp); + migration_release_from_dst_file(ms); rcu_unregister_thread(); return NULL; } @@ -2852,7 +2875,6 @@ out: static int open_return_path_on_source(MigrationState *ms, bool create_thread) { - ms->rp_state.from_dst_file = qemu_file_get_return_path(ms->to_dst_file); if (!ms->rp_state.from_dst_file) { return -1; @@ -2867,6 +2889,7 @@ static int open_return_path_on_source(MigrationState *ms, qemu_thread_create(&ms->rp_state.rp_thread, "return path", source_return_path_thread, ms, QEMU_THREAD_JOINABLE); + ms->rp_state.rp_thread_created = true; trace_open_return_path_on_source_continue(); @@ -2891,6 +2914,7 @@ static int await_return_path_close_on_source(MigrationState *ms) } trace_await_return_path_close_on_source_joining(); qemu_thread_join(&ms->rp_state.rp_thread); + ms->rp_state.rp_thread_created = false; trace_await_return_path_close_on_source_close(); return ms->rp_state.error; } @@ -3170,7 +3194,7 @@ static void migration_completion(MigrationState *s) * it will wait for the destination to send it's status in * a SHUT command). */ - if (s->rp_state.from_dst_file) { + if (s->rp_state.rp_thread_created) { int rp_error; trace_migration_return_path_end_before(); rp_error = await_return_path_close_on_source(s); @@ -3330,8 +3354,17 @@ static MigThrError postcopy_pause(MigrationState *s) while (true) { QEMUFile *file; - /* Current channel is possibly broken. Release it. */ + /* + * Current channel is possibly broken. Release it. Note that this is + * guaranteed even without lock because to_dst_file should only be + * modified by the migration thread. That also guarantees that the + * unregister of yank is safe too without the lock. It should be safe + * even to be within the qemu_file_lock, but we didn't do that to avoid + * taking more mutex (yank_lock) within qemu_file_lock. TL;DR: we make + * the qemu_file_lock critical section as small as possible. + */ assert(s->to_dst_file); + migration_ioc_unregister_yank_from_file(s->to_dst_file); qemu_mutex_lock(&s->qemu_file_lock); file = s->to_dst_file; s->to_dst_file = NULL; @@ -3744,7 +3777,7 @@ static void *migration_thread(void *opaque) * If we opened the return path, we need to make sure dst has it * opened as well. */ - if (s->rp_state.from_dst_file) { + if (s->rp_state.rp_thread_created) { /* Now tell the dest that it should open its end so it can reply */ qemu_savevm_send_open_return_path(s->to_dst_file); diff --git a/migration/migration.h b/migration/migration.h index 2ebb740dfa..7a5aa8c2fd 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -154,12 +154,13 @@ struct MigrationState { QemuThread thread; QEMUBH *vm_start_bh; QEMUBH *cleanup_bh; + /* Protected by qemu_file_lock */ QEMUFile *to_dst_file; QIOChannelBuffer *bioc; /* - * Protects to_dst_file pointer. We need to make sure we won't - * yield or hang during the critical section, since this lock will - * be used in OOB command handler. + * Protects to_dst_file/from_dst_file pointers. We need to make sure we + * won't yield or hang during the critical section, since this lock will be + * used in OOB command handler. */ QemuMutex qemu_file_lock; @@ -192,9 +193,17 @@ struct MigrationState { /* State related to return path */ struct { + /* Protected by qemu_file_lock */ QEMUFile *from_dst_file; QemuThread rp_thread; bool error; + /* + * We can also check non-zero of rp_thread, but there's no "official" + * way to do this, so this bool makes it slightly more elegant. + * Checking from_dst_file for this is racy because from_dst_file will + * be cleared in the rp_thread! + */ + bool rp_thread_created; QemuSemaphore rp_sem; } rp_state; diff --git a/migration/multifd.c b/migration/multifd.c index ab41590e71..377da78f5b 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -987,12 +987,8 @@ int multifd_load_cleanup(Error **errp) for (i = 0; i < migrate_multifd_channels(); i++) { MultiFDRecvParams *p = &multifd_recv_state->params[i]; - if ((object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(p->c), TYPE_QIO_CHANNEL_TLS)) - && OBJECT(p->c)->ref == 1) { - yank_unregister_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(p->c)); + if (OBJECT(p->c)->ref == 1) { + migration_ioc_unregister_yank(p->c); } object_unref(OBJECT(p->c)); diff --git a/migration/qemu-file-channel.c b/migration/qemu-file-channel.c index fad340ea7a..bb5a5752df 100644 --- a/migration/qemu-file-channel.c +++ b/migration/qemu-file-channel.c @@ -107,13 +107,6 @@ static int channel_close(void *opaque, Error **errp) int ret; QIOChannel *ioc = QIO_CHANNEL(opaque); ret = qio_channel_close(ioc, errp); - if ((object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || - object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS)) - && OBJECT(ioc)->ref == 1) { - yank_unregister_function(MIGRATION_YANK_INSTANCE, - migration_yank_iochannel, - QIO_CHANNEL(ioc)); - } object_unref(OBJECT(ioc)); return ret; } @@ -191,11 +184,11 @@ static const QEMUFileOps channel_output_ops = { QEMUFile *qemu_fopen_channel_input(QIOChannel *ioc) { object_ref(OBJECT(ioc)); - return qemu_fopen_ops(ioc, &channel_input_ops); + return qemu_fopen_ops(ioc, &channel_input_ops, true); } QEMUFile *qemu_fopen_channel_output(QIOChannel *ioc) { object_ref(OBJECT(ioc)); - return qemu_fopen_ops(ioc, &channel_output_ops); + return qemu_fopen_ops(ioc, &channel_output_ops, true); } diff --git a/migration/qemu-file.c b/migration/qemu-file.c index 1eacf9e831..6338d8e2ff 100644 --- a/migration/qemu-file.c +++ b/migration/qemu-file.c @@ -55,6 +55,8 @@ struct QEMUFile { Error *last_error_obj; /* has the file has been shutdown */ bool shutdown; + /* Whether opaque points to a QIOChannel */ + bool has_ioc; }; /* @@ -101,7 +103,7 @@ bool qemu_file_mode_is_not_valid(const char *mode) return false; } -QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) +QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc) { QEMUFile *f; @@ -109,6 +111,7 @@ QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops) f->opaque = opaque; f->ops = ops; + f->has_ioc = has_ioc; return f; } @@ -851,3 +854,15 @@ void qemu_file_set_blocking(QEMUFile *f, bool block) f->ops->set_blocking(f->opaque, block, NULL); } } + +/* + * Return the ioc object if it's a migration channel. Note: it can return NULL + * for callers passing in a non-migration qemufile. E.g. see qemu_fopen_bdrv() + * and its usage in e.g. load_snapshot(). So we need to check against NULL + * before using it. If without the check, migration_incoming_state_destroy() + * could fail for load_snapshot(). + */ +QIOChannel *qemu_file_get_ioc(QEMUFile *file) +{ + return file->has_ioc ? QIO_CHANNEL(file->opaque) : NULL; +} diff --git a/migration/qemu-file.h b/migration/qemu-file.h index a9b6d6ccb7..3f36d4dc8c 100644 --- a/migration/qemu-file.h +++ b/migration/qemu-file.h @@ -27,6 +27,7 @@ #include #include "exec/cpu-common.h" +#include "io/channel.h" /* Read a chunk of data from a file at the given position. The pos argument * can be ignored if the file is only be used for streaming. The number of @@ -119,7 +120,7 @@ typedef struct QEMUFileHooks { QEMURamSaveFunc *save_page; } QEMUFileHooks; -QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops); +QEMUFile *qemu_fopen_ops(void *opaque, const QEMUFileOps *ops, bool has_ioc); void qemu_file_set_hooks(QEMUFile *f, const QEMUFileHooks *hooks); int qemu_get_fd(QEMUFile *f); int qemu_fclose(QEMUFile *f); @@ -179,5 +180,6 @@ void ram_control_load_hook(QEMUFile *f, uint64_t flags, void *data); size_t ram_control_save_page(QEMUFile *f, ram_addr_t block_offset, ram_addr_t offset, size_t size, uint64_t *bytes_sent); +QIOChannel *qemu_file_get_ioc(QEMUFile *file); #endif diff --git a/migration/ram.c b/migration/ram.c index b5fc454b2f..7a43bfd7af 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -550,7 +550,7 @@ static int compress_threads_save_setup(void) /* comp_param[i].file is just used as a dummy buffer to save data, * set its ops to empty. */ - comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops); + comp_param[i].file = qemu_fopen_ops(NULL, &empty_ops, false); comp_param[i].done = true; comp_param[i].quit = false; qemu_mutex_init(&comp_param[i].mutex); @@ -789,6 +789,53 @@ unsigned long migration_bitmap_find_dirty(RAMState *rs, RAMBlock *rb, return find_next_bit(bitmap, size, start); } +static void migration_clear_memory_region_dirty_bitmap(RAMState *rs, + RAMBlock *rb, + unsigned long page) +{ + uint8_t shift; + hwaddr size, start; + + if (!rb->clear_bmap || !clear_bmap_test_and_clear(rb, page)) { + return; + } + + shift = rb->clear_bmap_shift; + /* + * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this + * can make things easier sometimes since then start address + * of the small chunk will always be 64 pages aligned so the + * bitmap will always be aligned to unsigned long. We should + * even be able to remove this restriction but I'm simply + * keeping it. + */ + assert(shift >= 6); + + size = 1ULL << (TARGET_PAGE_BITS + shift); + start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size); + trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page); + memory_region_clear_dirty_bitmap(rb->mr, start, size); +} + +static void +migration_clear_memory_region_dirty_bitmap_range(RAMState *rs, + RAMBlock *rb, + unsigned long start, + unsigned long npages) +{ + unsigned long i, chunk_pages = 1UL << rb->clear_bmap_shift; + unsigned long chunk_start = QEMU_ALIGN_DOWN(start, chunk_pages); + unsigned long chunk_end = QEMU_ALIGN_UP(start + npages, chunk_pages); + + /* + * Clear pages from start to start + npages - 1, so the end boundary is + * exclusive. + */ + for (i = chunk_start; i < chunk_end; i += chunk_pages) { + migration_clear_memory_region_dirty_bitmap(rs, rb, i); + } +} + static inline bool migration_bitmap_clear_dirty(RAMState *rs, RAMBlock *rb, unsigned long page) @@ -803,26 +850,9 @@ static inline bool migration_bitmap_clear_dirty(RAMState *rs, * the page in the chunk we clear the remote dirty bitmap for all. * Clearing it earlier won't be a problem, but too late will. */ - if (rb->clear_bmap && clear_bmap_test_and_clear(rb, page)) { - uint8_t shift = rb->clear_bmap_shift; - hwaddr size = 1ULL << (TARGET_PAGE_BITS + shift); - hwaddr start = (((ram_addr_t)page) << TARGET_PAGE_BITS) & (-size); - - /* - * CLEAR_BITMAP_SHIFT_MIN should always guarantee this... this - * can make things easier sometimes since then start address - * of the small chunk will always be 64 pages aligned so the - * bitmap will always be aligned to unsigned long. We should - * even be able to remove this restriction but I'm simply - * keeping it. - */ - assert(shift >= 6); - trace_migration_bitmap_clear_dirty(rb->idstr, start, size, page); - memory_region_clear_dirty_bitmap(rb->mr, start, size); - } + migration_clear_memory_region_dirty_bitmap(rs, rb, page); ret = test_and_clear_bit(page, rb->bmap); - if (ret) { rs->migration_dirty_pages--; } @@ -2741,6 +2771,14 @@ void qemu_guest_free_page_hint(void *addr, size_t len) npages = used_len >> TARGET_PAGE_BITS; qemu_mutex_lock(&ram_state->bitmap_mutex); + /* + * The skipped free pages are equavalent to be sent from clear_bmap's + * perspective, so clear the bits from the memory region bitmap which + * are initially set. Otherwise those skipped pages will be sent in + * the next round after syncing from the memory region bitmap. + */ + migration_clear_memory_region_dirty_bitmap_range(ram_state, block, + start, npages); ram_state->migration_dirty_pages -= bitmap_count_one_with_offset(block->bmap, start, npages); bitmap_clear(block->bmap, start, npages); @@ -4012,6 +4050,7 @@ static void ram_dirty_bitmap_reload_notify(MigrationState *s) int ram_dirty_bitmap_reload(MigrationState *s, RAMBlock *block) { int ret = -EINVAL; + /* from_dst_file is always valid because we're within rp_thread */ QEMUFile *file = s->rp_state.from_dst_file; unsigned long *le_bitmap, nbits = block->used_length >> TARGET_PAGE_BITS; uint64_t local_size = DIV_ROUND_UP(nbits, 8); diff --git a/migration/savevm.c b/migration/savevm.c index 72848b946c..7b7b64bd13 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -65,6 +65,7 @@ #include "qemu/bitmap.h" #include "net/announce.h" #include "qemu/yank.h" +#include "yank_functions.h" const unsigned int postcopy_ram_discard_version; @@ -168,9 +169,9 @@ static const QEMUFileOps bdrv_write_ops = { static QEMUFile *qemu_fopen_bdrv(BlockDriverState *bs, int is_writable) { if (is_writable) { - return qemu_fopen_ops(bs, &bdrv_write_ops); + return qemu_fopen_ops(bs, &bdrv_write_ops, false); } - return qemu_fopen_ops(bs, &bdrv_read_ops); + return qemu_fopen_ops(bs, &bdrv_read_ops, false); } @@ -2568,6 +2569,12 @@ static bool postcopy_pause_incoming(MigrationIncomingState *mis) /* Clear the triggered bit to allow one recovery */ mis->postcopy_recover_triggered = false; + /* + * Unregister yank with either from/to src would work, since ioc behind it + * is the same + */ + migration_ioc_unregister_yank_from_file(mis->from_src_file); + assert(mis->from_src_file); qemu_file_shutdown(mis->from_src_file); qemu_fclose(mis->from_src_file); diff --git a/migration/yank_functions.c b/migration/yank_functions.c index 96c90e17dc..8c08aef14a 100644 --- a/migration/yank_functions.c +++ b/migration/yank_functions.c @@ -11,6 +11,10 @@ #include "qapi/error.h" #include "io/channel.h" #include "yank_functions.h" +#include "qemu/yank.h" +#include "io/channel-socket.h" +#include "io/channel-tls.h" +#include "qemu-file.h" void migration_yank_iochannel(void *opaque) { @@ -18,3 +22,41 @@ void migration_yank_iochannel(void *opaque) qio_channel_shutdown(ioc, QIO_CHANNEL_SHUTDOWN_BOTH, NULL); } + +/* Return whether yank is supported on this ioc */ +static bool migration_ioc_yank_supported(QIOChannel *ioc) +{ + return object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_SOCKET) || + object_dynamic_cast(OBJECT(ioc), TYPE_QIO_CHANNEL_TLS); +} + +void migration_ioc_register_yank(QIOChannel *ioc) +{ + if (migration_ioc_yank_supported(ioc)) { + yank_register_function(MIGRATION_YANK_INSTANCE, + migration_yank_iochannel, + QIO_CHANNEL(ioc)); + } +} + +void migration_ioc_unregister_yank(QIOChannel *ioc) +{ + if (migration_ioc_yank_supported(ioc)) { + yank_unregister_function(MIGRATION_YANK_INSTANCE, + migration_yank_iochannel, + QIO_CHANNEL(ioc)); + } +} + +void migration_ioc_unregister_yank_from_file(QEMUFile *file) +{ + QIOChannel *ioc = qemu_file_get_ioc(file); + + if (ioc) { + /* + * For migration qemufiles, we'll always reach here. Though we'll skip + * calls from e.g. savevm/loadvm as they don't use yank. + */ + migration_ioc_unregister_yank(ioc); + } +} diff --git a/migration/yank_functions.h b/migration/yank_functions.h index 055ea22523..a7577955ed 100644 --- a/migration/yank_functions.h +++ b/migration/yank_functions.h @@ -15,3 +15,6 @@ * @opaque: QIOChannel to shutdown */ void migration_yank_iochannel(void *opaque); +void migration_ioc_register_yank(QIOChannel *ioc); +void migration_ioc_unregister_yank(QIOChannel *ioc); +void migration_ioc_unregister_yank_from_file(QEMUFile *file); diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 328d6dbe97..1e8b7784ef 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -787,10 +787,10 @@ static void test_baddest(void) args->hide_stderr = true; - if (test_migrate_start(&from, &to, "tcp:0:0", args)) { + if (test_migrate_start(&from, &to, "tcp:127.0.0.1:0", args)) { return; } - migrate_qmp(from, "tcp:0:0", "{}"); + migrate_qmp(from, "tcp:127.0.0.1:0", "{}"); wait_for_migration_fail(from, false); test_migrate_end(from, to, false); }