diff --git a/include/migration/migration.h b/include/migration/migration.h index b382d7774f..6e42b58dc3 100644 --- a/include/migration/migration.h +++ b/include/migration/migration.h @@ -86,6 +86,8 @@ struct MigrationIncomingState { */ QemuEvent main_thread_load_event; + /* Userfault fd: for the kernel to send us notifications */ + int userfault_fd; QEMUFile *to_src_file; QemuMutex rp_mutex; /* We send replies from multiple threads */ @@ -204,6 +206,7 @@ int ram_postcopy_send_discard_bitmap(MigrationState *ms); /* For incoming postcopy discard */ int ram_discard_range(MigrationIncomingState *mis, const char *block_name, uint64_t start, size_t length); +int ram_postcopy_incoming_init(MigrationIncomingState *mis); /** * @migrate_add_blocker - prevent migration from proceeding diff --git a/include/migration/postcopy-ram.h b/include/migration/postcopy-ram.h index de79fa778f..f87020c582 100644 --- a/include/migration/postcopy-ram.h +++ b/include/migration/postcopy-ram.h @@ -16,6 +16,18 @@ /* Return true if the host supports everything we need to do postcopy-ram */ bool postcopy_ram_supported_by_host(void); +/* + * Initialise postcopy-ram, setting the RAM to a state where we can go into + * postcopy later; must be called prior to any precopy. + * Called from ram.c's similarly named ram_postcopy_incoming_init. + */ +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages); + +/* + * At the end of a migration where postcopy_ram_incoming_init was called. 
+ */ +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis); + /* * Discard the contents of 'length' bytes from 'start' * We can assume that if we've been called postcopy_ram_hosttest returned true diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 261feda4c6..8478bfd3b4 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -184,6 +184,97 @@ int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, return 0; } +/* + * Setup an area of RAM so that it *can* be used for postcopy later; this + * must be done right at the start prior to pre-copy. + * opaque should be the MIS. + */ +static int init_range(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + MigrationIncomingState *mis = opaque; + + trace_postcopy_init_range(block_name, host_addr, offset, length); + + /* + * We need the whole of RAM to be truly empty for postcopy, so things + * like ROMs and any data tables built during init must be zero'd + * - we're going to get the copy from the source anyway. + * (Precopy will just overwrite this data, so doesn't need the discard) + */ + if (postcopy_ram_discard_range(mis, host_addr, length)) { + return -1; + } + + return 0; +} + +/* + * At the end of migration, undo the effects of init_range + * opaque should be the MIS. + */ +static int cleanup_range(const char *block_name, void *host_addr, + ram_addr_t offset, ram_addr_t length, void *opaque) +{ + MigrationIncomingState *mis = opaque; + struct uffdio_range range_struct; + trace_postcopy_cleanup_range(block_name, host_addr, offset, length); + + /* + * We turned off hugepage for the precopy stage with postcopy enabled; + * we can turn it back on now. + */ +#ifdef MADV_HUGEPAGE + if (madvise(host_addr, length, MADV_HUGEPAGE)) { + error_report("%s HUGEPAGE: %s", __func__, strerror(errno)); + return -1; + } +#endif + + /* + * We can also turn off userfault now since we should have all the + * pages. 
It can be useful to leave it on to debug postcopy + * if you're not sure it's always getting every page. + */ + range_struct.start = (uintptr_t)host_addr; + range_struct.len = length; + + if (ioctl(mis->userfault_fd, UFFDIO_UNREGISTER, &range_struct)) { + error_report("%s: userfault unregister %s", __func__, strerror(errno)); + + return -1; + } + + return 0; +} + +/* + * Initialise postcopy-ram, setting the RAM to a state where we can go into + * postcopy later; must be called prior to any precopy. + * called from ram.c's similarly named ram_postcopy_incoming_init + */ +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +{ + if (qemu_ram_foreach_block(init_range, mis)) { + return -1; + } + + return 0; +} + +/* + * At the end of a migration where postcopy_ram_incoming_init was called. + */ +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) +{ + /* TODO: Join the fault thread once we're sure it will exit */ + if (qemu_ram_foreach_block(cleanup_range, mis)) { + return -1; + } + + return 0; +} + #else /* No target OS support, stubs just fail */ bool postcopy_ram_supported_by_host(void) @@ -192,10 +283,23 @@ bool postcopy_ram_supported_by_host(void) return false; } +int postcopy_ram_incoming_init(MigrationIncomingState *mis, size_t ram_pages) +{ + error_report("postcopy_ram_incoming_init: No OS support"); + return -1; +} + +int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) +{ + assert(0); + return -1; +} + int postcopy_ram_discard_range(MigrationIncomingState *mis, uint8_t *start, size_t length) { assert(0); + return -1; } #endif diff --git a/migration/ram.c b/migration/ram.c index b481d555e9..2e27b26d4c 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -1794,6 +1794,17 @@ static void decompress_data_with_multi_threads(uint8_t *compbuf, } } +/* + * Allocate data structures etc needed by incoming migration with postcopy-ram + * postcopy-ram's similarly named postcopy_ram_incoming_init does the work + */ +int 
ram_postcopy_incoming_init(MigrationIncomingState *mis) +{ + size_t ram_pages = last_ram_offset() >> TARGET_PAGE_BITS; + + return postcopy_ram_incoming_init(mis, ram_pages); +} + static int ram_load(QEMUFile *f, void *opaque, int version_id) { int flags = 0, ret = 0; diff --git a/migration/savevm.c b/migration/savevm.c index 8f07abd1c6..674f0fb953 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -1281,6 +1281,12 @@ static int loadvm_postcopy_handle_advise(MigrationIncomingState *mis) return -1; } + if (ram_postcopy_incoming_init(mis)) { + return -1; + } + + postcopy_state_set(POSTCOPY_INCOMING_ADVISE); + return 0; } diff --git a/trace-events b/trace-events index e74675e5bf..47d3be31e1 100644 --- a/trace-events +++ b/trace-events @@ -1537,6 +1537,8 @@ rdma_start_outgoing_migration_after_rdma_source_init(void) "" postcopy_discard_send_finish(const char *ramblock, int nwords, int ncmds) "%s mask words sent=%d in %d commands" postcopy_discard_send_range(const char *ramblock, unsigned long start, unsigned long length) "%s:%lx/%lx" postcopy_ram_discard_range(void *start, size_t length) "%p,+%zx" +postcopy_cleanup_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" +postcopy_init_range(const char *ramblock, void *host_addr, size_t offset, size_t length) "%s: %p offset=%zx length=%zx" # kvm-all.c kvm_ioctl(int type, void *arg) "type 0x%x, arg %p"