diff --git a/migration/migration.h b/migration/migration.h index 8130b703eb..42c7395094 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -45,6 +45,24 @@ struct PostcopyBlocktimeContext; */ #define CLEAR_BITMAP_SHIFT_MAX 31 +/* This is an abstraction of a "temp huge page" for postcopy's purpose */ +typedef struct { + /* + * This points to a temporary huge page as a buffer for UFFDIO_COPY. It's + * mmap()ed and needs to be freed when cleanup. + */ + void *tmp_huge_page; + /* + * This points to the host page we're going to install for this temp page. + * It tells us after we've received the whole page, where we should put it. + */ + void *host_addr; + /* Number of small pages copied (in size of TARGET_PAGE_SIZE) */ + unsigned int target_pages; + /* Whether this page contains all zeros */ + bool all_zero; +} PostcopyTmpPage; + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *from_src_file; @@ -81,7 +99,22 @@ struct MigrationIncomingState { QemuMutex rp_mutex; /* We send replies from multiple threads */ /* RAMBlock of last request sent to source */ RAMBlock *last_rb; - void *postcopy_tmp_page; + /* + * Number of postcopy channels including the default precopy channel, so + * vanilla postcopy will only contain one channel which contain both + * precopy and postcopy streams. + * + * This is calculated when the src requests to enable postcopy but before + * it starts. Its value can depend on e.g. whether postcopy preemption is + * enabled. + */ + unsigned int postcopy_channels; + /* + * An array of temp host huge pages to be used, one for each postcopy + * channel. + */ + PostcopyTmpPage *postcopy_tmp_pages; + /* This is shared for all postcopy channels */ void *postcopy_tmp_zero_page; /* PostCopyFD's for external userfaultfds & handlers of shared memory */ GArray *postcopy_remote_fds; @@ -391,5 +424,6 @@ bool migration_rate_limit(void); void migration_cancel(const Error *error); void populate_vfio_info(MigrationInfo *info); +void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page); #endif diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 2a2cc5faf8..30c3508f44 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -526,9 +526,18 @@ int postcopy_ram_incoming_init(MigrationIncomingState *mis) static void postcopy_temp_pages_cleanup(MigrationIncomingState *mis) { - if (mis->postcopy_tmp_page) { - munmap(mis->postcopy_tmp_page, mis->largest_page_size); - mis->postcopy_tmp_page = NULL; + int i; + + if (mis->postcopy_tmp_pages) { + for (i = 0; i < mis->postcopy_channels; i++) { + if (mis->postcopy_tmp_pages[i].tmp_huge_page) { + munmap(mis->postcopy_tmp_pages[i].tmp_huge_page, + mis->largest_page_size); + mis->postcopy_tmp_pages[i].tmp_huge_page = NULL; + } + } + g_free(mis->postcopy_tmp_pages); + mis->postcopy_tmp_pages = NULL; } if (mis->postcopy_tmp_zero_page) { @@ -1092,17 +1101,30 @@ retry: static int postcopy_temp_pages_setup(MigrationIncomingState *mis) { - int err; + PostcopyTmpPage *tmp_page; + int err, i, channels; + void *temp_page; - mis->postcopy_tmp_page = mmap(NULL, mis->largest_page_size, - PROT_READ | PROT_WRITE, - MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); - if (mis->postcopy_tmp_page == MAP_FAILED) { - err = errno; - mis->postcopy_tmp_page = NULL; - error_report("%s: Failed to map postcopy_tmp_page %s", - __func__, strerror(err)); - return -err; + /* TODO: will be boosted when enable postcopy preemption */ + mis->postcopy_channels = 1; + + channels = mis->postcopy_channels; + mis->postcopy_tmp_pages = g_malloc0_n(sizeof(PostcopyTmpPage), channels); + + for (i = 0; i < channels; i++) { + tmp_page = &mis->postcopy_tmp_pages[i]; + temp_page = mmap(NULL, mis->largest_page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, -1, 0); + if (temp_page == MAP_FAILED) { + err = errno; + error_report("%s: Failed to map postcopy_tmp_pages[%d]: %s", + __func__, i, strerror(err)); + /* Clean up will be done later */ + return -err; + } + tmp_page->tmp_huge_page = temp_page; + /* Initialize default states for each tmp page */ + postcopy_temp_page_reset(tmp_page); } /* @@ -1352,6 +1374,16 @@ int postcopy_wake_shared(struct PostCopyFD *pcfd, #endif /* ------------------------------------------------------------------------- */ +void postcopy_temp_page_reset(PostcopyTmpPage *tmp_page) +{ + tmp_page->target_pages = 0; + tmp_page->host_addr = NULL; + /* + * This is set to true when reset, and cleared as long as we received any + * of the non-zero small page within this huge page. + */ + tmp_page->all_zero = true; +} void postcopy_fault_thread_notify(MigrationIncomingState *mis) { diff --git a/migration/ram.c b/migration/ram.c index 781f0745dc..fe3de84856 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -3641,11 +3641,8 @@ static int ram_load_postcopy(QEMUFile *f) bool place_needed = false; bool matches_target_page_size = false; MigrationIncomingState *mis = migration_incoming_get_current(); - /* Temporary page that is later 'placed' */ - void *postcopy_host_page = mis->postcopy_tmp_page; - void *host_page = NULL; - bool all_zero = true; - int target_pages = 0; + /* Currently we only use channel 0. TODO: use all the channels */ + PostcopyTmpPage *tmp_page = &mis->postcopy_tmp_pages[0]; while (!ret && !(flags & RAM_SAVE_FLAG_EOS)) { ram_addr_t addr; @@ -3689,7 +3686,7 @@ static int ram_load_postcopy(QEMUFile *f) ret = -EINVAL; break; } - target_pages++; + tmp_page->target_pages++; matches_target_page_size = block->page_size == TARGET_PAGE_SIZE; /* * Postcopy requires that we place whole host pages atomically; @@ -3701,15 +3698,16 @@ static int ram_load_postcopy(QEMUFile *f) * however the source ensures it always sends all the components * of a host page in one chunk. */ - page_buffer = postcopy_host_page + + page_buffer = tmp_page->tmp_huge_page + host_page_offset_from_ram_block_offset(block, addr); /* If all TP are zero then we can optimise the place */ - if (target_pages == 1) { - host_page = host_page_from_ram_block_offset(block, addr); - } else if (host_page != host_page_from_ram_block_offset(block, - addr)) { + if (tmp_page->target_pages == 1) { + tmp_page->host_addr = + host_page_from_ram_block_offset(block, addr); + } else if (tmp_page->host_addr != + host_page_from_ram_block_offset(block, addr)) { /* not the 1st TP within the HP */ - error_report("Non-same host page %p/%p", host_page, + error_report("Non-same host page %p/%p", tmp_page->host_addr, host_page_from_ram_block_offset(block, addr)); ret = -EINVAL; break; @@ -3719,10 +3717,11 @@ static int ram_load_postcopy(QEMUFile *f) * If it's the last part of a host page then we place the host * page */ - if (target_pages == (block->page_size / TARGET_PAGE_SIZE)) { + if (tmp_page->target_pages == + (block->page_size / TARGET_PAGE_SIZE)) { place_needed = true; } - place_source = postcopy_host_page; + place_source = tmp_page->tmp_huge_page; } switch (flags & ~RAM_SAVE_FLAG_CONTINUE) { @@ -3736,12 +3735,12 @@ static int ram_load_postcopy(QEMUFile *f) memset(page_buffer, ch, TARGET_PAGE_SIZE); } if (ch) { - all_zero = false; + tmp_page->all_zero = false; } break; case RAM_SAVE_FLAG_PAGE: - all_zero = false; + tmp_page->all_zero = false; if (!matches_target_page_size) { /* For huge pages, we always use temporary buffer */ qemu_get_buffer(f, page_buffer, TARGET_PAGE_SIZE); @@ -3759,7 +3758,7 @@ static int ram_load_postcopy(QEMUFile *f) } break; case RAM_SAVE_FLAG_COMPRESS_PAGE: - all_zero = false; + tmp_page->all_zero = false; len = qemu_get_be32(f); if (len < 0 || len > compressBound(TARGET_PAGE_SIZE)) { error_report("Invalid compressed data length: %d", len); @@ -3791,16 +3790,14 @@ static int ram_load_postcopy(QEMUFile *f) } if (!ret && place_needed) { - if (all_zero) { - ret = postcopy_place_page_zero(mis, host_page, block); + if (tmp_page->all_zero) { + ret = postcopy_place_page_zero(mis, tmp_page->host_addr, block); } else { - ret = postcopy_place_page(mis, host_page, place_source, - block); + ret = postcopy_place_page(mis, tmp_page->host_addr, + place_source, block); } place_needed = false; - target_pages = 0; - /* Assume we have a zero page until we detect something different */ - all_zero = true; + postcopy_temp_page_reset(tmp_page); } } diff --git a/migration/savevm.c b/migration/savevm.c index 41e3238798..0ccd7e5e3f 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -2579,6 +2579,18 @@ void qemu_loadvm_state_cleanup(void) /* Return true if we should continue the migration, or false. */ static bool postcopy_pause_incoming(MigrationIncomingState *mis) { + int i; + + /* + * If network is interrupted, any temp page we received will be useless + * because we didn't mark them as "received" in receivedmap. After a + * proper recovery later (which will sync src dirty bitmap with receivedmap + * on dest) these cached small pages will be resent again. + */ + for (i = 0; i < mis->postcopy_channels; i++) { + postcopy_temp_page_reset(&mis->postcopy_tmp_pages[i]); + } + trace_postcopy_pause_incoming(); assert(migrate_postcopy_ram());