diff --git a/hw/core/machine.c b/hw/core/machine.c index 45e3d24fdc..cd13b8b0a3 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -42,6 +42,7 @@ GlobalProperty hw_compat_7_2[] = { { "e1000e", "migrate-timadj", "off" }, { "virtio-mem", "x-early-migration", "false" }, + { "migration", "x-preempt-pre-7-2", "true" }, }; const size_t hw_compat_7_2_len = G_N_ELEMENTS(hw_compat_7_2); diff --git a/io/channel-tls.c b/io/channel-tls.c index 5a7a3d48d6..9805dd0a3f 100644 --- a/io/channel-tls.c +++ b/io/channel-tls.c @@ -74,6 +74,9 @@ qio_channel_tls_new_server(QIOChannel *master, ioc = QIO_CHANNEL_TLS(object_new(TYPE_QIO_CHANNEL_TLS)); ioc->master = master; + if (qio_channel_has_feature(master, QIO_CHANNEL_FEATURE_SHUTDOWN)) { + qio_channel_set_feature(QIO_CHANNEL(ioc), QIO_CHANNEL_FEATURE_SHUTDOWN); + } object_ref(OBJECT(master)); ioc->session = qcrypto_tls_session_new( diff --git a/migration/migration.c b/migration/migration.c index ae2025d9d8..bda4789193 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3464,8 +3464,12 @@ static void migration_completion(MigrationState *s) qemu_savevm_state_complete_postcopy(s->to_dst_file); qemu_mutex_unlock_iothread(); - /* Shutdown the postcopy fast path thread */ - if (migrate_postcopy_preempt()) { + /* + * Shutdown the postcopy fast path thread. This is only needed + * when dest QEMU binary is old (7.1/7.2). QEMU 8.0+ doesn't need + * this. + */ + if (migrate_postcopy_preempt() && s->preempt_pre_7_2) { postcopy_preempt_shutdown_file(s); } @@ -4384,6 +4388,15 @@ void migrate_fd_connect(MigrationState *s, Error *error_in) } } + /* + * This needs to be done before resuming a postcopy. Note: for newer + * QEMUs we will delay the channel creation until postcopy_start(), to + * avoid disorder of channel creations. 
+ */ + if (migrate_postcopy_preempt() && s->preempt_pre_7_2) { + postcopy_preempt_setup(s); + } + if (resume) { /* Wakeup the main migration thread to do the recovery */ migrate_set_state(&s->state, MIGRATION_STATUS_POSTCOPY_PAUSED, @@ -4443,6 +4456,8 @@ static Property migration_properties[] = { decompress_error_check, true), DEFINE_PROP_UINT8("x-clear-bitmap-shift", MigrationState, clear_bitmap_shift, CLEAR_BITMAP_SHIFT_DEFAULT), + DEFINE_PROP_BOOL("x-preempt-pre-7-2", MigrationState, + preempt_pre_7_2, false), /* Migration parameters */ DEFINE_PROP_UINT8("x-compress-level", MigrationState, diff --git a/migration/migration.h b/migration/migration.h index 2da2f8a164..310ae8901b 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -65,6 +65,12 @@ typedef struct { bool all_zero; } PostcopyTmpPage; +typedef enum { + PREEMPT_THREAD_NONE = 0, + PREEMPT_THREAD_CREATED, + PREEMPT_THREAD_QUIT, +} PreemptThreadStatus; + /* State for the incoming migration */ struct MigrationIncomingState { QEMUFile *from_src_file; @@ -124,7 +130,12 @@ struct MigrationIncomingState { QemuSemaphore postcopy_qemufile_dst_done; /* Postcopy priority thread is used to receive postcopy requested pages */ QemuThread postcopy_prio_thread; - bool postcopy_prio_thread_created; + /* + * Always set by the main vm load thread only, but can be read by the + * postcopy preempt thread. "volatile" makes sure all reads will be + * up to date across cores. + */ + volatile PreemptThreadStatus preempt_thread_status; /* * Used to sync between the ram load main thread and the fast ram load * thread. It protects postcopy_qemufile_dst, which is the postcopy @@ -364,6 +375,34 @@ struct MigrationState { * do not trigger spurious decompression errors. */ bool decompress_error_check; + /* + * This variable only affects behavior when postcopy preempt mode is + * enabled. 
+ * + * When set: + * + * - postcopy preempt src QEMU instance will generate an EOS message at + * the end of migration to shut the preempt channel on dest side. + * + * - postcopy preempt channel will be created at the setup phase on src + * QEMU. + * + * When clear: + * + * - postcopy preempt src QEMU instance will _not_ generate an EOS + * message at the end of migration, the dest qemu will shutdown the + * channel itself. + * + * - postcopy preempt channel will be created at the switching phase + * from precopy -> postcopy (to avoid race condition of misordered + * creation of channels). + * + * NOTE: See message-id on qemu-devel + * mailing list for more information on the possible race. Everyone + * should probably just keep this value untouched after set by the + * machine type (or the default). + */ + bool preempt_pre_7_2; /* * This decides the size of guest memory chunk that will be used diff --git a/migration/postcopy-ram.c b/migration/postcopy-ram.c index 41c0713650..93f39f8e06 100644 --- a/migration/postcopy-ram.c +++ b/migration/postcopy-ram.c @@ -568,9 +568,14 @@ int postcopy_ram_incoming_cleanup(MigrationIncomingState *mis) { trace_postcopy_ram_incoming_cleanup_entry(); - if (mis->postcopy_prio_thread_created) { + if (mis->preempt_thread_status == PREEMPT_THREAD_CREATED) { + /* Notify the fast load thread to quit */ + mis->preempt_thread_status = PREEMPT_THREAD_QUIT; + if (mis->postcopy_qemufile_dst) { + qemu_file_shutdown(mis->postcopy_qemufile_dst); + } qemu_thread_join(&mis->postcopy_prio_thread); - mis->postcopy_prio_thread_created = false; + mis->preempt_thread_status = PREEMPT_THREAD_NONE; } if (mis->have_fault_thread) { @@ -1203,7 +1208,7 @@ int postcopy_ram_incoming_setup(MigrationIncomingState *mis) */ postcopy_thread_create(mis, &mis->postcopy_prio_thread, "fault-fast", postcopy_preempt_thread, QEMU_THREAD_JOINABLE); - mis->postcopy_prio_thread_created = true; + mis->preempt_thread_status = PREEMPT_THREAD_CREATED; } 
trace_postcopy_ram_enable_notify(); @@ -1625,8 +1630,14 @@ int postcopy_preempt_establish_channel(MigrationState *s) return 0; } - /* Kick off async task to establish preempt channel */ - postcopy_preempt_setup(s); + /* + * Kick off async task to establish preempt channel. Only do so with + * 8.0+ machines, because 7.1/7.2 require the channel to be created in + * setup phase of migration (even if racy in an unreliable network). + */ + if (!s->preempt_pre_7_2) { + postcopy_preempt_setup(s); + } /* * We need the postcopy preempt channel to be established before @@ -1652,6 +1663,11 @@ static void postcopy_pause_ram_fast_load(MigrationIncomingState *mis) trace_postcopy_pause_fast_load_continued(); } +static bool preempt_thread_should_run(MigrationIncomingState *mis) +{ + return mis->preempt_thread_status != PREEMPT_THREAD_QUIT; +} + void *postcopy_preempt_thread(void *opaque) { MigrationIncomingState *mis = opaque; @@ -1671,11 +1687,11 @@ void *postcopy_preempt_thread(void *opaque) /* Sending RAM_SAVE_FLAG_EOS to terminate this thread */ qemu_mutex_lock(&mis->postcopy_prio_thread_mutex); - while (1) { + while (preempt_thread_should_run(mis)) { ret = ram_load_postcopy(mis->postcopy_qemufile_dst, RAM_CHANNEL_POSTCOPY); /* If error happened, go into recovery routine */ - if (ret) { + if (ret && preempt_thread_should_run(mis)) { postcopy_pause_ram_fast_load(mis); } else { /* We're done */ diff --git a/migration/ram.c b/migration/ram.c index 96e8a19a58..79d881f735 100644 --- a/migration/ram.c +++ b/migration/ram.c @@ -688,12 +688,11 @@ exit: * @offset: offset inside the block for the page * in the lower bits, it contains flags */ -static size_t save_page_header(PageSearchStatus *pss, RAMBlock *block, - ram_addr_t offset) +static size_t save_page_header(PageSearchStatus *pss, QEMUFile *f, + RAMBlock *block, ram_addr_t offset) { size_t size, len; bool same_block = (block == pss->last_sent_block); - QEMUFile *f = pss->pss_channel; if (same_block) { offset |= 
RAM_SAVE_FLAG_CONTINUE; @@ -867,7 +866,7 @@ static int save_xbzrle_page(RAMState *rs, PageSearchStatus *pss, } /* Send XBZRLE based compressed page */ - bytes_xbzrle = save_page_header(pss, block, + bytes_xbzrle = save_page_header(pss, pss->pss_channel, block, offset | RAM_SAVE_FLAG_XBZRLE); qemu_put_byte(file, ENCODING_FLAG_XBZRLE); qemu_put_be16(file, encoded_len); @@ -1302,15 +1301,14 @@ void ram_release_page(const char *rbname, uint64_t offset) * @block: block that contains the page we want to send * @offset: offset inside the block for the page */ -static int save_zero_page_to_file(PageSearchStatus *pss, +static int save_zero_page_to_file(PageSearchStatus *pss, QEMUFile *file, RAMBlock *block, ram_addr_t offset) { uint8_t *p = block->host + offset; - QEMUFile *file = pss->pss_channel; int len = 0; if (buffer_is_zero(p, TARGET_PAGE_SIZE)) { - len += save_page_header(pss, block, offset | RAM_SAVE_FLAG_ZERO); + len += save_page_header(pss, file, block, offset | RAM_SAVE_FLAG_ZERO); qemu_put_byte(file, 0); len += 1; ram_release_page(block->idstr, offset); @@ -1327,10 +1325,10 @@ static int save_zero_page_to_file(PageSearchStatus *pss, * @block: block that contains the page we want to send * @offset: offset inside the block for the page */ -static int save_zero_page(PageSearchStatus *pss, RAMBlock *block, +static int save_zero_page(PageSearchStatus *pss, QEMUFile *f, RAMBlock *block, ram_addr_t offset) { - int len = save_zero_page_to_file(pss, block, offset); + int len = save_zero_page_to_file(pss, f, block, offset); if (len) { stat64_add(&ram_atomic_counters.duplicate, 1); @@ -1394,7 +1392,7 @@ static int save_normal_page(PageSearchStatus *pss, RAMBlock *block, { QEMUFile *file = pss->pss_channel; - ram_transferred_add(save_page_header(pss, block, + ram_transferred_add(save_page_header(pss, pss->pss_channel, block, offset | RAM_SAVE_FLAG_PAGE)); if (async) { qemu_put_buffer_async(file, buf, TARGET_PAGE_SIZE, @@ -1473,11 +1471,11 @@ static bool 
do_compress_ram_page(QEMUFile *f, z_stream *stream, RAMBlock *block, uint8_t *p = block->host + offset; int ret; - if (save_zero_page_to_file(pss, block, offset)) { + if (save_zero_page_to_file(pss, f, block, offset)) { return true; } - save_page_header(pss, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); + save_page_header(pss, f, block, offset | RAM_SAVE_FLAG_COMPRESS_PAGE); /* * copy it to a internal buffer to avoid it being modified by VM @@ -2355,7 +2353,7 @@ static int ram_save_target_page_legacy(RAMState *rs, PageSearchStatus *pss) return 1; } - res = save_zero_page(pss, block, offset); + res = save_zero_page(pss, pss->pss_channel, block, offset); if (res > 0) { /* Must let xbzrle know, otherwise a previous (now 0'd) cached * page would be stale @@ -3508,12 +3506,13 @@ static void ram_state_pending_estimate(void *opaque, uint64_t *must_precopy, static void ram_state_pending_exact(void *opaque, uint64_t *must_precopy, uint64_t *can_postcopy) { + MigrationState *s = migrate_get_current(); RAMState **temp = opaque; RAMState *rs = *temp; uint64_t remaining_size = rs->migration_dirty_pages * TARGET_PAGE_SIZE; - if (!migration_in_postcopy()) { + if (!migration_in_postcopy() && remaining_size < s->threshold_size) { qemu_mutex_lock_iothread(); WITH_RCU_READ_LOCK_GUARD() { migration_bitmap_sync_precopy(rs);