Migration pull

- William's fix on hwpoison migration which used to crash QEMU
 - Peter's multifd cleanup + bugfix + optimizations
 - Avihai's fix on multifd crash over non-socket channels
 - Fabiano's multifd thread-race fix
 - Peter's CI fix series
 -----BEGIN PGP SIGNATURE-----
 
 iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZcREtRIccGV0ZXJ4QHJl
 ZGhhdC5jb20ACgkQO1/MzfOr1wacrwEAl2aeQkh51h/e+OKX7MG4/4Y6Edf6Oz7o
 IJLk/cyrUFQA/2exo2lOdv5zHNOJKwAYj8HYDraezrC/MK1eED4Wji0M
 =k53l
 -----END PGP SIGNATURE-----

Merge tag 'migration-staging-pull-request' of https://gitlab.com/peterx/qemu into staging

Migration pull

- William's fix on hwpoison migration which used to crash QEMU
- Peter's multifd cleanup + bugfix + optimizations
- Avihai's fix on multifd crash over non-socket channels
- Fabiano's multifd thread-race fix
- Peter's CI fix series

# -----BEGIN PGP SIGNATURE-----
#
# iIgEABYKADAWIQS5GE3CDMRX2s990ak7X8zN86vXBgUCZcREtRIccGV0ZXJ4QHJl
# ZGhhdC5jb20ACgkQO1/MzfOr1wacrwEAl2aeQkh51h/e+OKX7MG4/4Y6Edf6Oz7o
# IJLk/cyrUFQA/2exo2lOdv5zHNOJKwAYj8HYDraezrC/MK1eED4Wji0M
# =k53l
# -----END PGP SIGNATURE-----
# gpg: Signature made Thu 08 Feb 2024 03:04:21 GMT
# gpg:                using EDDSA key B9184DC20CC457DACF7DD1A93B5FCCCDF3ABD706
# gpg:                issuer "peterx@redhat.com"
# gpg: Good signature from "Peter Xu <xzpeter@gmail.com>" [marginal]
# gpg:                 aka "Peter Xu <peterx@redhat.com>" [marginal]
# gpg: WARNING: This key is not certified with sufficiently trusted signatures!
# gpg:          It is not certain that the signature belongs to the owner.
# Primary key fingerprint: B918 4DC2 0CC4 57DA CF7D  D1A9 3B5F CCCD F3AB D706

* tag 'migration-staging-pull-request' of https://gitlab.com/peterx/qemu: (34 commits)
  ci: Update comment for migration-compat-aarch64
  ci: Remove tag dependency for build-previous-qemu
  tests/migration-test: Stick with gicv3 in aarch64 test
  migration/multifd: Add a synchronization point for channel creation
  migration/multifd: Unify multifd and TLS connection paths
  migration/multifd: Move multifd_send_setup into migration thread
  migration/multifd: Move multifd_send_setup error handling in to the function
  migration/multifd: Remove p->running
  migration/multifd: Join the TLS thread
  migration: Fix logic of channels and transport compatibility check
  migration/multifd: Optimize sender side to be lockless
  migration/multifd: Fix MultiFDSendParams.packet_num race
  migration/multifd: Stick with send/recv on function names
  migration/multifd: Cleanup multifd_load_cleanup()
  migration/multifd: Cleanup multifd_save_cleanup()
  migration/multifd: Rewrite multifd_queue_page()
  migration/multifd: Change retval of multifd_send_pages()
  migration/multifd: Change retval of multifd_queue_page()
  migration/multifd: Split multifd_send_terminate_threads()
  migration/multifd: Forbid spurious wakeups
  ...

Signed-off-by: Peter Maydell <peter.maydell@linaro.org>
This commit is contained in:
Peter Maydell 2024-02-09 11:22:20 +00:00
commit 5d1fc61441
12 changed files with 553 additions and 402 deletions

View File

@ -189,6 +189,8 @@ build-previous-qemu:
TARGETS: x86_64-softmmu aarch64-softmmu
before_script:
- export QEMU_PREV_VERSION="$(sed 's/\([0-9.]*\)\.[0-9]*/v\1.0/' VERSION)"
- git remote add upstream https://gitlab.com/qemu-project/qemu
- git fetch upstream $QEMU_PREV_VERSION
- git checkout $QEMU_PREV_VERSION
after_script:
- mv build build-previous
@ -217,9 +219,10 @@ build-previous-qemu:
- QTEST_QEMU_BINARY_DST=./qemu-system-${TARGET}
QTEST_QEMU_BINARY=../build/qemu-system-${TARGET} ./tests/qtest/migration-test
# This job is disabled until we release 9.0. The existing
# migration-test in 8.2 is broken on aarch64. The fix was already
# commited, but it will only take effect once 9.0 is out.
# This job needs to be disabled until we can have an aarch64 CPU model that
# will both (1) support both KVM and TCG, and (2) provide a stable ABI.
# Currently only "-cpu max" can provide (1), however it doesn't guarantee
# (2). Mark this test skipped until later.
migration-compat-aarch64:
extends: .migration-compat-common
variables:

View File

@ -1119,6 +1119,11 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
return ret;
}
/*
* We track the poisoned pages to be able to:
* - replace them on VM reset
* - block a migration for a VM with a poisoned page
*/
typedef struct HWPoisonPage {
ram_addr_t ram_addr;
QLIST_ENTRY(HWPoisonPage) list;
@ -1152,6 +1157,11 @@ void kvm_hwpoison_page_add(ram_addr_t ram_addr)
QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
}
bool kvm_hwpoisoned_mem(void)
{
return !QLIST_EMPTY(&hwpoison_page_list);
}
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
{
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN

View File

@ -124,3 +124,8 @@ uint32_t kvm_dirty_ring_size(void)
{
return 0;
}
bool kvm_hwpoisoned_mem(void)
{
return false;
}

View File

@ -538,4 +538,10 @@ bool kvm_arch_cpu_check_are_resettable(void);
bool kvm_dirty_ring_enabled(void);
uint32_t kvm_dirty_ring_size(void);
/**
* kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page
* reported for the VM.
*/
bool kvm_hwpoisoned_mem(void);
#endif

View File

@ -67,6 +67,7 @@
#include "options.h"
#include "sysemu/dirtylimit.h"
#include "qemu/sockets.h"
#include "sysemu/kvm.h"
static NotifierList migration_state_notifiers =
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@ -128,11 +129,17 @@ static bool migration_needs_multiple_sockets(void)
return migrate_multifd() || migrate_postcopy_preempt();
}
static bool transport_supports_multi_channels(SocketAddress *saddr)
static bool transport_supports_multi_channels(MigrationAddress *addr)
{
return saddr->type == SOCKET_ADDRESS_TYPE_INET ||
saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
saddr->type == SOCKET_ADDRESS_TYPE_VSOCK;
if (addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) {
SocketAddress *saddr = &addr->u.socket;
return saddr->type == SOCKET_ADDRESS_TYPE_INET ||
saddr->type == SOCKET_ADDRESS_TYPE_UNIX ||
saddr->type == SOCKET_ADDRESS_TYPE_VSOCK;
}
return false;
}
static bool
@ -140,8 +147,7 @@ migration_channels_and_transport_compatible(MigrationAddress *addr,
Error **errp)
{
if (migration_needs_multiple_sockets() &&
(addr->transport == MIGRATION_ADDRESS_TYPE_SOCKET) &&
!transport_supports_multi_channels(&addr->u.socket)) {
!transport_supports_multi_channels(addr)) {
error_setg(errp, "Migration requires multi-channel URIs (e.g. tcp)");
return false;
}
@ -311,7 +317,7 @@ void migration_incoming_state_destroy(void)
{
struct MigrationIncomingState *mis = migration_incoming_get_current();
multifd_load_cleanup();
multifd_recv_cleanup();
compress_threads_load_cleanup();
if (mis->to_src_file) {
@ -662,7 +668,7 @@ static void process_incoming_migration_bh(void *opaque)
trace_vmstate_downtime_checkpoint("dst-precopy-bh-announced");
multifd_load_shutdown();
multifd_recv_shutdown();
dirty_bitmap_mig_before_vm_start();
@ -759,7 +765,7 @@ fail:
MIGRATION_STATUS_FAILED);
qemu_fclose(mis->from_src_file);
multifd_load_cleanup();
multifd_recv_cleanup();
compress_threads_load_cleanup();
exit(EXIT_FAILURE);
@ -885,7 +891,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp)
default_channel = !mis->from_src_file;
}
if (multifd_load_setup(errp) != 0) {
if (multifd_recv_setup(errp) != 0) {
return;
}
@ -1331,7 +1337,7 @@ static void migrate_fd_cleanup(MigrationState *s)
}
bql_lock();
multifd_save_cleanup();
multifd_send_shutdown();
qemu_mutex_lock(&s->qemu_file_lock);
tmp = s->to_dst_file;
s->to_dst_file = NULL;
@ -1906,6 +1912,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return false;
}
if (kvm_hwpoisoned_mem()) {
error_setg(errp, "Can't migrate this vm with hardware poisoned memory, "
"please reboot the vm and try again");
return false;
}
if (migration_is_blocked(errp)) {
return false;
}
@ -3315,6 +3327,10 @@ static void *migration_thread(void *opaque)
object_ref(OBJECT(s));
update_iteration_initial_status(s);
if (!multifd_send_setup()) {
goto out;
}
bql_lock();
qemu_savevm_state_header(s->to_dst_file);
bql_unlock();
@ -3386,6 +3402,7 @@ static void *migration_thread(void *opaque)
urgent = migration_rate_limit();
}
out:
trace_migration_thread_after_loop();
migration_iteration_finish(s);
object_unref(OBJECT(s));
@ -3623,15 +3640,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
return;
}
if (multifd_save_setup(&local_err) != 0) {
migrate_set_error(s, local_err);
error_report_err(local_err);
migrate_set_state(&s->state, MIGRATION_STATUS_SETUP,
MIGRATION_STATUS_FAILED);
migrate_fd_cleanup(s);
return;
}
if (migrate_background_snapshot()) {
qemu_thread_create(&s->thread, "bg_snapshot",
bg_migration_thread, s, QEMU_THREAD_JOINABLE);

View File

@ -116,17 +116,20 @@ static void zlib_send_cleanup(MultiFDSendParams *p, Error **errp)
*/
static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
{
MultiFDPages_t *pages = p->pages;
struct zlib_data *z = p->data;
z_stream *zs = &z->zs;
uint32_t out_size = 0;
int ret;
uint32_t i;
for (i = 0; i < p->normal_num; i++) {
multifd_send_prepare_header(p);
for (i = 0; i < pages->num; i++) {
uint32_t available = z->zbuff_len - out_size;
int flush = Z_NO_FLUSH;
if (i == p->normal_num - 1) {
if (i == pages->num - 1) {
flush = Z_SYNC_FLUSH;
}
@ -135,7 +138,7 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
* with compression. zlib does not guarantee that this is safe,
* therefore copy the page before calling deflate().
*/
memcpy(z->buf, p->pages->block->host + p->normal[i], p->page_size);
memcpy(z->buf, p->pages->block->host + pages->offset[i], p->page_size);
zs->avail_in = p->page_size;
zs->next_in = z->buf;
@ -171,6 +174,8 @@ static int zlib_send_prepare(MultiFDSendParams *p, Error **errp)
p->next_packet_size = out_size;
p->flags |= MULTIFD_FLAG_ZLIB;
multifd_send_fill_packet(p);
return 0;
}

View File

@ -113,21 +113,24 @@ static void zstd_send_cleanup(MultiFDSendParams *p, Error **errp)
*/
static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
{
MultiFDPages_t *pages = p->pages;
struct zstd_data *z = p->data;
int ret;
uint32_t i;
multifd_send_prepare_header(p);
z->out.dst = z->zbuff;
z->out.size = z->zbuff_len;
z->out.pos = 0;
for (i = 0; i < p->normal_num; i++) {
for (i = 0; i < pages->num; i++) {
ZSTD_EndDirective flush = ZSTD_e_continue;
if (i == p->normal_num - 1) {
if (i == pages->num - 1) {
flush = ZSTD_e_flush;
}
z->in.src = p->pages->block->host + p->normal[i];
z->in.src = p->pages->block->host + pages->offset[i];
z->in.size = p->page_size;
z->in.pos = 0;
@ -160,6 +163,8 @@ static int zstd_send_prepare(MultiFDSendParams *p, Error **errp)
p->next_packet_size = z->out.pos;
p->flags |= MULTIFD_FLAG_ZSTD;
multifd_send_fill_packet(p);
return 0;
}

File diff suppressed because it is too large Load Diff

View File

@ -13,16 +13,16 @@
#ifndef QEMU_MIGRATION_MULTIFD_H
#define QEMU_MIGRATION_MULTIFD_H
int multifd_save_setup(Error **errp);
void multifd_save_cleanup(void);
int multifd_load_setup(Error **errp);
void multifd_load_cleanup(void);
void multifd_load_shutdown(void);
bool multifd_send_setup(void);
void multifd_send_shutdown(void);
int multifd_recv_setup(Error **errp);
void multifd_recv_cleanup(void);
void multifd_recv_shutdown(void);
bool multifd_recv_all_channels_created(void);
void multifd_recv_new_channel(QIOChannel *ioc, Error **errp);
void multifd_recv_sync_main(void);
int multifd_send_sync_main(void);
int multifd_queue_page(RAMBlock *block, ram_addr_t offset);
bool multifd_queue_page(RAMBlock *block, ram_addr_t offset);
/* Multifd Compression flags */
#define MULTIFD_FLAG_SYNC (1 << 0)
@ -73,6 +73,9 @@ typedef struct {
char *name;
/* channel thread id */
QemuThread thread;
bool thread_created;
QemuThread tls_thread;
bool tls_thread_created;
/* communication channel */
QIOChannel *c;
/* is the yank function registered */
@ -91,18 +94,19 @@ typedef struct {
/* syncs main thread and channels */
QemuSemaphore sem_sync;
/* this mutex protects the following parameters */
QemuMutex mutex;
/* is this channel thread running */
bool running;
/* should this thread finish */
bool quit;
/* multifd flags for each packet */
uint32_t flags;
/* global number of generated multifd packets */
uint64_t packet_num;
/* thread has work to do */
int pending_job;
/*
* The sender thread has work to do if either of below boolean is set.
*
* @pending_job: a job is pending
* @pending_sync: a sync request is pending
*
* For both of these fields, they're only set by the requesters, and
* cleared by the multifd sender threads.
*/
bool pending_job;
bool pending_sync;
/* array of pages to sent.
* The owner of 'pages' depends of 'pending_job' value:
* pending_job == 0 -> migration_thread can use it.
@ -117,17 +121,13 @@ typedef struct {
/* size of the next packet that contains pages */
uint32_t next_packet_size;
/* packets sent through this channel */
uint64_t num_packets;
uint64_t packets_sent;
/* non zero pages sent through this channel */
uint64_t total_normal_pages;
/* buffers to send */
struct iovec *iov;
/* number of iovs used */
uint32_t iovs_num;
/* Pages that are not zero */
ram_addr_t *normal;
/* num of non zero pages */
uint32_t normal_num;
/* used for compression methods */
void *data;
} MultiFDSendParams;
@ -142,6 +142,7 @@ typedef struct {
char *name;
/* channel thread id */
QemuThread thread;
bool thread_created;
/* communication channel */
QIOChannel *c;
/* packet allocated len */
@ -156,8 +157,6 @@ typedef struct {
/* this mutex protects the following parameters */
QemuMutex mutex;
/* is this channel thread running */
bool running;
/* should this thread finish */
bool quit;
/* multifd flags for each packet */
@ -171,8 +170,8 @@ typedef struct {
MultiFDPacket_t *packet;
/* size of the next packet that contains pages */
uint32_t next_packet_size;
/* packets sent through this channel */
uint64_t num_packets;
/* packets received through this channel */
uint64_t packets_recved;
/* ramblock */
RAMBlock *block;
/* ramblock host address */
@ -205,6 +204,14 @@ typedef struct {
} MultiFDMethods;
void multifd_register_ops(int method, MultiFDMethods *ops);
void multifd_send_fill_packet(MultiFDSendParams *p);
static inline void multifd_send_prepare_header(MultiFDSendParams *p)
{
p->iov[0].iov_len = p->packet_len;
p->iov[0].iov_base = p->packet;
p->iovs_num++;
}
#endif

View File

@ -1252,7 +1252,7 @@ static int ram_save_page(RAMState *rs, PageSearchStatus *pss)
static int ram_save_multifd_page(RAMBlock *block, ram_addr_t offset)
{
if (multifd_queue_page(block, offset) < 0) {
if (!multifd_queue_page(block, offset)) {
return -1;
}
stat64_add(&mig_stats.normal_pages, 1);

View File

@ -141,7 +141,7 @@ multifd_send_error(uint8_t id) "channel %u"
multifd_send_sync_main(long packet_num) "packet num %ld"
multifd_send_sync_main_signal(uint8_t id) "channel %u"
multifd_send_sync_main_wait(uint8_t id) "channel %u"
multifd_send_terminate_threads(bool error) "error %d"
multifd_send_terminate_threads(void) ""
multifd_send_thread_end(uint8_t id, uint64_t packets, uint64_t normal_pages) "channel %u packets %" PRIu64 " normal pages %" PRIu64
multifd_send_thread_start(uint8_t id) "%u"
multifd_tls_outgoing_handshake_start(void *ioc, void *tioc, const char *hostname) "ioc=%p tioc=%p hostname=%s"

View File

@ -819,7 +819,7 @@ static int test_migrate_start(QTestState **from, QTestState **to,
} else if (strcmp(arch, "aarch64") == 0) {
memory_size = "150M";
machine_alias = "virt";
machine_opts = "gic-version=max";
machine_opts = "gic-version=3";
arch_opts = g_strdup_printf("-cpu max -kernel %s", bootpath);
start_address = ARM_TEST_MEM_START;
end_address = ARM_TEST_MEM_END;