From e1c2b0e133edd76168d20b80a13e00ad6942568e Mon Sep 17 00:00:00 2001 From: Juan Quintela Date: Tue, 2 Jan 2024 21:19:07 +0100 Subject: [PATCH 01/26] MAINTAINERS: Leaving Migration I am leaving Red Hat, and as part of that I am leaving Migration maintenarship. You are left in good hands with Peter and Fabiano. Thanks for all the fish. Signed-off-by: Juan Quintela Reviewed-by: Bin Meng Link: https://lore.kernel.org/r/20240102201908.1987-2-quintela@redhat.com [peterx: prefix the subject] Signed-off-by: Peter Xu --- .mailmap | 1 + MAINTAINERS | 3 --- 2 files changed, 1 insertion(+), 3 deletions(-) diff --git a/.mailmap b/.mailmap index e12e19f691..d94572af05 100644 --- a/.mailmap +++ b/.mailmap @@ -81,6 +81,7 @@ Greg Kurz Huacai Chen Huacai Chen James Hogan +Juan Quintela Leif Lindholm Leif Lindholm Luc Michel diff --git a/MAINTAINERS b/MAINTAINERS index 395f26ba86..56464cd916 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -70,7 +70,6 @@ R: Daniel P. Berrangé R: Thomas Huth R: Markus Armbruster R: Philippe Mathieu-Daudé -R: Juan Quintela W: https://www.qemu.org/docs/master/devel/index.html S: Odd Fixes F: docs/devel/style.rst @@ -3355,7 +3354,6 @@ S: Odd Fixes F: scripts/checkpatch.pl Migration -M: Juan Quintela M: Peter Xu M: Fabiano Rosas R: Leonardo Bras @@ -3375,7 +3373,6 @@ F: util/userfaultfd.c X: migration/rdma* RDMA Migration -M: Juan Quintela R: Li Zhijian R: Peter Xu R: Leonardo Bras From 5d799717c21438423d81c58dd56fc01d5464be12 Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Thu, 21 Dec 2023 14:07:34 -0300 Subject: [PATCH 02/26] MAINTAINERS: Remove myself as reviewer from Live Migration I am currently focusing in kernel development, so I will probably not be of much help in reviewing general Live Migration changes. For above reason I am removing my Reviewer status from Migration and RDMA Migration. Signed-off-by: Leonardo Bras Link: https://lore.kernel.org/r/20231221170739.332378-1-leobras@redhat.com Signed-off-by: Peter Xu --- MAINTAINERS | 2 -- 1 file changed, 2 deletions(-) diff --git a/MAINTAINERS b/MAINTAINERS index 56464cd916..00ec1f7eca 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3356,7 +3356,6 @@ F: scripts/checkpatch.pl Migration M: Peter Xu M: Fabiano Rosas -R: Leonardo Bras S: Maintained F: hw/core/vmstate-if.c F: include/hw/vmstate-if.h @@ -3375,7 +3374,6 @@ X: migration/rdma* RDMA Migration R: Li Zhijian R: Peter Xu -R: Leonardo Bras S: Odd Fixes F: migration/rdma* From f06f316d3e7f100cbbeaf0ea001d6332c12e7ab5 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:30 -0800 Subject: [PATCH 03/26] cpus: vm_was_suspended Add a state variable to remember if a vm previously transitioned into a suspended state. Signed-off-by: Steve Sistare Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/1704312341-66640-2-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- include/sysemu/runstate.h | 2 ++ system/cpus.c | 15 +++++++++++++++ 2 files changed, 17 insertions(+) diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h index c8c2bd8a61..88a67e22b0 100644 --- a/include/sysemu/runstate.h +++ b/include/sysemu/runstate.h @@ -51,6 +51,8 @@ int vm_prepare_start(bool step_pending); int vm_stop(RunState state); int vm_stop_force_state(RunState state); int vm_shutdown(void); +void vm_set_suspended(bool suspended); +bool vm_get_suspended(void); typedef enum WakeupReason { /* Always keep QEMU_WAKEUP_REASON_NONE = 0 */ diff --git a/system/cpus.c b/system/cpus.c index a444a747f0..9f631ab734 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -259,6 +259,21 @@ void cpu_interrupt(CPUState *cpu, int mask) } } +/* + * True if the vm was previously suspended, and has not been woken or reset. + */ +static int vm_was_suspended; + +void vm_set_suspended(bool suspended) +{ + vm_was_suspended = suspended; +} + +bool vm_get_suspended(void) +{ + return vm_was_suspended; +} + static int do_vm_stop(RunState state, bool send_stop) { int ret = 0; From b9ae473d80302519a7b89f98795a80abfea1deea Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:31 -0800 Subject: [PATCH 04/26] cpus: stop vm in suspended runstate Currently, a vm in the suspended state is not completely stopped. The VCPUs have been paused, but the cpu clock still runs, and runstate notifiers for the transition to stopped have not been called. This causes problems for live migration. Stale cpu timers_state is saved to the migration stream, causing time errors in the guest when it wakes from suspend, and state that would have been modified by runstate notifiers is wrong. Modify vm_stop to completely stop the vm if the current state is suspended, transition to RUN_STATE_PAUSED, and remember that the machine was suspended. Modify vm_start to restore the suspended state. This affects all callers of vm_stop and vm_start, notably, the qapi stop and cont commands: old behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_SUSPENDED new behavior: RUN_STATE_SUSPENDED --> stop --> RUN_STATE_PAUSED RUN_STATE_PAUSED --> cont --> RUN_STATE_SUSPENDED For example: (qemu) info status VM status: paused (suspended) (qemu) stop (qemu) info status VM status: paused (qemu) system_wakeup Error: Unable to wake up: guest is not in suspended state (qemu) cont (qemu) info status VM status: paused (suspended) (qemu) system_wakeup (qemu) info status VM status: running Suggested-by: Peter Xu Signed-off-by: Steve Sistare Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/1704312341-66640-3-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- include/sysemu/runstate.h | 9 +++++++++ qapi/misc.json | 11 +++++++++-- qapi/run-state.json | 6 +++--- system/cpus.c | 23 +++++++++++++++-------- system/runstate.c | 3 +++ 5 files changed, 39 insertions(+), 13 deletions(-) diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h index 88a67e22b0..618eb491af 100644 --- a/include/sysemu/runstate.h +++ b/include/sysemu/runstate.h @@ -40,6 +40,15 @@ static inline bool shutdown_caused_by_guest(ShutdownCause cause) return cause >= SHUTDOWN_CAUSE_GUEST_SHUTDOWN; } +/* + * In a "live" state, the vcpu clock is ticking, and the runstate notifiers + * think we are running. + */ +static inline bool runstate_is_live(RunState state) +{ + return state == RUN_STATE_RUNNING || state == RUN_STATE_SUSPENDED; +} + void vm_start(void); /** diff --git a/qapi/misc.json b/qapi/misc.json index cda2effa81..3622d98d01 100644 --- a/qapi/misc.json +++ b/qapi/misc.json @@ -134,7 +134,7 @@ ## # @stop: # -# Stop all guest VCPU execution. +# Stop guest VM execution. # # Since: 0.14 # @@ -143,6 +143,9 @@ # the guest remains paused once migration finishes, as if the -S # option was passed on the command line. # +# In the "suspended" state, it will completely stop the VM and +# cause a transition to the "paused" state. (Since 9.0) +# # Example: # # -> { "execute": "stop" } @@ -153,7 +156,7 @@ ## # @cont: # -# Resume guest VCPU execution. +# Resume guest VM execution. # # Since: 0.14 # @@ -165,6 +168,10 @@ # guest starts once migration finishes, removing the effect of the # -S command line option if it was passed. # +# If the VM was previously suspended, and not been reset or woken, +# this command will transition back to the "suspended" state. +# (Since 9.0) +# # Example: # # -> { "execute": "cont" } diff --git a/qapi/run-state.json b/qapi/run-state.json index f216ba54ec..ca05502e0a 100644 --- a/qapi/run-state.json +++ b/qapi/run-state.json @@ -102,7 +102,7 @@ ## # @StatusInfo: # -# Information about VCPU run state +# Information about VM run state # # @running: true if all VCPUs are runnable, false if not runnable # @@ -130,9 +130,9 @@ ## # @query-status: # -# Query the run status of all VCPUs +# Query the run status of the VM # -# Returns: @StatusInfo reflecting all VCPUs +# Returns: @StatusInfo reflecting the VM # # Since: 0.14 # diff --git a/system/cpus.c b/system/cpus.c index 9f631ab734..f162435dd4 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -277,11 +277,15 @@ bool vm_get_suspended(void) static int do_vm_stop(RunState state, bool send_stop) { int ret = 0; + RunState oldstate = runstate_get(); - if (runstate_is_running()) { + if (runstate_is_live(oldstate)) { + vm_was_suspended = (oldstate == RUN_STATE_SUSPENDED); runstate_set(state); cpu_disable_ticks(); - pause_all_vcpus(); + if (oldstate == RUN_STATE_RUNNING) { + pause_all_vcpus(); + } vm_state_notify(0, state); if (send_stop) { qapi_event_send_stop(); @@ -694,11 +698,13 @@ int vm_stop(RunState state) /** * Prepare for (re)starting the VM. - * Returns -1 if the vCPUs are not to be restarted (e.g. if they are already - * running or in case of an error condition), 0 otherwise. + * Returns 0 if the vCPUs should be restarted, -1 on an error condition, + * and 1 otherwise. */ int vm_prepare_start(bool step_pending) { + int ret = vm_was_suspended ? 1 : 0; + RunState state = vm_was_suspended ? RUN_STATE_SUSPENDED : RUN_STATE_RUNNING; RunState requested; qemu_vmstop_requested(&requested); @@ -729,9 +735,10 @@ int vm_prepare_start(bool step_pending) qapi_event_send_resume(); cpu_enable_ticks(); - runstate_set(RUN_STATE_RUNNING); - vm_state_notify(1, RUN_STATE_RUNNING); - return 0; + runstate_set(state); + vm_state_notify(1, state); + vm_was_suspended = false; + return ret; } void vm_start(void) @@ -745,7 +752,7 @@ void vm_start(void) current state is forgotten forever */ int vm_stop_force_state(RunState state) { - if (runstate_is_running()) { + if (runstate_is_live(runstate_get())) { return vm_stop(state); } else { int ret; diff --git a/system/runstate.c b/system/runstate.c index ea9d6c2a32..e2fa2040cb 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -108,6 +108,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_PAUSED, RUN_STATE_POSTMIGRATE }, { RUN_STATE_PAUSED, RUN_STATE_PRELAUNCH }, { RUN_STATE_PAUSED, RUN_STATE_COLO}, + { RUN_STATE_PAUSED, RUN_STATE_SUSPENDED}, { RUN_STATE_POSTMIGRATE, RUN_STATE_RUNNING }, { RUN_STATE_POSTMIGRATE, RUN_STATE_FINISH_MIGRATE }, @@ -161,6 +162,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_SUSPENDED, RUN_STATE_FINISH_MIGRATE }, { RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH }, { RUN_STATE_SUSPENDED, RUN_STATE_COLO}, + { RUN_STATE_SUSPENDED, RUN_STATE_PAUSED}, { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING }, { RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE }, @@ -502,6 +504,7 @@ void qemu_system_reset(ShutdownCause reason) qapi_event_send_reset(shutdown_caused_by_guest(reason), reason); } cpu_synchronize_all_post_reset(); + vm_set_suspended(false); } /* From 0f1db069b6c5e7f7ddcdcc9c64fd4b6dbe4afcdf Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:32 -0800 Subject: [PATCH 05/26] cpus: check running not RUN_STATE_RUNNING When a vm transitions from running to suspended, runstate notifiers are not called, so the notifiers still think the vm is running. Hence, when we call vm_start to restore the suspended state, we call vm_state_notify with running=1. However, some notifiers check for RUN_STATE_RUNNING. They must check the running boolean instead. No functional change. Signed-off-by: Steve Sistare Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/1704312341-66640-4-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- backends/tpm/tpm_emulator.c | 2 +- hw/usb/hcd-ehci.c | 2 +- hw/usb/redirect.c | 2 +- hw/xen/xen-hvm-common.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/backends/tpm/tpm_emulator.c b/backends/tpm/tpm_emulator.c index f7f1b4ad7a..254fce7764 100644 --- a/backends/tpm/tpm_emulator.c +++ b/backends/tpm/tpm_emulator.c @@ -904,7 +904,7 @@ static void tpm_emulator_vm_state_change(void *opaque, bool running, trace_tpm_emulator_vm_state_change(running, state); - if (!running || state != RUN_STATE_RUNNING || !tpm_emu->relock_storage) { + if (!running || !tpm_emu->relock_storage) { return; } diff --git a/hw/usb/hcd-ehci.c b/hw/usb/hcd-ehci.c index 19b4534c20..10c82ce472 100644 --- a/hw/usb/hcd-ehci.c +++ b/hw/usb/hcd-ehci.c @@ -2451,7 +2451,7 @@ static void usb_ehci_vm_state_change(void *opaque, bool running, RunState state) * USB-devices which have async handled packages have a packet in the * ep queue to match the completion with. */ - if (state == RUN_STATE_RUNNING) { + if (running) { ehci_advance_async_state(ehci); } diff --git a/hw/usb/redirect.c b/hw/usb/redirect.c index c9893df867..3785bb057b 100644 --- a/hw/usb/redirect.c +++ b/hw/usb/redirect.c @@ -1403,7 +1403,7 @@ static void usbredir_vm_state_change(void *priv, bool running, RunState state) { USBRedirDevice *dev = priv; - if (state == RUN_STATE_RUNNING && dev->parser != NULL) { + if (running && dev->parser != NULL) { usbredirparser_do_write(dev->parser); /* Flush any pending writes */ } } diff --git a/hw/xen/xen-hvm-common.c b/hw/xen/xen-hvm-common.c index 565dc39c8f..47e6cb1db3 100644 --- a/hw/xen/xen-hvm-common.c +++ b/hw/xen/xen-hvm-common.c @@ -623,7 +623,7 @@ void xen_hvm_change_state_handler(void *opaque, bool running, xen_set_ioreq_server_state(xen_domid, state->ioservid, - (rstate == RUN_STATE_RUNNING)); + running); } void xen_exit_notifier(Notifier *n, void *data) From 9ff5e79f2ec0be3e2d7d81c41b33cbfeac393d99 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:33 -0800 Subject: [PATCH 06/26] cpus: vm_resume Define the vm_resume helper, for use in subsequent patches. Signed-off-by: Steve Sistare Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/1704312341-66640-5-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- include/sysemu/runstate.h | 9 +++++++++ system/cpus.c | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/include/sysemu/runstate.h b/include/sysemu/runstate.h index 618eb491af..0117d243c4 100644 --- a/include/sysemu/runstate.h +++ b/include/sysemu/runstate.h @@ -57,6 +57,15 @@ void vm_start(void); * @step_pending: whether any of the CPUs is about to be single-stepped by gdb */ int vm_prepare_start(bool step_pending); + +/** + * vm_resume: If @state is a live state, start the vm and set the state, + * else just set the state. + * + * @state: the state to restore + */ +void vm_resume(RunState state); + int vm_stop(RunState state); int vm_stop_force_state(RunState state); int vm_shutdown(void); diff --git a/system/cpus.c b/system/cpus.c index f162435dd4..7d2c28b1d1 100644 --- a/system/cpus.c +++ b/system/cpus.c @@ -748,6 +748,15 @@ void vm_start(void) } } +void vm_resume(RunState state) +{ + if (runstate_is_live(state)) { + vm_start(); + } else { + runstate_set(state); + } +} + /* does a state transition even if the VM is already stopped, current state is forgotten forever */ int vm_stop_force_state(RunState state) From d3c86c99f36ac37a92dd40191b9571c0d12d415a Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:34 -0800 Subject: [PATCH 07/26] migration: propagate suspended runstate If the outgoing machine was previously suspended, propagate that to the incoming side via global_state, so a subsequent vm_start restores the suspended state. To maintain backward and forward compatibility, reclaim some space from the runstate member. Signed-off-by: Steve Sistare Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/1704312341-66640-6-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- migration/global_state.c | 47 +++++++++++++++++++++++----------------- 1 file changed, 27 insertions(+), 20 deletions(-) diff --git a/migration/global_state.c b/migration/global_state.c index 4e2a9d8ec0..64a573c683 100644 --- a/migration/global_state.c +++ b/migration/global_state.c @@ -22,7 +22,16 @@ typedef struct { uint32_t size; - uint8_t runstate[100]; + + /* + * runstate was 100 bytes, zero padded, but we trimmed it to add a + * few fields and maintain backwards compatibility. + */ + uint8_t runstate[32]; + uint8_t has_vm_was_suspended; + uint8_t vm_was_suspended; + uint8_t unused[66]; + RunState state; bool received; } GlobalState; @@ -35,6 +44,10 @@ static void global_state_do_store(RunState state) assert(strlen(state_str) < sizeof(global_state.runstate)); strpadcpy((char *)global_state.runstate, sizeof(global_state.runstate), state_str, '\0'); + global_state.has_vm_was_suspended = true; + global_state.vm_was_suspended = vm_get_suspended(); + + memset(global_state.unused, 0, sizeof(global_state.unused)); } void global_state_store(void) @@ -59,24 +72,7 @@ RunState global_state_get_runstate(void) static bool global_state_needed(void *opaque) { - GlobalState *s = opaque; - char *runstate = (char *)s->runstate; - - /* If it is not optional, it is mandatory */ - - if (migrate_get_current()->store_global_state) { - return true; - } - - /* If state is running or paused, it is not needed */ - - if (strcmp(runstate, "running") == 0 || - strcmp(runstate, "paused") == 0) { - return false; - } - - /* for any other state it is needed */ - return true; + return migrate_get_current()->store_global_state; } static int global_state_post_load(void *opaque, int version_id) @@ -93,7 +89,7 @@ static int global_state_post_load(void *opaque, int version_id) sizeof(s->runstate)) == sizeof(s->runstate)) { /* * This condition should never happen during migration, because - * all runstate names are shorter than 100 bytes (the size of + * all runstate names are shorter than 32 bytes (the size of * s->runstate). However, a malicious stream could overflow * the qapi_enum_parse() call, so we force the last character * to a NUL byte. @@ -110,6 +106,14 @@ static int global_state_post_load(void *opaque, int version_id) } s->state = r; + /* + * global_state is saved on the outgoing side before forcing a stopped + * state, so it may have saved state=suspended and vm_was_suspended=0. + * Now we are in a paused state, and when we later call vm_start, it must + * restore the suspended state, so we must set vm_was_suspended=1 here. + */ + vm_set_suspended(s->vm_was_suspended || r == RUN_STATE_SUSPENDED); + return 0; } @@ -134,6 +138,9 @@ static const VMStateDescription vmstate_globalstate = { .fields = (VMStateField[]) { VMSTATE_UINT32(size, GlobalState), VMSTATE_BUFFER(runstate, GlobalState), + VMSTATE_UINT8(has_vm_was_suspended, GlobalState), + VMSTATE_UINT8(vm_was_suspended, GlobalState), + VMSTATE_BUFFER(unused, GlobalState), VMSTATE_END_OF_LIST() }, }; From b4e9ddccd1a64239c07d7229afd7e00eed5afcd8 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:35 -0800 Subject: [PATCH 08/26] migration: preserve suspended runstate A guest that is migrated in the suspended state automaticaly wakes and continues execution. This is wrong; the guest should end migration in the same state it started. The root cause is that the outgoing migration code automatically wakes the guest, then saves the RUNNING runstate in global_state_store(), hence the incoming migration code thinks the guest is running and continues the guest if autostart is true. On the outgoing side, delete the call to qemu_system_wakeup_request(). Now that vm_stop completely stops a vm in the suspended state (from the preceding patches), the existing call to vm_stop_force_state is sufficient to correctly migrate all vmstate. On the incoming side, call vm_start if the pre-migration state was running or suspended. For the latter, vm_start correctly restores the suspended state, and a future system_wakeup monitor request will cause the vm to resume running. Signed-off-by: Steve Sistare Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/1704312341-66640-7-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- migration/migration.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 3ce04b2aaf..8124811045 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -604,7 +604,7 @@ static void process_incoming_migration_bh(void *opaque) */ if (!migrate_late_block_activate() || (autostart && (!global_state_received() || - global_state_get_runstate() == RUN_STATE_RUNNING))) { + runstate_is_live(global_state_get_runstate())))) { /* Make sure all file formats throw away their mutable metadata. * If we get an error here, just don't restart the VM yet. */ bdrv_activate_all(&local_err); @@ -628,7 +628,7 @@ static void process_incoming_migration_bh(void *opaque) dirty_bitmap_mig_before_vm_start(); if (!global_state_received() || - global_state_get_runstate() == RUN_STATE_RUNNING) { + runstate_is_live(global_state_get_runstate())) { if (autostart) { vm_start(); } else { @@ -2416,7 +2416,6 @@ static int postcopy_start(MigrationState *ms, Error **errp) migration_downtime_start(ms); - qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); global_state_store(); ret = migration_stop_vm(RUN_STATE_FINISH_MIGRATE); if (ret < 0) { @@ -2615,7 +2614,6 @@ static int migration_completion_precopy(MigrationState *s, qemu_mutex_lock_iothread(); migration_downtime_start(s); - qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); s->vm_old_state = runstate_get(); global_state_store(); @@ -3136,7 +3134,7 @@ static void migration_iteration_finish(MigrationState *s) case MIGRATION_STATUS_FAILED: case MIGRATION_STATUS_CANCELLED: case MIGRATION_STATUS_CANCELLING: - if (s->vm_old_state == RUN_STATE_RUNNING) { + if (runstate_is_live(s->vm_old_state)) { if (!runstate_check(RUN_STATE_SHUTDOWN)) { vm_start(); } From 58b105703e86c1721d1a4cb9f6aaaca49c82145e Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:36 -0800 Subject: [PATCH 09/26] migration: preserve suspended for snapshot Restoring a snapshot can break a suspended guest. Snapshots suffer from the same suspended-state issues that affect live migration, plus they must handle an additional problematic scenario, which is that a running vm must remain running if it loads a suspended snapshot. To save, the existing vm_stop call now completely stops the suspended state. Finish with vm_resume to leave the vm in the state it had prior to the save, correctly restoring the suspended state. To load, if the snapshot is not suspended, then vm_stop + vm_resume correctly handles all states, and leaves the vm in the state it had prior to the load. However, if the snapshot is suspended, restoration is trickier. First, call vm_resume to restore the state to suspended so the current state matches the saved state. Then, if the pre-load state is running, call wakeup to resume running. Prior to these changes, the vm_stop to RUN_STATE_SAVE_VM and RUN_STATE_RESTORE_VM did not change runstate if the current state was suspended, but now it does, so allow these transitions. Signed-off-by: Steve Sistare Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/1704312341-66640-8-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- include/migration/snapshot.h | 7 +++++++ migration/migration-hmp-cmds.c | 8 +++++--- migration/savevm.c | 23 +++++++++++++---------- system/runstate.c | 5 +++++ system/vl.c | 2 ++ 5 files changed, 32 insertions(+), 13 deletions(-) diff --git a/include/migration/snapshot.h b/include/migration/snapshot.h index e72083b117..9e4dcaaa75 100644 --- a/include/migration/snapshot.h +++ b/include/migration/snapshot.h @@ -16,6 +16,7 @@ #define QEMU_MIGRATION_SNAPSHOT_H #include "qapi/qapi-builtin-types.h" +#include "qapi/qapi-types-run-state.h" /** * save_snapshot: Save an internal snapshot. @@ -61,4 +62,10 @@ bool delete_snapshot(const char *name, bool has_devices, strList *devices, Error **errp); +/** + * load_snapshot_resume: Restore runstate after loading snapshot. + * @state: state to restore + */ +void load_snapshot_resume(RunState state); + #endif diff --git a/migration/migration-hmp-cmds.c b/migration/migration-hmp-cmds.c index 99710c8ffb..740a219aa4 100644 --- a/migration/migration-hmp-cmds.c +++ b/migration/migration-hmp-cmds.c @@ -399,15 +399,17 @@ void hmp_info_migrate_parameters(Monitor *mon, const QDict *qdict) void hmp_loadvm(Monitor *mon, const QDict *qdict) { - int saved_vm_running = runstate_is_running(); + RunState saved_state = runstate_get(); + const char *name = qdict_get_str(qdict, "name"); Error *err = NULL; vm_stop(RUN_STATE_RESTORE_VM); - if (load_snapshot(name, NULL, false, NULL, &err) && saved_vm_running) { - vm_start(); + if (load_snapshot(name, NULL, false, NULL, &err)) { + load_snapshot_resume(saved_state); } + hmp_handle_error(mon, err); } diff --git a/migration/savevm.c b/migration/savevm.c index 1b9ab7b8ee..74f915f3ac 100644 --- a/migration/savevm.c +++ b/migration/savevm.c @@ -3046,7 +3046,7 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, QEMUSnapshotInfo sn1, *sn = &sn1; int ret = -1, ret2; QEMUFile *f; - int saved_vm_running; + RunState saved_state = runstate_get(); uint64_t vm_state_size; g_autoptr(GDateTime) now = g_date_time_new_now_local(); @@ -3092,8 +3092,6 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, return false; } - saved_vm_running = runstate_is_running(); - global_state_store(); vm_stop(RUN_STATE_SAVE_VM); @@ -3147,9 +3145,7 @@ bool save_snapshot(const char *name, bool overwrite, const char *vmstate, the_end: bdrv_drain_all_end(); - if (saved_vm_running) { - vm_start(); - } + vm_resume(saved_state); return ret == 0; } @@ -3317,6 +3313,14 @@ err_drain: return false; } +void load_snapshot_resume(RunState state) +{ + vm_resume(state); + if (state == RUN_STATE_RUNNING && runstate_get() == RUN_STATE_SUSPENDED) { + qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, &error_abort); + } +} + bool delete_snapshot(const char *name, bool has_devices, strList *devices, Error **errp) { @@ -3381,16 +3385,15 @@ static void snapshot_load_job_bh(void *opaque) { Job *job = opaque; SnapshotJob *s = container_of(job, SnapshotJob, common); - int orig_vm_running; + RunState orig_state = runstate_get(); job_progress_set_remaining(&s->common, 1); - orig_vm_running = runstate_is_running(); vm_stop(RUN_STATE_RESTORE_VM); s->ret = load_snapshot(s->tag, s->vmstate, true, s->devices, s->errp); - if (s->ret && orig_vm_running) { - vm_start(); + if (s->ret) { + load_snapshot_resume(orig_state); } job_progress_update(&s->common, 1); diff --git a/system/runstate.c b/system/runstate.c index e2fa2040cb..ca9eb54cde 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -77,6 +77,7 @@ typedef struct { static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_PRELAUNCH, RUN_STATE_INMIGRATE }, + { RUN_STATE_PRELAUNCH, RUN_STATE_SUSPENDED }, { RUN_STATE_DEBUG, RUN_STATE_RUNNING }, { RUN_STATE_DEBUG, RUN_STATE_FINISH_MIGRATE }, @@ -132,6 +133,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_RESTORE_VM, RUN_STATE_RUNNING }, { RUN_STATE_RESTORE_VM, RUN_STATE_PRELAUNCH }, + { RUN_STATE_RESTORE_VM, RUN_STATE_SUSPENDED }, { RUN_STATE_COLO, RUN_STATE_RUNNING }, { RUN_STATE_COLO, RUN_STATE_PRELAUNCH }, @@ -150,6 +152,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_RUNNING, RUN_STATE_COLO}, { RUN_STATE_SAVE_VM, RUN_STATE_RUNNING }, + { RUN_STATE_SAVE_VM, RUN_STATE_SUSPENDED }, { RUN_STATE_SHUTDOWN, RUN_STATE_PAUSED }, { RUN_STATE_SHUTDOWN, RUN_STATE_FINISH_MIGRATE }, @@ -163,6 +166,8 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_SUSPENDED, RUN_STATE_PRELAUNCH }, { RUN_STATE_SUSPENDED, RUN_STATE_COLO}, { RUN_STATE_SUSPENDED, RUN_STATE_PAUSED}, + { RUN_STATE_SUSPENDED, RUN_STATE_SAVE_VM }, + { RUN_STATE_SUSPENDED, RUN_STATE_RESTORE_VM }, { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING }, { RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE }, diff --git a/system/vl.c b/system/vl.c index 6b87bfa32c..1eec5f627f 100644 --- a/system/vl.c +++ b/system/vl.c @@ -2710,7 +2710,9 @@ void qmp_x_exit_preconfig(Error **errp) qemu_machine_creation_done(); if (loadvm) { + RunState state = autostart ? RUN_STATE_RUNNING : runstate_get(); load_snapshot(loadvm, NULL, false, NULL, &error_fatal); + load_snapshot_resume(state); } if (replay_mode != REPLAY_MODE_NONE) { replay_vmstate_init(); From 49a5020697001eb96693e8a289c13b89149c7fef Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:37 -0800 Subject: [PATCH 10/26] migration: preserve suspended for bg_migration Do not wake a suspended guest during bg_migration, and restore the prior state at finish rather than unconditionally running. Allow the additional state transitions that occur. Signed-off-by: Steve Sistare Reviewed-by: Fabiano Rosas Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/1704312341-66640-9-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- migration/migration.c | 7 +------ system/runstate.c | 1 + 2 files changed, 2 insertions(+), 6 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 8124811045..2cc7e2a56c 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -3390,7 +3390,7 @@ static void bg_migration_vm_start_bh(void *opaque) qemu_bh_delete(s->vm_start_bh); s->vm_start_bh = NULL; - vm_start(); + vm_resume(s->vm_old_state); migration_downtime_end(s); } @@ -3462,11 +3462,6 @@ static void *bg_migration_thread(void *opaque) qemu_mutex_lock_iothread(); - /* - * If VM is currently in suspended state, then, to make a valid runstate - * transition in vm_stop_force_state() we need to wakeup it up. - */ - qemu_system_wakeup_request(QEMU_WAKEUP_REASON_OTHER, NULL); s->vm_old_state = runstate_get(); global_state_store(); diff --git a/system/runstate.c b/system/runstate.c index ca9eb54cde..621a023120 100644 --- a/system/runstate.c +++ b/system/runstate.c @@ -168,6 +168,7 @@ static const RunStateTransition runstate_transitions_def[] = { { RUN_STATE_SUSPENDED, RUN_STATE_PAUSED}, { RUN_STATE_SUSPENDED, RUN_STATE_SAVE_VM }, { RUN_STATE_SUSPENDED, RUN_STATE_RESTORE_VM }, + { RUN_STATE_SUSPENDED, RUN_STATE_SHUTDOWN }, { RUN_STATE_WATCHDOG, RUN_STATE_RUNNING }, { RUN_STATE_WATCHDOG, RUN_STATE_FINISH_MIGRATE }, From f0649758be63da16d215344011fc67aa07531542 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:38 -0800 Subject: [PATCH 11/26] tests/qtest: migration events MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Define a state object to capture events seen by migration tests, to allow more events to be captured in a subsequent patch, and simplify event checking in wait_for_migration_pass. No functional change. Signed-off-by: Steve Sistare Reviewed-by: Fabiano Rosas Reviewed-by: "Daniel P. Berrangé" Link: https://lore.kernel.org/r/1704312341-66640-10-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- tests/qtest/migration-helpers.c | 24 +++------ tests/qtest/migration-helpers.h | 9 ++-- tests/qtest/migration-test.c | 91 ++++++++++++++++----------------- 3 files changed, 56 insertions(+), 68 deletions(-) diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c index 24fb7b3525..fd3b94efa2 100644 --- a/tests/qtest/migration-helpers.c +++ b/tests/qtest/migration-helpers.c @@ -24,26 +24,16 @@ */ #define MIGRATION_STATUS_WAIT_TIMEOUT 120 -bool migrate_watch_for_stop(QTestState *who, const char *name, - QDict *event, void *opaque) -{ - bool *seen = opaque; - - if (g_str_equal(name, "STOP")) { - *seen = true; - return true; - } - - return false; -} - -bool migrate_watch_for_resume(QTestState *who, const char *name, +bool migrate_watch_for_events(QTestState *who, const char *name, QDict *event, void *opaque) { - bool *seen = opaque; + QTestMigrationState *state = opaque; - if (g_str_equal(name, "RESUME")) { - *seen = true; + if (g_str_equal(name, "STOP")) { + state->stop_seen = true; + return true; + } else if (g_str_equal(name, "RESUME")) { + state->resume_seen = true; return true; } diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h index e31dc85cc7..3d32699c45 100644 --- a/tests/qtest/migration-helpers.h +++ b/tests/qtest/migration-helpers.h @@ -15,9 +15,12 @@ #include "libqtest.h" -bool migrate_watch_for_stop(QTestState *who, const char *name, - QDict *event, void *opaque); -bool migrate_watch_for_resume(QTestState *who, const char *name, +typedef struct QTestMigrationState { + bool stop_seen; + bool resume_seen; +} QTestMigrationState; + +bool migrate_watch_for_events(QTestState *who, const char *name, QDict *event, void *opaque); G_GNUC_PRINTF(3, 4) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index d520c587f7..41fc837f36 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -43,8 +43,8 @@ unsigned start_address; unsigned end_address; static bool uffd_feature_thread_id; -static bool got_src_stop; -static bool got_dst_resume; +static QTestMigrationState src_state; +static QTestMigrationState dst_state; /* * An initial 3 MB offset is used as that corresponds @@ -230,6 +230,20 @@ static void wait_for_serial(const char *side) } while (true); } +static void wait_for_stop(QTestState *who, QTestMigrationState *state) +{ + if (!state->stop_seen) { + qtest_qmp_eventwait(who, "STOP"); + } +} + +static void wait_for_resume(QTestState *who, QTestMigrationState *state) +{ + if (!state->resume_seen) { + qtest_qmp_eventwait(who, "RESUME"); + } +} + /* * It's tricky to use qemu's migration event capability with qtest, * events suddenly appearing confuse the qmp()/hmp() responses. @@ -277,21 +291,19 @@ static void read_blocktime(QTestState *who) qobject_unref(rsp_return); } +/* + * Wait for two changes in the migration pass count, but bail if we stop. + */ static void wait_for_migration_pass(QTestState *who) { - uint64_t initial_pass = get_migration_pass(who); - uint64_t pass; + uint64_t pass, prev_pass = 0, changes = 0; - /* Wait for the 1st sync */ - while (!got_src_stop && !initial_pass) { - usleep(1000); - initial_pass = get_migration_pass(who); - } - - do { + while (changes < 2 && !src_state.stop_seen) { usleep(1000); pass = get_migration_pass(who); - } while (pass == initial_pass && !got_src_stop); + changes += (pass != prev_pass); + prev_pass = pass; + } } static void check_guests_ram(QTestState *who) @@ -617,10 +629,7 @@ static void migrate_postcopy_start(QTestState *from, QTestState *to) { qtest_qmp_assert_success(from, "{ 'execute': 'migrate-start-postcopy' }"); - if (!got_src_stop) { - qtest_qmp_eventwait(from, "STOP"); - } - + wait_for_stop(from, &src_state); qtest_qmp_eventwait(to, "RESUME"); } @@ -756,8 +765,8 @@ static int test_migrate_start(QTestState **from, QTestState **to, } } - got_src_stop = false; - got_dst_resume = false; + dst_state = (QTestMigrationState) { }; + src_state = (QTestMigrationState) { }; if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { memory_size = "150M"; @@ -848,8 +857,8 @@ static int test_migrate_start(QTestState **from, QTestState **to, if (!args->only_target) { *from = qtest_init_with_env(QEMU_ENV_SRC, cmd_source); qtest_qmp_set_event_callback(*from, - migrate_watch_for_stop, - &got_src_stop); + migrate_watch_for_events, + &src_state); } cmd_target = g_strdup_printf("-accel kvm%s -accel tcg " @@ -869,8 +878,8 @@ static int test_migrate_start(QTestState **from, QTestState **to, ignore_stderr); *to = qtest_init_with_env(QEMU_ENV_DST, cmd_target); qtest_qmp_set_event_callback(*to, - migrate_watch_for_resume, - &got_dst_resume); + migrate_watch_for_events, + &dst_state); /* * Remove shmem file immediately to avoid memory leak in test failed case. @@ -1717,9 +1726,7 @@ static void test_precopy_common(MigrateCommon *args) */ if (args->result == MIG_TEST_SUCCEED) { qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}"); - if (!got_src_stop) { - qtest_qmp_eventwait(from, "STOP"); - } + wait_for_stop(from, &src_state); migrate_ensure_converge(from); } } @@ -1765,9 +1772,8 @@ static void test_precopy_common(MigrateCommon *args) */ wait_for_migration_complete(from); - if (!got_src_stop) { - qtest_qmp_eventwait(from, "STOP"); - } + wait_for_stop(from, &src_state); + } else { wait_for_migration_complete(from); /* @@ -1780,9 +1786,7 @@ static void test_precopy_common(MigrateCommon *args) qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}"); } - if (!got_dst_resume) { - qtest_qmp_eventwait(to, "RESUME"); - } + wait_for_resume(to, &dst_state); wait_for_serial("dest_serial"); } @@ -1821,9 +1825,7 @@ static void test_file_common(MigrateCommon *args, bool stop_src) if (stop_src) { qtest_qmp_assert_success(from, "{ 'execute' : 'stop'}"); - if (!got_src_stop) { - qtest_qmp_eventwait(from, "STOP"); - } + wait_for_stop(from, &src_state); } if (args->result == MIG_TEST_QMP_ERROR) { @@ -1844,10 +1846,7 @@ static void test_file_common(MigrateCommon *args, bool stop_src) if (stop_src) { qtest_qmp_assert_success(to, "{ 'execute' : 'cont'}"); } - - if (!got_dst_resume) { - qtest_qmp_eventwait(to, "RESUME"); - } + wait_for_resume(to, &dst_state); wait_for_serial("dest_serial"); @@ -1966,9 +1965,7 @@ static void test_ignore_shared(void) migrate_wait_for_dirty_mem(from, to); - if (!got_src_stop) { - qtest_qmp_eventwait(from, "STOP"); - } + wait_for_stop(from, &src_state); qtest_qmp_eventwait(to, "RESUME"); @@ -2503,7 +2500,7 @@ static void test_migrate_auto_converge(void) break; } usleep(20); - g_assert_false(got_src_stop); + g_assert_false(src_state.stop_seen); } while (true); /* The first percentage of throttling should be at least init_pct */ g_assert_cmpint(percentage, >=, init_pct); @@ -2842,9 +2839,7 @@ static void test_multifd_tcp_cancel(void) migrate_ensure_converge(from); - if (!got_src_stop) { - qtest_qmp_eventwait(from, "STOP"); - } + wait_for_stop(from, &src_state); qtest_qmp_eventwait(to2, "RESUME"); wait_for_serial("dest_serial"); @@ -3177,7 +3172,7 @@ static void test_migrate_dirty_limit(void) throttle_us_per_full = read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); usleep(100); - g_assert_false(got_src_stop); + g_assert_false(src_state.stop_seen); } /* Now cancel migrate and wait for dirty limit throttle switch off */ @@ -3189,7 +3184,7 @@ static void test_migrate_dirty_limit(void) throttle_us_per_full = read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); usleep(100); - g_assert_false(got_src_stop); + g_assert_false(src_state.stop_seen); } while (throttle_us_per_full != 0 && --max_try_count); /* Assert dirty limit is not in service */ @@ -3218,7 +3213,7 @@ static void test_migrate_dirty_limit(void) throttle_us_per_full = read_migrate_property_int(from, "dirty-limit-throttle-time-per-round"); usleep(100); - g_assert_false(got_src_stop); + g_assert_false(src_state.stop_seen); } /* From 5014478e0dc0e7e542e462fe271a24fd07bd813d Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:39 -0800 Subject: [PATCH 12/26] tests/qtest: option to suspend during migration Add an option to suspend the src in a-b-bootblock.S, which puts the guest in S3 state after one round of writing to memory. The option is enabled by poking a 1 into the suspend_me word in the boot block prior to starting the src vm. Generate symbol offsets in a-b-bootblock.h so that the suspend_me offset is known. Generate the bootblock for each test, because suspend_me may differ for each. Signed-off-by: Steve Sistare Acked-by: Peter Xu Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/1704312341-66640-11-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- tests/migration/i386/Makefile | 5 +-- tests/migration/i386/a-b-bootblock.S | 50 ++++++++++++++++++++++++++-- tests/migration/i386/a-b-bootblock.h | 26 ++++++++++----- tests/qtest/migration-test.c | 12 +++++-- 4 files changed, 77 insertions(+), 16 deletions(-) diff --git a/tests/migration/i386/Makefile b/tests/migration/i386/Makefile index 5c0324134a..37a72ae353 100644 --- a/tests/migration/i386/Makefile +++ b/tests/migration/i386/Makefile @@ -4,9 +4,10 @@ .PHONY: all clean all: a-b-bootblock.h -a-b-bootblock.h: x86.bootsect +a-b-bootblock.h: x86.bootsect x86.o echo "$$__note" > header.tmp xxd -i $< | sed -e 's/.*int.*//' >> header.tmp + nm x86.o | awk '{print "#define SYM_"$$3" 0x"$$1}' >> header.tmp mv header.tmp $@ x86.bootsect: x86.boot @@ -16,7 +17,7 @@ x86.boot: x86.o $(CROSS_PREFIX)objcopy -O binary $< $@ x86.o: a-b-bootblock.S - $(CROSS_PREFIX)gcc -m32 -march=i486 -c $< -o $@ + $(CROSS_PREFIX)gcc -I.. -m32 -march=i486 -c $< -o $@ clean: @rm -rf *.boot *.o *.bootsect diff --git a/tests/migration/i386/a-b-bootblock.S b/tests/migration/i386/a-b-bootblock.S index 6bb9999d60..6f39eb6051 100644 --- a/tests/migration/i386/a-b-bootblock.S +++ b/tests/migration/i386/a-b-bootblock.S @@ -9,6 +9,23 @@ # # Author: dgilbert@redhat.com +#include "migration-test.h" + +#define ACPI_ENABLE 0xf1 +#define ACPI_PORT_SMI_CMD 0xb2 +#define ACPI_PM_BASE 0x600 +#define PM1A_CNT_OFFSET 4 + +#define ACPI_SCI_ENABLE 0x0001 +#define ACPI_SLEEP_TYPE 0x0400 +#define ACPI_SLEEP_ENABLE 0x2000 +#define SLEEP (ACPI_SCI_ENABLE + ACPI_SLEEP_TYPE + ACPI_SLEEP_ENABLE) + +#define LOW_ADDR X86_TEST_MEM_START +#define HIGH_ADDR X86_TEST_MEM_END + +/* Save the suspended status at an address that is not written in the loop. */ +#define suspended (X86_TEST_MEM_START + 4) .code16 .org 0x7c00 @@ -35,8 +52,8 @@ start: # at 0x7c00 ? mov %eax,%ds # Start from 1MB -.set TEST_MEM_START, (1024*1024) -.set TEST_MEM_END, (100*1024*1024) +.set TEST_MEM_START, X86_TEST_MEM_START +.set TEST_MEM_END, X86_TEST_MEM_END mov $65,%ax mov $0x3f8,%dx @@ -69,7 +86,30 @@ innerloop: mov $0x3f8,%dx outb %al,%dx - jmp mainloop + # should this test suspend? + mov (suspend_me),%eax + cmp $0,%eax + je mainloop + + # are we waking after suspend? do not suspend again. + mov $suspended,%eax + mov (%eax),%eax + cmp $1,%eax + je mainloop + + # enable acpi + mov $ACPI_ENABLE,%al + outb %al,$ACPI_PORT_SMI_CMD + + # suspend to ram + mov $suspended,%eax + movl $1,(%eax) + mov $SLEEP,%ax + mov $(ACPI_PM_BASE + PM1A_CNT_OFFSET),%dx + outw %ax,%dx + # not reached. The wakeup causes reset and restart at 0x7c00, and we + # do not save and restore registers as a real kernel would do. + # GDT magic from old (GPLv2) Grub startup.S .p2align 2 /* force 4-byte alignment */ @@ -95,6 +135,10 @@ gdtdesc: .word 0x27 /* limit */ .long gdt /* addr */ + /* test launcher can poke a 1 here to exercise suspend */ +suspend_me: + .int 0 + /* I'm a bootable disk */ .org 0x7dfe .byte 0x55 diff --git a/tests/migration/i386/a-b-bootblock.h b/tests/migration/i386/a-b-bootblock.h index 5b523917ce..c83f8711db 100644 --- a/tests/migration/i386/a-b-bootblock.h +++ b/tests/migration/i386/a-b-bootblock.h @@ -4,7 +4,7 @@ * the header and the assembler differences in your patch submission. */ unsigned char x86_bootsect[] = { - 0xfa, 0x0f, 0x01, 0x16, 0x8c, 0x7c, 0x66, 0xb8, 0x01, 0x00, 0x00, 0x00, + 0xfa, 0x0f, 0x01, 0x16, 0xb8, 0x7c, 0x66, 0xb8, 0x01, 0x00, 0x00, 0x00, 0x0f, 0x22, 0xc0, 0x66, 0xea, 0x20, 0x7c, 0x00, 0x00, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xe4, 0x92, 0x0c, 0x02, 0xe6, 0x92, 0xb8, 0x10, 0x00, 0x00, 0x00, 0x8e, 0xd8, 0x66, 0xb8, 0x41, @@ -13,13 +13,13 @@ unsigned char x86_bootsect[] = { 0x40, 0x06, 0x7c, 0xf1, 0xb8, 0x00, 0x00, 0x10, 0x00, 0xfe, 0x00, 0x05, 0x00, 0x10, 0x00, 0x00, 0x3d, 0x00, 0x00, 0x40, 0x06, 0x7c, 0xf2, 0xfe, 0xc3, 0x80, 0xe3, 0x3f, 0x75, 0xe6, 0x66, 0xb8, 0x42, 0x00, 0x66, 0xba, - 0xf8, 0x03, 0xee, 0xeb, 0xdb, 0x8d, 0x76, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00, - 0xff, 0xff, 0x00, 0x00, 0x00, 0x92, 0xcf, 0x00, 0x27, 0x00, 0x74, 0x7c, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, - 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xf8, 0x03, 0xee, 0xa1, 0xbe, 0x7c, 0x00, 0x00, 0x83, 0xf8, 0x00, 0x74, + 0xd3, 0xb8, 0x04, 0x00, 0x10, 0x00, 0x8b, 0x00, 0x83, 0xf8, 0x01, 0x74, + 0xc7, 0xb0, 0xf1, 0xe6, 0xb2, 0xb8, 0x04, 0x00, 0x10, 0x00, 0xc7, 0x00, + 0x01, 0x00, 0x00, 0x00, 0x66, 0xb8, 0x01, 0x24, 0x66, 0xba, 0x04, 0x06, + 0x66, 0xef, 0x66, 0x90, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, + 0xff, 0xff, 0x00, 0x00, 0x00, 0x9a, 0xcf, 0x00, 0xff, 0xff, 0x00, 0x00, + 0x00, 0x92, 0xcf, 0x00, 0x27, 0x00, 0xa0, 0x7c, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, @@ -49,3 +49,13 @@ unsigned char x86_bootsect[] = { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x55, 0xaa }; +#define SYM_do_zero 0x00007c3d +#define SYM_gdt 0x00007ca0 +#define SYM_gdtdesc 0x00007cb8 +#define SYM_innerloop 0x00007c51 +#define SYM_mainloop 0x00007c4c +#define SYM_pre_zero 0x00007c38 +#define SYM_start 0x00007c00 +#define SYM_suspend_me 0x00007cbe +#define SYM_TEST_MEM_END 0x06400000 +#define SYM_TEST_MEM_START 0x00100000 diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 41fc837f36..2e2ab6af3c 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -133,7 +133,7 @@ static char *bootpath; #include "tests/migration/aarch64/a-b-kernel.h" #include "tests/migration/s390x/a-b-bios.h" -static void bootfile_create(char *dir) +static void bootfile_create(char *dir, bool suspend_me) { const char *arch = qtest_get_arch(); unsigned char *content; @@ -143,6 +143,7 @@ static void bootfile_create(char *dir) if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { /* the assembled x86 boot sector should be exactly one sector large */ g_assert(sizeof(x86_bootsect) == 512); + x86_bootsect[SYM_suspend_me - SYM_start] = suspend_me; content = x86_bootsect; len = sizeof(x86_bootsect); } else if (g_str_equal(arch, "s390x")) { @@ -646,6 +647,8 @@ typedef struct { bool use_dirty_ring; const char *opts_source; const char *opts_target; + /* suspend the src before migrating to dest. */ + bool suspend_me; } MigrateStart; /* @@ -767,6 +770,8 @@ static int test_migrate_start(QTestState **from, QTestState **to, dst_state = (QTestMigrationState) { }; src_state = (QTestMigrationState) { }; + bootfile_create(tmpfs, args->suspend_me); + if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { memory_size = "150M"; @@ -2980,7 +2985,9 @@ static int64_t get_limit_rate(QTestState *who) static QTestState *dirtylimit_start_vm(void) { QTestState *vm = NULL; - g_autofree gchar * + g_autofree gchar *cmd = NULL; + + bootfile_create(tmpfs, false); cmd = g_strdup_printf("-accel kvm,dirty-ring-size=4096 " "-name dirtylimit-test,debug-threads=on " "-m 150M -smp 1 " @@ -3329,7 +3336,6 @@ int main(int argc, char **argv) g_get_tmp_dir(), err->message); } g_assert(tmpfs); - bootfile_create(tmpfs); module_call_init(MODULE_INIT_QOM); From b1fdd21e8c2442ec8a68f0c3e67efdb279ecea64 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:40 -0800 Subject: [PATCH 13/26] tests/qtest: precopy migration with suspend Add a test case to verify that the suspended state is handled correctly during live migration precopy. The test suspends the src, migrates, then wakes the dest. Signed-off-by: Steve Sistare Reviewed-by: Fabiano Rosas Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/1704312341-66640-12-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- tests/qtest/migration-helpers.c | 3 ++ tests/qtest/migration-helpers.h | 2 ++ tests/qtest/migration-test.c | 62 +++++++++++++++++++++++++++++++-- 3 files changed, 64 insertions(+), 3 deletions(-) diff --git a/tests/qtest/migration-helpers.c b/tests/qtest/migration-helpers.c index fd3b94efa2..37e8e812c5 100644 --- a/tests/qtest/migration-helpers.c +++ b/tests/qtest/migration-helpers.c @@ -32,6 +32,9 @@ bool migrate_watch_for_events(QTestState *who, const char *name, if (g_str_equal(name, "STOP")) { state->stop_seen = true; return true; + } else if (g_str_equal(name, "SUSPEND")) { + state->suspend_seen = true; + return true; } else if (g_str_equal(name, "RESUME")) { state->resume_seen = true; return true; diff --git a/tests/qtest/migration-helpers.h b/tests/qtest/migration-helpers.h index 3d32699c45..b478549096 100644 --- a/tests/qtest/migration-helpers.h +++ b/tests/qtest/migration-helpers.h @@ -18,6 +18,8 @@ typedef struct QTestMigrationState { bool stop_seen; bool resume_seen; + bool suspend_seen; + bool suspend_me; } QTestMigrationState; bool migrate_watch_for_events(QTestState *who, const char *name, diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index 2e2ab6af3c..a3bfbf2654 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -178,7 +178,7 @@ static void bootfile_delete(void) /* * Wait for some output in the serial output file, * we get an 'A' followed by an endless string of 'B's - * but on the destination we won't have the A. + * but on the destination we won't have the A (unless we enabled suspend/resume) */ static void wait_for_serial(const char *side) { @@ -245,6 +245,13 @@ static void wait_for_resume(QTestState *who, QTestMigrationState *state) } } +static void wait_for_suspend(QTestState *who, QTestMigrationState *state) +{ + if (state->suspend_me && !state->suspend_seen) { + qtest_qmp_eventwait(who, "SUSPEND"); + } +} + /* * It's tricky to use qemu's migration event capability with qtest, * events suddenly appearing confuse the qmp()/hmp() responses. @@ -299,7 +306,7 @@ static void wait_for_migration_pass(QTestState *who) { uint64_t pass, prev_pass = 0, changes = 0; - while (changes < 2 && !src_state.stop_seen) { + while (changes < 2 && !src_state.stop_seen && !src_state.suspend_seen) { usleep(1000); pass = get_migration_pass(who); changes += (pass != prev_pass); @@ -584,6 +591,12 @@ static void migrate_wait_for_dirty_mem(QTestState *from, usleep(1000 * 10); } while (qtest_readq(to, marker_address) != MAGIC_MARKER); + + /* If suspended, src only iterates once, and watch_byte may never change */ + if (src_state.suspend_me) { + return; + } + /* * Now ensure that already transferred bytes are * dirty again from the guest workload. Note the @@ -771,6 +784,7 @@ static int test_migrate_start(QTestState **from, QTestState **to, dst_state = (QTestMigrationState) { }; src_state = (QTestMigrationState) { }; bootfile_create(tmpfs, args->suspend_me); + src_state.suspend_me = args->suspend_me; if (strcmp(arch, "i386") == 0 || strcmp(arch, "x86_64") == 0) { memory_size = "150M"; @@ -1717,6 +1731,7 @@ static void test_precopy_common(MigrateCommon *args) /* Wait for the first serial output from the source */ if (args->result == MIG_TEST_SUCCEED) { wait_for_serial("src_serial"); + wait_for_suspend(from, &src_state); } if (args->live) { @@ -1793,6 +1808,11 @@ static void test_precopy_common(MigrateCommon *args) wait_for_resume(to, &dst_state); + if (args->start.suspend_me) { + /* wakeup succeeds only if guest is suspended */ + qtest_qmp_assert_success(to, "{'execute': 'system_wakeup'}"); + } + wait_for_serial("dest_serial"); } @@ -1879,6 +1899,34 @@ static void test_precopy_unix_plain(void) test_precopy_common(&args); } +static void test_precopy_unix_suspend_live(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .listen_uri = uri, + .connect_uri = uri, + /* + * despite being live, the test is fast because the src + * suspends immediately. + */ + .live = true, + .start.suspend_me = true, + }; + + test_precopy_common(&args); +} + +static void test_precopy_unix_suspend_notlive(void) +{ + g_autofree char *uri = g_strdup_printf("unix:%s/migsocket", tmpfs); + MigrateCommon args = { + .listen_uri = uri, + .connect_uri = uri, + .start.suspend_me = true, + }; + + test_precopy_common(&args); +} static void test_precopy_unix_dirty_ring(void) { @@ -3279,7 +3327,7 @@ static bool kvm_dirty_ring_supported(void) int main(int argc, char **argv) { bool has_kvm, has_tcg; - bool has_uffd; + bool has_uffd, is_x86; const char *arch; g_autoptr(GError) err = NULL; const char *qemu_src = getenv(QEMU_ENV_SRC); @@ -3309,6 +3357,7 @@ int main(int argc, char **argv) has_uffd = ufd_version_check(); arch = qtest_get_arch(); + is_x86 = !strcmp(arch, "i386") || !strcmp(arch, "x86_64"); /* * On ppc64, the test only works with kvm-hv, but not with kvm-pr and TCG @@ -3339,6 +3388,13 @@ int main(int argc, char **argv) module_call_init(MODULE_INIT_QOM); + if (is_x86) { + qtest_add_func("/migration/precopy/unix/suspend/live", + test_precopy_unix_suspend_live); + qtest_add_func("/migration/precopy/unix/suspend/notlive", + test_precopy_unix_suspend_notlive); + } + if (has_uffd) { qtest_add_func("/migration/postcopy/plain", test_postcopy); qtest_add_func("/migration/postcopy/recovery/plain", From 2b58a8b9631f25f0ddc19fbed0fd5bfff3bb5f41 Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Wed, 3 Jan 2024 12:05:41 -0800 Subject: [PATCH 14/26] tests/qtest: postcopy migration with suspend Add a test case to verify that the suspended state is handled correctly by live migration postcopy. The test suspends the src, migrates, then wakes the dest. Signed-off-by: Steve Sistare Reviewed-by: Peter Xu Link: https://lore.kernel.org/r/1704312341-66640-13-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- tests/qtest/migration-test.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/tests/qtest/migration-test.c b/tests/qtest/migration-test.c index a3bfbf2654..136e5df06c 100644 --- a/tests/qtest/migration-test.c +++ b/tests/qtest/migration-test.c @@ -1347,6 +1347,7 @@ static int migrate_postcopy_prepare(QTestState **from_ptr, /* Wait for the first serial output from the source */ wait_for_serial("src_serial"); + wait_for_suspend(from, &src_state); g_autofree char *uri = migrate_get_socket_address(to, "socket-address"); migrate_qmp(from, uri, "{}"); @@ -1364,6 +1365,11 @@ static void migrate_postcopy_complete(QTestState *from, QTestState *to, { wait_for_migration_complete(from); + if (args->start.suspend_me) { + /* wakeup succeeds only if guest is suspended */ + qtest_qmp_assert_success(to, "{'execute': 'system_wakeup'}"); + } + /* Make sure we get at least one "B" on destination */ wait_for_serial("dest_serial"); @@ -1397,6 +1403,15 @@ static void test_postcopy(void) test_postcopy_common(&args); } +static void test_postcopy_suspend(void) +{ + MigrateCommon args = { + .start.suspend_me = true, + }; + + test_postcopy_common(&args); +} + static void test_postcopy_compress(void) { MigrateCommon args = { @@ -3412,7 +3427,10 @@ int main(int argc, char **argv) qtest_add_func("/migration/postcopy/recovery/double-failures", test_postcopy_recovery_double_fail); #endif /* _WIN32 */ - + if (is_x86) { + qtest_add_func("/migration/postcopy/suspend", + test_postcopy_suspend); + } } qtest_add_func("/migration/bad_dest", test_baddest); From 17b9483baae595aa1b9d25adceb240bf74fdb1ee Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:06 +0200 Subject: [PATCH 15/26] migration: Remove migrate_max_downtime() declaration migrate_max_downtime() has been removed long ago, but its declaration was mistakenly left. Remove it. Signed-off-by: Avihai Horon Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20231231093016.14204-2-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/migration.h | 2 -- 1 file changed, 2 deletions(-) diff --git a/migration/migration.h b/migration/migration.h index cf2c9c88e0..b3c9288c38 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -480,8 +480,6 @@ void migration_incoming_process(void); bool migration_has_all_channels(void); -uint64_t migrate_max_downtime(void); - void migrate_set_error(MigrationState *s, const Error *error); bool migrate_has_error(MigrationState *s); From 80caba6955c422ac7f848999cb961846aa2781d1 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:07 +0200 Subject: [PATCH 16/26] migration: Remove nulling of hostname in migrate_init() MigrationState->hostname is set to NULL in migrate_init(). This is redundant because it is already freed and set to NULL in migrade_fd_cleanup(). Remove it. Signed-off-by: Avihai Horon Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20231231093016.14204-3-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/migration.c | 1 - 1 file changed, 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 2cc7e2a56c..15dc8aa21c 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1588,7 +1588,6 @@ int migrate_init(MigrationState *s, Error **errp) s->migration_thread_running = false; error_free(s->error); s->error = NULL; - s->hostname = NULL; s->vmdesc = NULL; migrate_set_state(&s->state, MIGRATION_STATUS_NONE, MIGRATION_STATUS_SETUP); From c606f3baa2de0ee40a3cc69142a23fff130debc3 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:08 +0200 Subject: [PATCH 17/26] migration: Refactor migration_incoming_setup() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 6720c2b32725 ("migration: check magic value for deciding the mapping of channels") extracted the only code that could fail in migration_incoming_setup(). Now migration_incoming_setup() can't fail, so refactor it to return void and remove errp parameter. Signed-off-by: Avihai Horon Reviewed-by: Fabiano Rosas Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20231231093016.14204-4-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/migration.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 15dc8aa21c..e7d342ab59 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -724,11 +724,8 @@ fail: /** * migration_incoming_setup: Setup incoming migration * @f: file for main migration channel - * @errp: where to put errors - * - * Returns: %true on success, %false on error. */ -static bool migration_incoming_setup(QEMUFile *f, Error **errp) +static void migration_incoming_setup(QEMUFile *f) { MigrationIncomingState *mis = migration_incoming_get_current(); @@ -736,7 +733,6 @@ static bool migration_incoming_setup(QEMUFile *f, Error **errp) mis->from_src_file = f; } qemu_file_set_blocking(f, false); - return true; } void migration_incoming_process(void) @@ -780,9 +776,7 @@ static bool postcopy_try_recover(void) void migration_fd_process_incoming(QEMUFile *f, Error **errp) { - if (!migration_incoming_setup(f, errp)) { - return; - } + migration_incoming_setup(f); if (postcopy_try_recover()) { return; } @@ -855,10 +849,7 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) if (default_channel) { f = qemu_file_new_input(ioc); - - if (!migration_incoming_setup(f, errp)) { - return; - } + migration_incoming_setup(f); } else { /* Multiple connections */ assert(migration_needs_multiple_sockets()); From b0cf3bfc69752969314f9e88d68f75f40fcdf274 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:09 +0200 Subject: [PATCH 18/26] migration: Remove errp parameter in migration_fd_process_incoming() Errp parameter in migration_fd_process_incoming() is unused. Remove it. Signed-off-by: Avihai Horon Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20231231093016.14204-5-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/migration.c | 2 +- migration/migration.h | 2 +- migration/rdma.c | 6 +----- 3 files changed, 3 insertions(+), 7 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index e7d342ab59..8bb1a8134e 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -774,7 +774,7 @@ static bool postcopy_try_recover(void) return false; } -void migration_fd_process_incoming(QEMUFile *f, Error **errp) +void migration_fd_process_incoming(QEMUFile *f) { migration_incoming_setup(f); if (postcopy_try_recover()) { diff --git a/migration/migration.h b/migration/migration.h index b3c9288c38..17972dac34 100644 --- a/migration/migration.h +++ b/migration/migration.h @@ -474,7 +474,7 @@ struct MigrationState { void migrate_set_state(int *state, int old_state, int new_state); -void migration_fd_process_incoming(QEMUFile *f, Error **errp); +void migration_fd_process_incoming(QEMUFile *f); void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp); void migration_incoming_process(void); diff --git a/migration/rdma.c b/migration/rdma.c index 04debab5d9..94c0f871f0 100644 --- a/migration/rdma.c +++ b/migration/rdma.c @@ -4035,7 +4035,6 @@ static void rdma_accept_incoming_migration(void *opaque) { RDMAContext *rdma = opaque; QEMUFile *f; - Error *local_err = NULL; trace_qemu_rdma_accept_incoming_migration(); if (qemu_rdma_accept(rdma) < 0) { @@ -4057,10 +4056,7 @@ static void rdma_accept_incoming_migration(void *opaque) } rdma->migration_started_on_destination = 1; - migration_fd_process_incoming(f, &local_err); - if (local_err) { - error_reportf_err(local_err, "RDMA ERROR:"); - } + migration_fd_process_incoming(f); } void rdma_start_incoming_migration(InetSocketAddress *host_port, From c77b40859a5201f01b44dc475258405e289c431f Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:10 +0200 Subject: [PATCH 19/26] migration/multifd: Fix error message in multifd_recv_initial_packet() In multifd_recv_initial_packet(), if MultiFDInit_t->id is greater than the configured number of multifd channels, an irrelevant error message about multifd version is printed. Change the error message to a relevant one about the channel id. Signed-off-by: Avihai Horon Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20231231093016.14204-6-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/multifd.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/migration/multifd.c b/migration/multifd.c index 409460684f..a6204fc72f 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -228,8 +228,8 @@ static int multifd_recv_initial_packet(QIOChannel *c, Error **errp) } if (msg.id > migrate_multifd_channels()) { - error_setg(errp, "multifd: received channel version %u " - "expected %u", msg.version, MULTIFD_VERSION); + error_setg(errp, "multifd: received channel id %u is greater than " + "number of channels %u", msg.id, migrate_multifd_channels()); return -1; } From a4395f5d3c06472ed70d9ef9f79878f95575be9e Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:11 +0200 Subject: [PATCH 20/26] migration/multifd: Simplify multifd_channel_connect() if else statement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The else branch in multifd_channel_connect() is redundant because when the if branch is taken the function returns. Simplify the code by removing the else branch. Signed-off-by: Avihai Horon Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20231231093016.14204-7-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/multifd.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/migration/multifd.c b/migration/multifd.c index a6204fc72f..55d5fd55f8 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -847,14 +847,13 @@ static bool multifd_channel_connect(MultiFDSendParams *p, * so we mustn't call multifd_send_thread until then */ return multifd_tls_channel_connect(p, ioc, errp); - - } else { - migration_ioc_register_yank(ioc); - p->registered_yank = true; - p->c = ioc; - qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, - QEMU_THREAD_JOINABLE); } + + migration_ioc_register_yank(ioc); + p->registered_yank = true; + p->c = ioc; + qemu_thread_create(&p->thread, p->name, multifd_send_thread, p, + QEMU_THREAD_JOINABLE); return true; } From 6ae208ce9656114e428b1a75ac62a6761ed3216c Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:12 +0200 Subject: [PATCH 21/26] migration/multifd: Fix leaking of Error in TLS error flow If there is an error in multifd TLS handshake task, multifd_tls_outgoing_handshake() retrieves the error with qio_task_propagate_error() but never frees it. Fix it by freeing the obtained Error. In addition, the error is not reported at all, so report it with migrate_set_error(). Fixes: 29647140157a ("migration/tls: add support for multifd tls-handshake") Signed-off-by: Avihai Horon Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20231231093016.14204-8-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/multifd.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/migration/multifd.c b/migration/multifd.c index 55d5fd55f8..9ac24866ad 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -787,6 +787,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, trace_multifd_tls_outgoing_handshake_error(ioc, error_get_pretty(err)); + migrate_set_error(migrate_get_current(), err); /* * Error happen, mark multifd_send_thread status as 'quit' although it * is not created, and then tell who pay attention to me. @@ -794,6 +795,7 @@ static void multifd_tls_outgoing_handshake(QIOTask *task, p->quit = true; qemu_sem_post(&multifd_send_state->channels_ready); qemu_sem_post(&p->sem_sync); + error_free(err); } static void *multifd_tls_handshake_thread(void *opaque) From 1d3886f837d8e972366a8b58ba8afb0e5efbeed7 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:13 +0200 Subject: [PATCH 22/26] migration/multifd: Remove error_setg() in migration_ioc_process_incoming() If multifd_load_setup() fails in migration_ioc_process_incoming(), error_setg() is called with errp. This will lead to an assert because in that case errp already contains an error. Fix it by removing the redundant error_setg(). Fixes: 6720c2b32725 ("migration: check magic value for deciding the mapping of channels") Signed-off-by: Avihai Horon Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20231231093016.14204-9-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/migration.c | 1 - 1 file changed, 1 deletion(-) diff --git a/migration/migration.c b/migration/migration.c index 8bb1a8134e..9524c22a50 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -843,7 +843,6 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) } if (multifd_load_setup(errp) != 0) { - error_setg(errp, "Failed to setup multifd channels"); return; } From 4f8cf323e80c17f7d4b5604f1699591326df6262 Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:14 +0200 Subject: [PATCH 23/26] migration: Fix migration_channel_read_peek() error path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit migration_channel_read_peek() calls qio_channel_readv_full() and handles both cases of return value == 0 and return value < 0 the same way, by calling error_setg() with errp. However, if return value < 0, errp is already set, so calling error_setg() with errp will lead to an assert. Fix it by handling these cases separately, calling error_setg() with errp only in return value == 0 case. Fixes: 6720c2b32725 ("migration: check magic value for deciding the mapping of channels") Signed-off-by: Avihai Horon Reviewed-by: Fabiano Rosas Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20231231093016.14204-10-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/channel.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/migration/channel.c b/migration/channel.c index ca3319a309..f9de064f3b 100644 --- a/migration/channel.c +++ b/migration/channel.c @@ -117,9 +117,12 @@ int migration_channel_read_peek(QIOChannel *ioc, len = qio_channel_readv_full(ioc, &iov, 1, NULL, NULL, QIO_CHANNEL_READ_FLAG_MSG_PEEK, errp); - if (len <= 0 && len != QIO_CHANNEL_ERR_BLOCK) { - error_setg(errp, - "Failed to peek at channel"); + if (len < 0 && len != QIO_CHANNEL_ERR_BLOCK) { + return -1; + } + + if (len == 0) { + error_setg(errp, "Failed to peek at channel"); return -1; } From b6f4c0c788d5a037197a0ed409e131ed2cec274a Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:15 +0200 Subject: [PATCH 24/26] migration: Remove unnecessary usage of local Error According to Error API, usage of ERRP_GUARD() or a local Error instead of errp is needed if errp is passed to void functions, where it is later dereferenced to see if an error occurred. There are several places in migration.c that use local Error although it is not needed. Change these places to use errp directly. Signed-off-by: Avihai Horon Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/20231231093016.14204-11-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/migration.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index 9524c22a50..454cd4ec1f 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -830,10 +830,9 @@ void migration_ioc_process_incoming(QIOChannel *ioc, Error **errp) * issue is not possible. */ ret = migration_channel_read_peek(ioc, (void *)&channel_magic, - sizeof(channel_magic), &local_err); + sizeof(channel_magic), errp); if (ret != 0) { - error_propagate(errp, local_err); return; } @@ -1825,8 +1824,6 @@ bool migration_is_blocked(Error **errp) static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, bool resume, Error **errp) { - Error *local_err = NULL; - if (blk_inc) { warn_report("parameter 'inc' is deprecated;" " use blockdev-mirror with NBD instead"); @@ -1896,8 +1893,7 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc, "current migration capabilities"); return false; } - if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, &local_err)) { - error_propagate(errp, local_err); + if (!migrate_cap_set(MIGRATION_CAPABILITY_BLOCK, true, errp)) { return false; } s->must_remove_block_options = true; From 3fc58efa938338a82e4d5c0c031e7e9c98e9544f Mon Sep 17 00:00:00 2001 From: Avihai Horon Date: Sun, 31 Dec 2023 11:30:16 +0200 Subject: [PATCH 25/26] migration/multifd: Remove unnecessary usage of local Error MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to Error API, usage of ERRP_GUARD() or a local Error instead of errp is needed if errp is passed to void functions, where it is later dereferenced to see if an error occurred. There are several places in multifd.c that use local Error although it is not needed. Change these places to use errp directly. Signed-off-by: Avihai Horon Reviewed-by: Philippe Mathieu-Daudé Link: https://lore.kernel.org/r/20231231093016.14204-12-avihaih@nvidia.com Signed-off-by: Peter Xu --- migration/multifd.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/migration/multifd.c b/migration/multifd.c index 9ac24866ad..9f353aecfa 100644 --- a/migration/multifd.c +++ b/migration/multifd.c @@ -951,12 +951,10 @@ int multifd_save_setup(Error **errp) for (i = 0; i < thread_count; i++) { MultiFDSendParams *p = &multifd_send_state->params[i]; - Error *local_err = NULL; int ret; - ret = multifd_send_state->ops->send_setup(p, &local_err); + ret = multifd_send_state->ops->send_setup(p, errp); if (ret) { - error_propagate(errp, local_err); return ret; } } @@ -1195,12 +1193,10 @@ int multifd_load_setup(Error **errp) for (i = 0; i < thread_count; i++) { MultiFDRecvParams *p = &multifd_recv_state->params[i]; - Error *local_err = NULL; int ret; - ret = multifd_recv_state->ops->recv_setup(p, &local_err); + ret = multifd_recv_state->ops->recv_setup(p, errp); if (ret) { - error_propagate(errp, local_err); return ret; } } From b12635ff08ab2e5a6a955c6866ef4525fb3deb5d Mon Sep 17 00:00:00 2001 From: Steve Sistare Date: Mon, 13 Nov 2023 12:23:45 -0800 Subject: [PATCH 26/26] migration: fix coverity migrate_mode finding Coverity diagnoses a possible out-of-range array index here ... static GSList *migration_blockers[MIG_MODE__MAX]; fill_source_migration_info() { GSList *cur_blocker = migration_blockers[migrate_mode()]; ... because it does not know that MIG_MODE__MAX will never be returned as a migration mode. To fix, assert so in migrate_mode(). Fixes: fa3673e497a1 ("migration: per-mode blockers") Reported-by: Peter Maydell Suggested-by: Peter Maydell Signed-off-by: Steve Sistare Reviewed-by: Fabiano Rosas Link: https://lore.kernel.org/r/1699907025-215450-1-git-send-email-steven.sistare@oracle.com Signed-off-by: Peter Xu --- migration/options.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/migration/options.c b/migration/options.c index 8d8ec73ad9..3e3e0b93b4 100644 --- a/migration/options.c +++ b/migration/options.c @@ -833,8 +833,10 @@ uint64_t migrate_max_postcopy_bandwidth(void) MigMode migrate_mode(void) { MigrationState *s = migrate_get_current(); + MigMode mode = s->parameters.mode; - return s->parameters.mode; + assert(mode >= 0 && mode < MIG_MODE__MAX); + return mode; } int migrate_multifd_channels(void)