From a8664ba5101446f4f2c24b24ed9e10335bbbd46b Mon Sep 17 00:00:00 2001 From: zhanghailiang Date: Tue, 17 Jan 2017 20:57:44 +0800 Subject: [PATCH] COLO: Don't process failover request while loading VM's state We should not do failover work while the main thread is loading the VM's state. Otherwise the consistency of the VM's memory and device state will be broken. We will restart the failover process once the loading stage is over; the new failover status 'RELAUNCH' records whether we need to restart the process. Cc: Eric Blake Signed-off-by: zhanghailiang Signed-off-by: Li Zhijian Reviewed-by: Dr. David Alan Gilbert Message-Id: <1484657864-21708-4-git-send-email-zhang.zhanghailiang@huawei.com> Signed-off-by: Dr. David Alan Gilbert Added a missing '(Since 2.9)' --- migration/colo.c | 26 ++++++++++++++++++++++++++ qapi-schema.json | 4 +++- 2 files changed, 29 insertions(+), 1 deletion(-) diff --git a/migration/colo.c b/migration/colo.c index 3222812d96..712308ed5e 100644 --- a/migration/colo.c +++ b/migration/colo.c @@ -20,6 +20,8 @@ #include "qapi/error.h" #include "migration/failover.h" +static bool vmstate_loading; + #define COLO_BUFFER_BASE_SIZE (4 * 1024 * 1024) bool colo_supported(void) @@ -51,6 +53,19 @@ static void secondary_vm_do_failover(void) int old_state; MigrationIncomingState *mis = migration_incoming_get_current(); + /* Can not do failover during the process of VM's loading VMstate, Or + * it will break the secondary VM. 
+ */ + if (vmstate_loading) { + old_state = failover_set_state(FAILOVER_STATUS_ACTIVE, + FAILOVER_STATUS_RELAUNCH); + if (old_state != FAILOVER_STATUS_ACTIVE) { + error_report("Unknown error while do failover for secondary VM," + "old_state: %s", FailoverStatus_lookup[old_state]); + } + return; + } + migrate_set_state(&mis->state, MIGRATION_STATUS_COLO, MIGRATION_STATUS_COMPLETED); @@ -548,13 +563,23 @@ void *colo_process_incoming_thread(void *opaque) qemu_mutex_lock_iothread(); qemu_system_reset(VMRESET_SILENT); + vmstate_loading = true; if (qemu_loadvm_state(fb) < 0) { error_report("COLO: loadvm failed"); qemu_mutex_unlock_iothread(); goto out; } + + vmstate_loading = false; qemu_mutex_unlock_iothread(); + if (failover_get_state() == FAILOVER_STATUS_RELAUNCH) { + failover_set_state(FAILOVER_STATUS_RELAUNCH, + FAILOVER_STATUS_NONE); + failover_request_active(NULL); + goto out; + } + colo_send_message(mis->to_src_file, COLO_MESSAGE_VMSTATE_LOADED, &local_err); if (local_err) { @@ -563,6 +588,7 @@ void *colo_process_incoming_thread(void *opaque) } out: + vmstate_loading = false; /* Throw the unreported error message after exited from loop */ if (local_err) { error_report_err(local_err); diff --git a/qapi-schema.json b/qapi-schema.json index 93305412dd..5edb08d621 100644 --- a/qapi-schema.json +++ b/qapi-schema.json @@ -1193,10 +1193,12 @@ # # @completed: finish the process of failover # +# @relaunch: restart the failover process, from 'none' -> 'completed' (Since 2.9) +# # Since: 2.8 ## { 'enum': 'FailoverStatus', - 'data': [ 'none', 'require', 'active', 'completed'] } + 'data': [ 'none', 'require', 'active', 'completed', 'relaunch' ] } ## # @x-colo-lost-heartbeat: