From 22b04245f0d5237b0e4f7b10fa05577eff6522ea Mon Sep 17 00:00:00 2001 From: Fabiano Rosas Date: Mon, 26 Feb 2024 17:31:21 -0300 Subject: [PATCH] migration: Join the return path thread before releasing to_dst_file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The return path thread might hang at a blocking system call. Before joining the thread we might need to issue a shutdown() on the socket file descriptor to release it. To determine whether the shutdown() is necessary we look at the QEMUFile error. Make sure we only clean up the QEMUFile after the return path has been waited for. This fixes a hang when qemu_savevm_state_setup() produced an error that was detected by migration_detect_error(). That skips migration_completion() so close_return_path_on_source() would get stuck waiting for the RP thread to terminate. Reported-by: Cédric Le Goater Tested-by: Cédric Le Goater Signed-off-by: Fabiano Rosas Link: https://lore.kernel.org/r/20240226203122.22894-2-farosas@suse.de Signed-off-by: Peter Xu --- migration/migration.c | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/migration/migration.c b/migration/migration.c index ccb13fa94a..7ba2b60e46 100644 --- a/migration/migration.c +++ b/migration/migration.c @@ -1342,6 +1342,8 @@ static void migrate_fd_cleanup(MigrationState *s) qemu_savevm_state_cleanup(); + close_return_path_on_source(s); + if (s->to_dst_file) { QEMUFile *tmp; @@ -1366,12 +1368,6 @@ static void migrate_fd_cleanup(MigrationState *s) qemu_fclose(tmp); } - /* - * We already cleaned up to_dst_file, so errors from the return - * path might be due to that, ignore them. - */ - close_return_path_on_source(s); - assert(!migration_is_active(s)); if (s->state == MIGRATION_STATUS_CANCELLING) { @@ -2914,6 +2910,13 @@ static MigThrError postcopy_pause(MigrationState *s) while (true) { QEMUFile *file; + /* + * We're already pausing, so ignore any errors on the return + * path and just wait for the thread to finish. It will be + * re-created when we resume. + */ + close_return_path_on_source(s); + /* * Current channel is possibly broken. Release it. Note that this is * guaranteed even without lock because to_dst_file should only be @@ -2933,13 +2936,6 @@ static MigThrError postcopy_pause(MigrationState *s) qemu_file_shutdown(file); qemu_fclose(file); - /* - * We're already pausing, so ignore any errors on the return - * path and just wait for the thread to finish. It will be - * re-created when we resume. - */ - close_return_path_on_source(s); - migrate_set_state(&s->state, s->state, MIGRATION_STATUS_POSTCOPY_PAUSED);