From 4fcefd44a074008e490ff54c3c28a08b8dbfb14b Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Fri, 20 Jul 2018 11:47:13 +0800
Subject: [PATCH 1/7] migration: fix potential overflow in multifd send

I would guess it won't happen normally, but this should ease Coverity.

>>> CID 1394385:  Integer handling issues  (OVERFLOW_BEFORE_WIDEN)
>>> Potentially overflowing expression "pages->used * 8192U" with type
>>> "unsigned int" (32 bits, unsigned) is evaluated using 32-bit
>>> arithmetic, and then used in a context that expects an expression
>>> of type "uint64_t" (64 bits, unsigned).
854         transferred = pages->used * TARGET_PAGE_SIZE + p->packet_len;

Fixes: CID 1394385
CC: Juan Quintela
Signed-off-by: Peter Xu
Message-Id: <20180720034713.11711-1-peterx@redhat.com>
Reviewed-by: Juan Quintela
Signed-off-by: Dr. David Alan Gilbert
---
 migration/ram.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/migration/ram.c b/migration/ram.c
index 52dd678092..fdd108475c 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -851,7 +851,7 @@ static void multifd_send_pages(void)
     p->pages->block = NULL;
     multifd_send_state->pages = p->pages;
     p->pages = pages;
-    transferred = pages->used * TARGET_PAGE_SIZE + p->packet_len;
+    transferred = ((uint64_t) pages->used) * TARGET_PAGE_SIZE + p->packet_len;
     ram_counters.multifd_bytes += transferred;
     ram_counters.transferred += transferred;;
     qemu_mutex_unlock(&p->mutex);

From 57225e5f32bb051c9eb03758647963aeb72cebbf Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert"
Date: Thu, 19 Jul 2018 10:22:57 +0100
Subject: [PATCH 2/7] migrate: Fix cancelling state warning

We've been getting the warning:

  migration_iteration_finish: Unknown ending state 2

on a cancel. I think that's originally due to 39b9e17905c; although
I've only seen the warning, I think that in some cases we could find
the VM stays paused after a cancel where it should restart.

Signed-off-by: Dr. David Alan Gilbert
Message-Id: <20180719092257.12703-1-dgilbert@redhat.com>
Reviewed-by: Peter Xu
Reviewed-by: Juan Quintela
Signed-off-by: Dr. David Alan Gilbert
---
 migration/migration.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/migration/migration.c b/migration/migration.c
index 8d56d56930..db6bde7453 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -2877,6 +2877,7 @@ static void migration_iteration_finish(MigrationState *s)
         /* Fallthrough */
     case MIGRATION_STATUS_FAILED:
     case MIGRATION_STATUS_CANCELLED:
+    case MIGRATION_STATUS_CANCELLING:
         if (s->vm_was_running) {
             vm_start();
         } else {
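Aside: a minimal standalone sketch (not part of the patch series) of the
overflow pattern Coverity flags in patch 1. With a 32-bit "unsigned int"
count, the multiplication is evaluated in 32-bit arithmetic and can wrap
before the result is widened to uint64_t; casting one operand first makes
the whole expression 64-bit. The page size and count below are
illustrative values, not taken from QEMU.

    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 8192U          /* the 8192U from the Coverity report */

    int main(void)
    {
        unsigned int used = 600000;  /* large enough for the product to wrap */

        /* 32-bit multiply: wraps modulo 2^32, then is widened too late */
        uint64_t wrong = used * PAGE_SIZE;

        /* widen first, then multiply in 64-bit arithmetic */
        uint64_t right = ((uint64_t) used) * PAGE_SIZE;

        printf("wrong = %llu\n", (unsigned long long) wrong);  /* 620232704  */
        printf("right = %llu\n", (unsigned long long) right);  /* 4915200000 */
        return 0;
    }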
From 67fa1f5700248fc66d5b1526c268737e29892b86 Mon Sep 17 00:00:00 2001
From: "Dr. David Alan Gilbert"
Date: Tue, 24 Jul 2018 11:22:15 +0100
Subject: [PATCH 3/7] audio/hda: Fix migration
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix outgoing migration, which was crashing in
vmstate_hda_audio_stream_buf_needed. I think the problem is that we
have room for up to 4 streams in the array but only use 2; when we
come to save the state of the unused streams we hit st->state == NULL.

Fixes: 280c1e1cdb24d80ecdfc
Signed-off-by: Dr. David Alan Gilbert
Message-Id: <20180724102215.31866-1-dgilbert@redhat.com>
Reviewed-by: Daniel P. Berrangé
Reviewed-by: Juan Quintela
Signed-off-by: Dr. David Alan Gilbert
---
 hw/audio/hda-codec.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hw/audio/hda-codec.c b/hw/audio/hda-codec.c
index 2b58c3505b..617a1c1016 100644
--- a/hw/audio/hda-codec.c
+++ b/hw/audio/hda-codec.c
@@ -786,7 +786,7 @@ static void hda_audio_reset(DeviceState *dev)
 static bool vmstate_hda_audio_stream_buf_needed(void *opaque)
 {
     HDAAudioStream *st = opaque;
-    return st->state->use_timer;
+    return st->state && st->state->use_timer;
 }

 static const VMStateDescription vmstate_hda_audio_stream_buf = {

From 814bb08f177af8dc67e155f0ad622fb6366c3b85 Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Mon, 23 Jul 2018 20:33:02 +0800
Subject: [PATCH 4/7] migration: update recv bitmap only on dest vm

We shouldn't update the received bitmap if we're the source VM. This
fixes a breakage when release-ram is enabled on postcopy.

Signed-off-by: Peter Xu
Message-Id: <20180723123305.24792-2-peterx@redhat.com>
Reviewed-by: Juan Quintela
Signed-off-by: Dr. David Alan Gilbert
---
 migration/ram.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/migration/ram.c b/migration/ram.c
index fdd108475c..24dea2730c 100644
--- a/migration/ram.c
+++ b/migration/ram.c
@@ -2827,8 +2827,15 @@ int ram_discard_range(const char *rbname, uint64_t start, size_t length)
         goto err;
     }

-    bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
-                 length >> qemu_target_page_bits());
+    /*
+     * On source VM, we don't need to update the received bitmap since
+     * we don't even have one.
+     */
+    if (rb->receivedmap) {
+        bitmap_clear(rb->receivedmap, start >> qemu_target_page_bits(),
+                     length >> qemu_target_page_bits());
+    }
+
     ret = ram_block_discard_range(rb, start, length);

 err:
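Aside: patches 3 and 4 fix the same class of bug: a pointer that is only
populated in one role (an in-use stream slot, the destination side of a
migration) was dereferenced unconditionally on a path that runs in both
roles. A minimal standalone sketch of the guard follows; the types and
function names are hypothetical stand-ins, not the real QEMU
RAMBlock/bitmap API.

    #include <stddef.h>

    #define BITS_PER_LONG (8 * sizeof(unsigned long))

    /* Stand-in for RAMBlock: receivedmap is only allocated on the
     * destination VM, so on the source it stays NULL. */
    struct ram_block {
        unsigned long *receivedmap;
    };

    /* Simplified stand-in for QEMU's bitmap_clear() */
    static void bitmap_clear_range(unsigned long *map, size_t start,
                                   size_t count)
    {
        for (size_t i = start; i < start + count; i++) {
            map[i / BITS_PER_LONG] &= ~(1UL << (i % BITS_PER_LONG));
        }
    }

    static void discard_range(struct ram_block *rb, size_t page,
                              size_t npages)
    {
        /* The source VM has no received bitmap; guard before touching
         * it, as patch 4 does for rb->receivedmap. */
        if (rb->receivedmap) {
            bitmap_clear_range(rb->receivedmap, page, npages);
        }
        /* ... the discard itself proceeds on both sides ... */
    }

    int main(void)
    {
        unsigned long dest_map[1] = { ~0UL };
        struct ram_block src = { .receivedmap = NULL };      /* source VM  */
        struct ram_block dst = { .receivedmap = dest_map };  /* destination */

        discard_range(&src, 0, 8);  /* no-op instead of a NULL dereference */
        discard_range(&dst, 0, 8);  /* clears bits 0..7 of the bitmap */
        return 0;
    }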
From 97ca211c6216ccfcb64c46f739a0ce36042d9ea8 Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Mon, 23 Jul 2018 20:33:03 +0800
Subject: [PATCH 5/7] migration: disallow recovery for release-ram

Postcopy recovery won't work well with the release-ram capability,
since release-ram drops the page buffer as soon as the page is put
into the send buffer. So if a network failure happens, any page
buffers that have not yet reached the destination VM but have already
been sent from the source VM will be lost forever. Let's refuse to
let the client resume such a postcopy migration. Luckily release-ram
was designed to only be used when the source and destination VMs are
on the same host, so it should be fine.

Signed-off-by: Peter Xu
Message-Id: <20180723123305.24792-3-peterx@redhat.com>
Reviewed-by: Juan Quintela
Signed-off-by: Dr. David Alan Gilbert
---
 migration/migration.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/migration/migration.c b/migration/migration.c
index db6bde7453..bfc4d09aae 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -1629,6 +1629,25 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
                        "paused migration");
             return false;
         }
+
+        /*
+         * Postcopy recovery won't work well with release-ram
+         * capability since release-ram will drop the page buffer as
+         * long as the page is put into the send buffer. So if there
+         * is a network failure happened, any page buffers that have
+         * not yet reached the destination VM but have already been
+         * sent from the source VM will be lost forever. Let's refuse
+         * the client from resuming such a postcopy migration.
+         * Luckily release-ram was designed to only be used when src
+         * and destination VMs are on the same host, so it should be
+         * fine.
+         */
+        if (migrate_release_ram()) {
+            error_setg(errp, "Postcopy recovery cannot work "
+                       "when release-ram capability is set");
+            return false;
+        }
         /* This is a resume, skip init status */
         return true;
     }

From 829db8b497d855d39f85df4de9472c0b74ec5c3f Mon Sep 17 00:00:00 2001
From: Peter Xu
Date: Mon, 23 Jul 2018 20:33:04 +0800
Subject: [PATCH 6/7] tests: only update last_byte when at the edge

The only point at which last_byte may change is when the scan reaches
the edge. Setting it on every iteration can leave invalid data in
last_byte when a memory corruption is detected, making the check of
the next byte incorrect. For example, a single-page corruption at
address 0x14ad000 would also lead to a "fake" corruption at 0x14ae000:

  Memory content inconsistency at 14ad000 first_byte = 44 last_byte = 44 current = ef hit_edge = 0
  Memory content inconsistency at 14ae000 first_byte = 44 last_byte = ef current = 44 hit_edge = 0

After the patch, only the corrupted page is reported:

  Memory content inconsistency at 14ad000 first_byte = 44 last_byte = 44 current = ef hit_edge = 0

Signed-off-by: Peter Xu
Message-Id: <20180723123305.24792-4-peterx@redhat.com>
Reviewed-by: Juan Quintela
Signed-off-by: Dr. David Alan Gilbert
---
 tests/migration-test.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/migration-test.c b/tests/migration-test.c
index 086f727b34..e079e0bdb6 100644
--- a/tests/migration-test.c
+++ b/tests/migration-test.c
@@ -300,6 +300,7 @@ static void check_guests_ram(QTestState *who)
                  * to us yet.
                  */
                 hit_edge = true;
+                last_byte = b;
             } else {
                 fprintf(stderr, "Memory content inconsistency at %x"
                         " first_byte = %x last_byte = %x current = %x"
@@ -308,7 +309,6 @@ static void check_guests_ram(QTestState *who)
                 bad = true;
             }
         }
-        last_byte = b;
     }
     g_assert_false(bad);
 }

From 4b3fb65db973b51346e3456987ba80b15c1fc75c Mon Sep 17 00:00:00 2001
From: Lidong Chen
Date: Tue, 24 Jul 2018 20:16:25 +0800
Subject: [PATCH 7/7] migration: fix duplicate initialization for
 expected_downtime and cleanup_bh

migrate_fd_connect initializes expected_downtime and cleanup_bh a
second time; drop the duplicate initialization.

Signed-off-by: Lidong Chen
Message-Id: <1532434585-14732-2-git-send-email-lidongchen@tencent.com>
Reviewed-by: Juan Quintela
Signed-off-by: Dr. David Alan Gilbert
---
 migration/migration.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/migration/migration.c b/migration/migration.c
index bfc4d09aae..b7d9854bda 100644
--- a/migration/migration.c
+++ b/migration/migration.c
@@ -3052,8 +3052,6 @@ void migrate_fd_connect(MigrationState *s, Error *error_in)
     } else {
         /* This is a fresh new migration */
         rate_limit = s->parameters.max_bandwidth / XFER_LIMIT_RATIO;
-        s->expected_downtime = s->parameters.downtime_limit;
-        s->cleanup_bh = qemu_bh_new(migrate_fd_cleanup, s);

         /* Notify before starting migration thread */
         notifier_list_notify(&migration_state_notifiers, s);
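Aside: a self-contained sketch of the checking loop that patch 6 fixes,
with the guest-memory reads replaced by a plain array (the real test
reads guest RAM through qtest, one byte per page). The point is that
last_byte may only advance at the legitimate written/unwritten edge;
updating it after a detected corruption turns the corrupted byte into
the new baseline and falsely flags the following page too. Names,
page size, and the test data in main() are illustrative only.

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    #define PAGE_SIZE 4096

    /* The guest increments the first byte of each page in order, so a
     * scan should see at most one backward transition (the "edge"). */
    static bool check_ram(const uint8_t *ram, size_t pages)
    {
        uint8_t first_byte = ram[0];
        uint8_t last_byte = first_byte;
        bool hit_edge = false;
        bool bad = false;

        for (size_t i = 1; i < pages; i++) {
            uint8_t b = ram[i * PAGE_SIZE];
            if (b != last_byte) {
                if (((b + 1) % 256) == last_byte && !hit_edge) {
                    /* Legitimate edge between updated and not-yet-updated
                     * pages: the only place last_byte may move. */
                    hit_edge = true;
                    last_byte = b;
                } else {
                    fprintf(stderr, "inconsistency at page %zx:"
                            " first_byte = %x last_byte = %x current = %x\n",
                            i, first_byte, last_byte, b);
                    bad = true;
                }
            }
        }
        return !bad;
    }

    int main(void)
    {
        static uint8_t ram[8 * PAGE_SIZE];

        /* Pages 0-3 already incremented to 0x45, pages 4-7 still 0x44:
         * one legitimate edge, no corruption, so the check passes. */
        for (size_t i = 0; i < 8; i++) {
            ram[i * PAGE_SIZE] = (i < 4) ? 0x45 : 0x44;
        }
        return check_ram(ram, 8) ? 0 : 1;
    }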