migration: prevent migration when VM has poisoned memory

A memory page poisoned at the hypervisor level is no longer readable.
Migrating a VM will crash QEMU when it tries to read the memory
address space and stumbles on the poisoned page, with a stack trace
similar to the following:

Program terminated with signal SIGBUS, Bus error.
#0  _mm256_loadu_si256
#1  buffer_zero_avx2
#2  select_accel_fn
#3  buffer_is_zero
#4  save_zero_page
#5  ram_save_target_page_legacy
#6  ram_save_host_page
#7  ram_find_and_save_block
#8  ram_save_iterate
#9  qemu_savevm_state_iterate
#10 migration_iteration_run
#11 migration_thread
#12 qemu_thread_start

To avoid this VM crash during the migration, prevent the migration
when a known hardware poison exists on the VM.

Signed-off-by: William Roche <william.roche@oracle.com>
Link: https://lore.kernel.org/r/20240130190640.139364-2-william.roche@oracle.com
Signed-off-by: Peter Xu <peterx@redhat.com>
This commit is contained in:
William Roche 2024-01-30 19:06:40 +00:00 committed by Peter Xu
parent 39a6e4f87e
commit 06152b89db
4 changed files with 28 additions and 0 deletions

View File

@ -1119,6 +1119,11 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
return ret; return ret;
} }
/*
* We track the poisoned pages to be able to:
* - replace them on VM reset
* - block a migration for a VM with a poisoned page
*/
typedef struct HWPoisonPage { typedef struct HWPoisonPage {
ram_addr_t ram_addr; ram_addr_t ram_addr;
QLIST_ENTRY(HWPoisonPage) list; QLIST_ENTRY(HWPoisonPage) list;
@ -1152,6 +1157,11 @@ void kvm_hwpoison_page_add(ram_addr_t ram_addr)
QLIST_INSERT_HEAD(&hwpoison_page_list, page, list); QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
} }
/*
 * Tell the caller whether hwpoison_page_list currently holds at least
 * one entry.
 *
 * Returns true when the list is non-empty, false when it is empty.
 */
bool kvm_hwpoisoned_mem(void)
{
    if (QLIST_EMPTY(&hwpoison_page_list)) {
        return false;
    }

    return true;
}
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size) static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
{ {
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN #if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN

View File

@ -124,3 +124,8 @@ uint32_t kvm_dirty_ring_size(void)
{ {
return 0; return 0;
} }
/*
 * Variant of kvm_hwpoisoned_mem() that unconditionally reports that no
 * hwpoisoned memory is present.
 * NOTE(review): presumably the stub used when KVM support is not built
 * in - confirm against the enclosing file.
 */
bool kvm_hwpoisoned_mem(void)
{
    return false;
}

View File

@ -538,4 +538,10 @@ bool kvm_arch_cpu_check_are_resettable(void);
bool kvm_dirty_ring_enabled(void); bool kvm_dirty_ring_enabled(void);
uint32_t kvm_dirty_ring_size(void); uint32_t kvm_dirty_ring_size(void);
/**
 * kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page
 * reported for the VM.
 *
 * Returns: true if at least one hwpoisoned page has been reported,
 * false otherwise.
 */
bool kvm_hwpoisoned_mem(void);
#endif #endif

View File

@ -67,6 +67,7 @@
#include "options.h" #include "options.h"
#include "sysemu/dirtylimit.h" #include "sysemu/dirtylimit.h"
#include "qemu/sockets.h" #include "qemu/sockets.h"
#include "sysemu/kvm.h"
static NotifierList migration_state_notifiers = static NotifierList migration_state_notifiers =
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers); NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
@ -1906,6 +1907,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
return false; return false;
} }
if (kvm_hwpoisoned_mem()) {
error_setg(errp, "Can't migrate this vm with hardware poisoned memory, "
"please reboot the vm and try again");
return false;
}
if (migration_is_blocked(errp)) { if (migration_is_blocked(errp)) {
return false; return false;
} }