migration: prevent migration when VM has poisoned memory
A memory page poisoned at the hypervisor level is no longer readable. Migrating a VM that has such a page crashes QEMU when it tries to read the memory address space and stumbles on the poisoned page, with a stack trace similar to:

Program terminated with signal SIGBUS, Bus error.
#0 _mm256_loadu_si256
#1 buffer_zero_avx2
#2 select_accel_fn
#3 buffer_is_zero
#4 save_zero_page
#5 ram_save_target_page_legacy
#6 ram_save_host_page
#7 ram_find_and_save_block
#8 ram_save_iterate
#9 qemu_savevm_state_iterate
#10 migration_iteration_run
#11 migration_thread
#12 qemu_thread_start

To avoid this VM crash during the migration, prevent the migration when a known hardware poison exists on the VM.

Signed-off-by: William Roche <william.roche@oracle.com>
Link: https://lore.kernel.org/r/20240130190640.139364-2-william.roche@oracle.com
Signed-off-by: Peter Xu <peterx@redhat.com>
This commit is contained in:
parent
39a6e4f87e
commit
06152b89db
@ -1119,6 +1119,11 @@ int kvm_vm_check_extension(KVMState *s, unsigned int extension)
|
|||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
|
||||||
|
* We track the poisoned pages to be able to:
|
||||||
|
* - replace them on VM reset
|
||||||
|
* - block a migration for a VM with a poisoned page
|
||||||
|
*/
|
||||||
typedef struct HWPoisonPage {
|
typedef struct HWPoisonPage {
|
||||||
ram_addr_t ram_addr;
|
ram_addr_t ram_addr;
|
||||||
QLIST_ENTRY(HWPoisonPage) list;
|
QLIST_ENTRY(HWPoisonPage) list;
|
||||||
@ -1152,6 +1157,11 @@ void kvm_hwpoison_page_add(ram_addr_t ram_addr)
|
|||||||
QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
|
QLIST_INSERT_HEAD(&hwpoison_page_list, page, list);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Report whether any hardware-poisoned page is currently tracked.
 *
 * Returns true when hwpoison_page_list holds at least one entry,
 * false when the list is empty.
 */
bool kvm_hwpoisoned_mem(void)
|
||||||
|
{
|
||||||
|
return !QLIST_EMPTY(&hwpoison_page_list);
|
||||||
|
}
|
||||||
|
|
||||||
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
|
static uint32_t adjust_ioeventfd_endianness(uint32_t val, uint32_t size)
|
||||||
{
|
{
|
||||||
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
|
#if HOST_BIG_ENDIAN != TARGET_BIG_ENDIAN
|
||||||
|
@ -124,3 +124,8 @@ uint32_t kvm_dirty_ring_size(void)
|
|||||||
{
|
{
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/*
 * Variant of kvm_hwpoisoned_mem() that unconditionally reports that no
 * hardware-poisoned memory is present (always returns false).
 */
bool kvm_hwpoisoned_mem(void)
|
||||||
|
{
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
@ -538,4 +538,10 @@ bool kvm_arch_cpu_check_are_resettable(void);
|
|||||||
bool kvm_dirty_ring_enabled(void);
|
bool kvm_dirty_ring_enabled(void);
|
||||||
|
|
||||||
uint32_t kvm_dirty_ring_size(void);
|
uint32_t kvm_dirty_ring_size(void);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* kvm_hwpoisoned_mem - indicate if there is any hwpoisoned page
|
||||||
|
* reported for the VM.
|
||||||
|
*/
|
||||||
|
bool kvm_hwpoisoned_mem(void);
|
||||||
#endif
|
#endif
|
||||||
|
@ -67,6 +67,7 @@
|
|||||||
#include "options.h"
|
#include "options.h"
|
||||||
#include "sysemu/dirtylimit.h"
|
#include "sysemu/dirtylimit.h"
|
||||||
#include "qemu/sockets.h"
|
#include "qemu/sockets.h"
|
||||||
|
#include "sysemu/kvm.h"
|
||||||
|
|
||||||
static NotifierList migration_state_notifiers =
|
static NotifierList migration_state_notifiers =
|
||||||
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
|
NOTIFIER_LIST_INITIALIZER(migration_state_notifiers);
|
||||||
@ -1906,6 +1907,12 @@ static bool migrate_prepare(MigrationState *s, bool blk, bool blk_inc,
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (kvm_hwpoisoned_mem()) {
|
||||||
|
error_setg(errp, "Can't migrate this vm with hardware poisoned memory, "
|
||||||
|
"please reboot the vm and try again");
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
if (migration_is_blocked(errp)) {
|
if (migration_is_blocked(errp)) {
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user