2008-11-05 17:29:27 +01:00
|
|
|
/*
|
|
|
|
* QEMU KVM support
|
|
|
|
*
|
|
|
|
* Copyright IBM, Corp. 2008
|
2008-11-24 20:36:26 +01:00
|
|
|
* Red Hat, Inc. 2008
|
2008-11-05 17:29:27 +01:00
|
|
|
*
|
|
|
|
* Authors:
|
|
|
|
* Anthony Liguori <aliguori@us.ibm.com>
|
2008-11-24 20:36:26 +01:00
|
|
|
* Glauber Costa <gcosta@redhat.com>
|
2008-11-05 17:29:27 +01:00
|
|
|
*
|
|
|
|
* This work is licensed under the terms of the GNU GPL, version 2 or later.
|
|
|
|
* See the COPYING file in the top-level directory.
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
#include <sys/ioctl.h>
|
|
|
|
#include <sys/mman.h>
|
2008-11-13 20:21:00 +01:00
|
|
|
#include <stdarg.h>
|
2008-11-05 17:29:27 +01:00
|
|
|
|
|
|
|
#include <linux/kvm.h>
|
|
|
|
|
|
|
|
#include "qemu-common.h"
|
2012-12-17 18:20:00 +01:00
|
|
|
#include "qemu/atomic.h"
|
|
|
|
#include "qemu/option.h"
|
|
|
|
#include "qemu/config-file.h"
|
2012-12-17 18:20:04 +01:00
|
|
|
#include "sysemu/sysemu.h"
|
2009-05-02 00:29:37 +02:00
|
|
|
#include "hw/hw.h"
|
2012-12-12 13:24:50 +01:00
|
|
|
#include "hw/pci/msi.h"
|
2012-12-17 18:19:49 +01:00
|
|
|
#include "exec/gdbstub.h"
|
2012-12-17 18:20:04 +01:00
|
|
|
#include "sysemu/kvm.h"
|
2012-12-17 18:20:00 +01:00
|
|
|
#include "qemu/bswap.h"
|
2012-12-17 18:19:49 +01:00
|
|
|
#include "exec/memory.h"
|
2013-11-04 12:59:02 +01:00
|
|
|
#include "exec/ram_addr.h"
|
2012-12-17 18:19:49 +01:00
|
|
|
#include "exec/address-spaces.h"
|
2012-12-17 18:20:00 +01:00
|
|
|
#include "qemu/event_notifier.h"
|
2013-03-29 05:27:05 +01:00
|
|
|
#include "trace.h"
|
2008-11-05 17:29:27 +01:00
|
|
|
|
2013-12-23 16:40:40 +01:00
|
|
|
#include "hw/boards.h"
|
|
|
|
|
2011-01-10 12:50:05 +01:00
|
|
|
/* This check must be after config-host.h is included */
|
|
|
|
#ifdef CONFIG_EVENTFD
|
|
|
|
#include <sys/eventfd.h>
|
|
|
|
#endif
|
|
|
|
|
2012-08-10 15:11:45 +02:00
|
|
|
#ifdef CONFIG_VALGRIND_H
|
|
|
|
#include <valgrind/memcheck.h>
|
|
|
|
#endif
|
|
|
|
|
2012-02-26 18:46:12 +01:00
|
|
|
/* KVM uses PAGE_SIZE in its definition of COALESCED_MMIO_MAX */
|
2008-12-09 21:09:57 +01:00
|
|
|
#define PAGE_SIZE TARGET_PAGE_SIZE
|
|
|
|
|
2008-11-05 17:29:27 +01:00
|
|
|
//#define DEBUG_KVM
|
|
|
|
|
|
|
|
#ifdef DEBUG_KVM
|
2010-04-18 16:22:14 +02:00
|
|
|
#define DPRINTF(fmt, ...) \
|
2008-11-05 17:29:27 +01:00
|
|
|
do { fprintf(stderr, fmt, ## __VA_ARGS__); } while (0)
|
|
|
|
#else
|
2010-04-18 16:22:14 +02:00
|
|
|
#define DPRINTF(fmt, ...) \
|
2008-11-05 17:29:27 +01:00
|
|
|
do { } while (0)
|
|
|
|
#endif
|
|
|
|
|
2012-05-16 20:41:10 +02:00
|
|
|
#define KVM_MSI_HASHTAB_SIZE 256
|
|
|
|
|
2008-11-19 18:41:58 +01:00
|
|
|
typedef struct KVMSlot
|
|
|
|
{
|
2012-10-23 12:30:10 +02:00
|
|
|
hwaddr start_addr;
|
2009-10-01 23:12:16 +02:00
|
|
|
ram_addr_t memory_size;
|
2011-12-15 18:55:26 +01:00
|
|
|
void *ram;
|
2008-11-19 18:41:58 +01:00
|
|
|
int slot;
|
|
|
|
int flags;
|
|
|
|
} KVMSlot;
|
2008-11-05 17:29:27 +01:00
|
|
|
|
2008-11-24 20:36:26 +01:00
|
|
|
/* Short alias for the kernel's struct kvm_dirty_log, the argument block
 * passed to the KVM_GET_DIRTY_LOG ioctl in this file. */
typedef struct kvm_dirty_log KVMDirtyLog;
|
|
|
|
|
2008-11-05 17:29:27 +01:00
|
|
|
struct KVMState
|
|
|
|
{
|
2013-11-22 20:12:44 +01:00
|
|
|
KVMSlot *slots;
|
|
|
|
int nr_slots;
|
2008-11-05 17:29:27 +01:00
|
|
|
int fd;
|
|
|
|
int vmfd;
|
2008-12-09 21:09:57 +01:00
|
|
|
int coalesced_mmio;
|
2010-01-26 12:21:16 +01:00
|
|
|
struct kvm_coalesced_mmio_ring *coalesced_mmio_ring;
|
2011-10-18 19:43:12 +02:00
|
|
|
bool coalesced_flush_in_progress;
|
2009-05-01 20:42:15 +02:00
|
|
|
int broken_set_mem_region;
|
2009-05-01 20:52:46 +02:00
|
|
|
int migration_log;
|
2009-11-25 00:33:03 +01:00
|
|
|
int vcpu_events;
|
2010-03-01 19:10:29 +01:00
|
|
|
int robust_singlestep;
|
2010-03-12 15:20:49 +01:00
|
|
|
int debugregs;
|
2009-03-12 21:12:48 +01:00
|
|
|
#ifdef KVM_CAP_SET_GUEST_DEBUG
|
|
|
|
struct kvm_sw_breakpoint_head kvm_sw_breakpoints;
|
|
|
|
#endif
|
2012-03-02 20:28:48 +01:00
|
|
|
int pit_state2;
|
2010-06-17 11:53:07 +02:00
|
|
|
int xsave, xcrs;
|
2011-01-10 12:50:05 +01:00
|
|
|
int many_ioeventfds;
|
2012-08-27 08:28:39 +02:00
|
|
|
int intx_set_mask;
|
kvm: Comparison with ioctl number macros needs to be unsigned
In kvm-all.c we store an ioctl cmd number in the irqchip_inject_ioctl field
of KVMState, which has type 'int'. This seems to make sense since the
ioctl() man page says that the cmd parameter has type int.
However, the kernel treats ioctl numbers as unsigned - sys_ioctl() takes an
unsigned int, and the macros which generate ioctl numbers expand to
unsigned expressions. Furthermore, some ioctls (IOC_READ ioctls on x86
and IOC_WRITE ioctls on powerpc) have bit 31 set, and so would be negative
if interpreted as an int. This has the surprising and compile-breaking
consequence that in kvm_irqchip_set_irq() where we do:
return (s->irqchip_inject_ioctl == KVM_IRQ_LINE) ? 1 : event.status;
We will get a "comparison is always false due to limited range of data
type" warning from gcc if KVM_IRQ_LINE is one of the bit-31-set ioctls,
which it is on powerpc.
So, despite the fact that the man page and posix say ioctl numbers are
signed, they're actually unsigned. The kernel uses unsigned, the glibc
header uses unsigned long, and FreeBSD, NetBSD and OSX also use unsigned
long ioctl numbers in the code.
Therefore, this patch changes the variable to be unsigned, fixing the
compile.
Signed-off-by: David Gibson <david@gibson.dropbear.id.au>
Signed-off-by: Alexander Graf <agraf@suse.de>
2012-03-07 15:41:09 +01:00
|
|
|
/* The man page (and posix) say ioctl numbers are signed int, but
|
|
|
|
* they're not. Linux, glibc and *BSD all treat ioctl numbers as
|
|
|
|
* unsigned, and treating them as signed here can break things */
|
2012-08-24 13:34:47 +02:00
|
|
|
unsigned irq_set_ioctl;
|
2011-10-15 11:49:47 +02:00
|
|
|
#ifdef KVM_CAP_IRQ_ROUTING
|
|
|
|
struct kvm_irq_routing *irq_routes;
|
|
|
|
int nr_allocated_irq_routes;
|
|
|
|
uint32_t *used_gsi_bitmap;
|
2012-05-16 20:41:08 +02:00
|
|
|
unsigned int gsi_count;
|
2012-05-16 20:41:10 +02:00
|
|
|
QTAILQ_HEAD(msi_hashtab, KVMMSIRoute) msi_hashtab[KVM_MSI_HASHTAB_SIZE];
|
2012-05-16 20:41:14 +02:00
|
|
|
bool direct_msi;
|
2011-10-15 11:49:47 +02:00
|
|
|
#endif
|
2008-11-05 17:29:27 +01:00
|
|
|
};
|
|
|
|
|
2011-02-07 12:19:25 +01:00
|
|
|
/* The KVM state instance that the helpers in this file operate on. */
KVMState *kvm_state;

/*
 * Feature switches.  They are assigned outside the part of the file shown
 * here; of these, only kvm_readonly_mem_allowed is read by the visible
 * code, where it gates the use of KVM_MEM_READONLY slots.
 */
bool kvm_kernel_irqchip;
bool kvm_async_interrupts_allowed;
bool kvm_halt_in_kernel_allowed;
bool kvm_irqfds_allowed;
bool kvm_msi_via_irqfd_allowed;
bool kvm_gsi_routing_allowed;
bool kvm_gsi_direct_mapping;
bool kvm_allowed;
bool kvm_readonly_mem_allowed;
|
2008-11-05 17:29:27 +01:00
|
|
|
|
2011-01-21 21:48:17 +01:00
|
|
|
static const KVMCapabilityInfo kvm_required_capabilites[] = {
|
|
|
|
KVM_CAP_INFO(USER_MEMORY),
|
|
|
|
KVM_CAP_INFO(DESTROY_MEMORY_REGION_WORKS),
|
|
|
|
KVM_CAP_LAST_INFO
|
|
|
|
};
|
|
|
|
|
2008-11-05 17:29:27 +01:00
|
|
|
/*
 * Hand out the first slot of @s that is not in use (memory_size == 0).
 *
 * Running out of slots is fatal: a message is written to stderr and the
 * process aborts, so callers never receive NULL.
 */
static KVMSlot *kvm_alloc_slot(KVMState *s)
{
    int idx = 0;

    while (idx < s->nr_slots) {
        KVMSlot *candidate = &s->slots[idx++];

        if (!candidate->memory_size) {
            return candidate;
        }
    }

    fprintf(stderr, "%s: no free slot available\n", __func__);
    abort();
}
|
|
|
|
|
|
|
|
static KVMSlot *kvm_lookup_matching_slot(KVMState *s,
|
2012-10-23 12:30:10 +02:00
|
|
|
hwaddr start_addr,
|
|
|
|
hwaddr end_addr)
|
2009-04-17 16:26:29 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2013-11-22 20:12:44 +01:00
|
|
|
for (i = 0; i < s->nr_slots; i++) {
|
2009-04-17 16:26:29 +02:00
|
|
|
KVMSlot *mem = &s->slots[i];
|
|
|
|
|
|
|
|
if (start_addr == mem->start_addr &&
|
|
|
|
end_addr == mem->start_addr + mem->memory_size) {
|
|
|
|
return mem;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2008-11-05 17:29:27 +01:00
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2009-04-17 16:26:33 +02:00
|
|
|
/*
|
|
|
|
* Find overlapping slot with lowest start address
|
|
|
|
*/
|
|
|
|
static KVMSlot *kvm_lookup_overlapping_slot(KVMState *s,
|
2012-10-23 12:30:10 +02:00
|
|
|
hwaddr start_addr,
|
|
|
|
hwaddr end_addr)
|
2008-11-05 17:29:27 +01:00
|
|
|
{
|
2009-04-17 16:26:33 +02:00
|
|
|
KVMSlot *found = NULL;
|
2008-11-05 17:29:27 +01:00
|
|
|
int i;
|
|
|
|
|
2013-11-22 20:12:44 +01:00
|
|
|
for (i = 0; i < s->nr_slots; i++) {
|
2008-11-05 17:29:27 +01:00
|
|
|
KVMSlot *mem = &s->slots[i];
|
|
|
|
|
2009-04-17 16:26:33 +02:00
|
|
|
if (mem->memory_size == 0 ||
|
|
|
|
(found && found->start_addr < mem->start_addr)) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (end_addr > mem->start_addr &&
|
|
|
|
start_addr < mem->start_addr + mem->memory_size) {
|
|
|
|
found = mem;
|
|
|
|
}
|
2008-11-05 17:29:27 +01:00
|
|
|
}
|
|
|
|
|
2009-04-17 16:26:33 +02:00
|
|
|
return found;
|
2008-11-05 17:29:27 +01:00
|
|
|
}
|
|
|
|
|
2011-12-15 18:55:26 +01:00
|
|
|
int kvm_physical_memory_addr_from_host(KVMState *s, void *ram,
|
2012-10-23 12:30:10 +02:00
|
|
|
hwaddr *phys_addr)
|
2010-10-11 20:31:20 +02:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
|
2013-11-22 20:12:44 +01:00
|
|
|
for (i = 0; i < s->nr_slots; i++) {
|
2010-10-11 20:31:20 +02:00
|
|
|
KVMSlot *mem = &s->slots[i];
|
|
|
|
|
2011-12-15 18:55:26 +01:00
|
|
|
if (ram >= mem->ram && ram < mem->ram + mem->memory_size) {
|
|
|
|
*phys_addr = mem->start_addr + (ram - mem->ram);
|
2010-10-11 20:31:20 +02:00
|
|
|
return 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2008-11-24 20:36:26 +01:00
|
|
|
/*
 * Push the description of @slot to the kernel with
 * KVM_SET_USER_MEMORY_REGION.
 *
 * Dirty page logging is requested in addition to the slot's own flags
 * while s->migration_log is set.  Returns the result of the final ioctl.
 */
static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot)
{
    struct kvm_userspace_memory_region region;

    region.slot = slot->slot;
    region.flags = slot->flags;
    region.guest_phys_addr = slot->start_addr;
    region.userspace_addr = (unsigned long)slot->ram;

    if (s->migration_log) {
        region.flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }

    if (slot->memory_size != 0 && (region.flags & KVM_MEM_READONLY) != 0) {
        /* Set the slot size to 0 before setting the slot to the desired
         * value. This is needed based on KVM commit 75d61fbc.  The result
         * of this preliminary call is deliberately not examined. */
        region.memory_size = 0;
        kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &region);
    }

    region.memory_size = slot->memory_size;
    return kvm_vm_ioctl(s, KVM_SET_USER_MEMORY_REGION, &region);
}
|
|
|
|
|
2009-06-27 09:24:58 +02:00
|
|
|
static void kvm_reset_vcpu(void *opaque)
|
|
|
|
{
|
2012-10-31 06:57:49 +01:00
|
|
|
CPUState *cpu = opaque;
|
2009-06-27 09:24:58 +02:00
|
|
|
|
2012-10-31 06:57:49 +01:00
|
|
|
kvm_arch_reset_vcpu(cpu);
|
2009-06-27 09:24:58 +02:00
|
|
|
}
|
2008-11-24 20:36:26 +01:00
|
|
|
|
2012-12-17 06:38:45 +01:00
|
|
|
/*
 * Create the KVM vcpu that backs @cpu and prepare it for use.
 *
 * Steps, in order: create the vcpu, record its fd and state in @cpu, map
 * the vcpu's kvm_run area, locate the coalesced MMIO ring if that has not
 * been done yet, run the architecture specific initialisation and, when
 * that succeeds, register and perform the initial reset.
 *
 * Returns 0 on success or a negative value on failure.  Nothing that was
 * set up before a failure is undone.
 */
int kvm_init_vcpu(CPUState *cpu)
{
    KVMState *kvm = kvm_state;
    long run_size;
    int ret;

    DPRINTF("kvm_init_vcpu\n");

    ret = kvm_vm_ioctl(kvm, KVM_CREATE_VCPU, (void *)kvm_arch_vcpu_id(cpu));
    if (ret < 0) {
        DPRINTF("kvm_create_vcpu failed\n");
        return ret;
    }

    cpu->kvm_fd = ret;
    cpu->kvm_state = kvm;
    cpu->kvm_vcpu_dirty = true;

    run_size = kvm_ioctl(kvm, KVM_GET_VCPU_MMAP_SIZE, 0);
    if (run_size < 0) {
        ret = run_size;
        DPRINTF("KVM_GET_VCPU_MMAP_SIZE failed\n");
        return ret;
    }

    cpu->kvm_run = mmap(NULL, run_size, PROT_READ | PROT_WRITE, MAP_SHARED,
                        cpu->kvm_fd, 0);
    if (cpu->kvm_run == MAP_FAILED) {
        ret = -errno;
        DPRINTF("mmap'ing vcpu state failed\n");
        return ret;
    }

    /* The ring lives coalesced_mmio pages past the start of the mapping. */
    if (kvm->coalesced_mmio && !kvm->coalesced_mmio_ring) {
        kvm->coalesced_mmio_ring =
            (void *)cpu->kvm_run + kvm->coalesced_mmio * PAGE_SIZE;
    }

    ret = kvm_arch_init_vcpu(cpu);
    if (ret != 0) {
        return ret;
    }

    qemu_register_reset(kvm_reset_vcpu, cpu);
    kvm_arch_reset_vcpu(cpu);
    return ret;
}
|
|
|
|
|
2008-11-24 20:36:26 +01:00
|
|
|
/*
|
|
|
|
* dirty pages logging control
|
|
|
|
*/
|
2011-04-06 21:09:54 +02:00
|
|
|
|
2013-05-29 10:27:26 +02:00
|
|
|
/*
 * Build the KVM_MEM_* flag word for a slot.
 *
 * @log_dirty selects KVM_MEM_LOG_DIRTY_PAGES.  @readonly selects
 * KVM_MEM_READONLY, but only while kvm_readonly_mem_allowed is set.
 * @s is not consulted.
 */
static int kvm_mem_flags(KVMState *s, bool log_dirty, bool readonly)
{
    int flags = 0;

    if (log_dirty) {
        flags |= KVM_MEM_LOG_DIRTY_PAGES;
    }
    if (readonly && kvm_readonly_mem_allowed) {
        flags |= KVM_MEM_READONLY;
    }

    return flags;
}
|
|
|
|
|
|
|
|
/*
 * Switch dirty page logging for @mem on or off.
 *
 * The slot's stored flags are always updated.  The kernel is only told
 * when the flags it would receive (which include the logging bit whenever
 * migration_log is set) differ from the flags stored before the call.
 *
 * Returns 0 when no ioctl was needed, otherwise the result of
 * kvm_set_user_memory_region().
 */
static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty)
{
    KVMState *s = kvm_state;
    const int previous = mem->flags;
    int effective;

    mem->flags = (previous & ~KVM_MEM_LOG_DIRTY_PAGES) |
                 kvm_mem_flags(s, log_dirty, false);

    /* If nothing changed effectively, no need to issue ioctl */
    effective = mem->flags;
    if (s->migration_log) {
        effective |= KVM_MEM_LOG_DIRTY_PAGES;
    }

    return effective == previous ? 0 : kvm_set_user_memory_region(s, mem);
}
|
|
|
|
|
2012-10-23 12:30:10 +02:00
|
|
|
static int kvm_dirty_pages_log_change(hwaddr phys_addr,
|
2011-04-06 21:09:54 +02:00
|
|
|
ram_addr_t size, bool log_dirty)
|
|
|
|
{
|
|
|
|
KVMState *s = kvm_state;
|
|
|
|
KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size);
|
|
|
|
|
|
|
|
if (mem == NULL) {
|
|
|
|
fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-"
|
|
|
|
TARGET_FMT_plx "\n", __func__, phys_addr,
|
2012-10-23 12:30:10 +02:00
|
|
|
(hwaddr)(phys_addr + size - 1));
|
2011-04-06 21:09:54 +02:00
|
|
|
return -EINVAL;
|
|
|
|
}
|
|
|
|
return kvm_slot_dirty_pages_log_change(mem, log_dirty);
|
|
|
|
}
|
|
|
|
|
2011-12-18 13:06:05 +01:00
|
|
|
static void kvm_log_start(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
2008-11-24 20:36:26 +01:00
|
|
|
{
|
2011-12-18 13:06:05 +01:00
|
|
|
int r;
|
|
|
|
|
|
|
|
r = kvm_dirty_pages_log_change(section->offset_within_address_space,
|
2013-05-27 10:08:27 +02:00
|
|
|
int128_get64(section->size), true);
|
2011-12-18 13:06:05 +01:00
|
|
|
if (r < 0) {
|
|
|
|
abort();
|
|
|
|
}
|
2008-11-24 20:36:26 +01:00
|
|
|
}
|
|
|
|
|
2011-12-18 13:06:05 +01:00
|
|
|
static void kvm_log_stop(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
2008-11-24 20:36:26 +01:00
|
|
|
{
|
2011-12-18 13:06:05 +01:00
|
|
|
int r;
|
|
|
|
|
|
|
|
r = kvm_dirty_pages_log_change(section->offset_within_address_space,
|
2013-05-27 10:08:27 +02:00
|
|
|
int128_get64(section->size), false);
|
2011-12-18 13:06:05 +01:00
|
|
|
if (r < 0) {
|
|
|
|
abort();
|
|
|
|
}
|
2008-11-24 20:36:26 +01:00
|
|
|
}
|
|
|
|
|
2010-01-27 21:07:21 +01:00
|
|
|
static int kvm_set_migration_log(int enable)
|
2009-05-01 20:52:46 +02:00
|
|
|
{
|
|
|
|
KVMState *s = kvm_state;
|
|
|
|
KVMSlot *mem;
|
|
|
|
int i, err;
|
|
|
|
|
|
|
|
s->migration_log = enable;
|
|
|
|
|
2013-11-22 20:12:44 +01:00
|
|
|
for (i = 0; i < s->nr_slots; i++) {
|
2009-05-01 20:52:46 +02:00
|
|
|
mem = &s->slots[i];
|
|
|
|
|
2010-07-14 21:36:49 +02:00
|
|
|
if (!mem->memory_size) {
|
|
|
|
continue;
|
|
|
|
}
|
2009-05-01 20:52:46 +02:00
|
|
|
if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) {
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
err = kvm_set_user_memory_region(s, mem);
|
|
|
|
if (err) {
|
|
|
|
return err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2010-04-23 19:04:14 +02:00
|
|
|
/* get kvm's dirty pages bitmap and update qemu's */
|
2011-12-19 12:18:13 +01:00
|
|
|
static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section,
|
|
|
|
unsigned long *bitmap)
|
2009-07-27 12:49:56 +02:00
|
|
|
{
|
2013-11-05 15:45:46 +01:00
|
|
|
ram_addr_t start = section->offset_within_region + section->mr->ram_addr;
|
2013-11-05 15:52:54 +01:00
|
|
|
ram_addr_t pages = int128_get64(section->size) / getpagesize();
|
|
|
|
|
|
|
|
cpu_physical_memory_set_dirty_lebitmap(bitmap, start, pages);
|
2010-04-23 19:04:14 +02:00
|
|
|
return 0;
|
2009-07-27 12:49:56 +02:00
|
|
|
}
|
|
|
|
|
2010-04-23 19:04:14 +02:00
|
|
|
/* Round x up to the next multiple of y.  The mask arithmetic is only
 * correct when y is a power of two; y is evaluated twice. */
#define ALIGN(x, y)  (((x)+(y)-1) & ~((y)-1))
|
|
|
|
|
2008-11-24 20:36:26 +01:00
|
|
|
/**
|
|
|
|
* kvm_physical_sync_dirty_bitmap - Grab dirty bitmap from kernel space
|
2011-10-16 18:04:59 +02:00
|
|
|
* This function updates qemu's dirty bitmap using
|
|
|
|
* memory_region_set_dirty(). This means all bits are set
|
|
|
|
* to dirty.
|
2008-11-24 20:36:26 +01:00
|
|
|
*
|
2009-04-17 16:26:29 +02:00
|
|
|
* @start_add: start of logged region.
|
2008-11-24 20:36:26 +01:00
|
|
|
* @end_addr: end of logged region.
|
|
|
|
*/
|
2011-12-19 12:18:13 +01:00
|
|
|
static int kvm_physical_sync_dirty_bitmap(MemoryRegionSection *section)
|
2008-11-24 20:36:26 +01:00
|
|
|
{
|
|
|
|
KVMState *s = kvm_state;
|
2009-05-01 20:52:47 +02:00
|
|
|
unsigned long size, allocated_size = 0;
|
|
|
|
KVMDirtyLog d;
|
|
|
|
KVMSlot *mem;
|
|
|
|
int ret = 0;
|
2012-10-23 12:30:10 +02:00
|
|
|
hwaddr start_addr = section->offset_within_address_space;
|
2013-05-27 10:08:27 +02:00
|
|
|
hwaddr end_addr = start_addr + int128_get64(section->size);
|
2008-11-24 20:36:26 +01:00
|
|
|
|
2009-05-01 20:52:47 +02:00
|
|
|
d.dirty_bitmap = NULL;
|
|
|
|
while (start_addr < end_addr) {
|
|
|
|
mem = kvm_lookup_overlapping_slot(s, start_addr, end_addr);
|
|
|
|
if (mem == NULL) {
|
|
|
|
break;
|
|
|
|
}
|
2008-11-24 20:36:26 +01:00
|
|
|
|
fix crash in migration, 32-bit userspace on 64-bit host
This change fixes a long-standing immediate crash (memory corruption
and abort in glibc malloc code) in migration on 32bits.
The bug is present since this commit:
commit 692d9aca97b865b0f7903565274a52606910f129
Author: Bruce Rogers <brogers@novell.com>
Date: Wed Sep 23 16:13:18 2009 -0600
qemu-kvm: allocate correct size for dirty bitmap
The dirty bitmap copied out to userspace is stored in a long array,
and gets copied out to userspace accordingly. This patch accounts
for that correctly. Currently I'm seeing kvm crashing due to writing
beyond the end of the alloc'd dirty bitmap memory, because the buffer
has the wrong size.
Signed-off-by: Bruce Rogers
Signed-off-by: Marcelo Tosatti <mtosatti@redhat.com>
--- a/qemu-kvm.c
+++ b/qemu-kvm.c
@@ int kvm_get_dirty_pages_range(kvm_context_t kvm, unsigned long phys_addr,
- buf = qemu_malloc((slots[i].len / 4096 + 7) / 8 + 2);
+ buf = qemu_malloc(BITMAP_SIZE(slots[i].len));
r = kvm_get_map(kvm, KVM_GET_DIRTY_LOG, i, buf);
BITMAP_SIZE is now open-coded in that function, like this:
size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS), HOST_LONG_BITS) / 8;
The problem is that HOST_LONG_BITS in 32bit userspace is 32
but it's 64 in 64bit kernel. So userspace aligns this to
32, and kernel to 64, but since no length is passed from
userspace to kernel on ioctl, kernel uses its size calculation
and copies 4 extra bytes to userspace, corrupting memory.
Here's how it looks like during migrate execution:
our=20, kern=24
our=4, kern=8
...
our=4, kern=8
our=4064, kern=4064
our=512, kern=512
our=4, kern=8
our=20, kern=24
our=4, kern=8
...
our=4, kern=8
our=4064, kern=4064
*** glibc detected *** ./x86_64-softmmu/qemu-system-x86_64: realloc(): invalid next size: 0x08f20528 ***
(our is userspace size above, kern is the size as calculated
by the kernel).
Fix this by always aligning to 64 in a hope that no platform will
have sizeof(long)>8 any time soon, and add a comment describing it
all. It's a small price to pay for bad kernel design.
Alternatively it's possible to fix that in the kernel by using
different size calculation depending on the current process.
But this becomes quite ugly.
Special thanks goes to Stefan Hajnoczi for spotting the fundamental
cause of the issue, and to Alexander Graf for his support in #qemu.
Signed-off-by: Michael Tokarev <mjt@tls.msk.ru>
CC: Bruce Rogers <brogers@novell.com>
Signed-off-by: Avi Kivity <avi@redhat.com>
2011-04-26 18:13:49 +02:00
|
|
|
/* XXX bad kernel interface alert
|
|
|
|
* For dirty bitmap, kernel allocates array of size aligned to
|
|
|
|
* bits-per-long. But for case when the kernel is 64bits and
|
|
|
|
* the userspace is 32bits, userspace can't align to the same
|
|
|
|
* bits-per-long, since sizeof(long) is different between kernel
|
|
|
|
* and user space. This way, userspace will provide buffer which
|
|
|
|
* may be 4 bytes less than the kernel will use, resulting in
|
|
|
|
* userspace memory corruption (which is not detectable by valgrind
|
|
|
|
* too, in most cases).
|
|
|
|
* So for now, let's align to 64 instead of HOST_LONG_BITS here, in
|
|
|
|
* a hope that sizeof(long) wont become >8 any time soon.
|
|
|
|
*/
|
|
|
|
size = ALIGN(((mem->memory_size) >> TARGET_PAGE_BITS),
|
|
|
|
/*HOST_LONG_BITS*/ 64) / 8;
|
2009-05-01 20:52:47 +02:00
|
|
|
if (!d.dirty_bitmap) {
|
2011-08-21 05:09:37 +02:00
|
|
|
d.dirty_bitmap = g_malloc(size);
|
2009-05-01 20:52:47 +02:00
|
|
|
} else if (size > allocated_size) {
|
2011-08-21 05:09:37 +02:00
|
|
|
d.dirty_bitmap = g_realloc(d.dirty_bitmap, size);
|
2009-05-01 20:52:47 +02:00
|
|
|
}
|
|
|
|
allocated_size = size;
|
|
|
|
memset(d.dirty_bitmap, 0, allocated_size);
|
2008-11-24 20:36:26 +01:00
|
|
|
|
2009-05-01 20:52:47 +02:00
|
|
|
d.slot = mem->slot;
|
2008-11-24 20:36:26 +01:00
|
|
|
|
2009-07-27 22:23:59 +02:00
|
|
|
if (kvm_vm_ioctl(s, KVM_GET_DIRTY_LOG, &d) == -1) {
|
2010-04-18 16:22:14 +02:00
|
|
|
DPRINTF("ioctl failed %d\n", errno);
|
2009-05-01 20:52:47 +02:00
|
|
|
ret = -1;
|
|
|
|
break;
|
|
|
|
}
|
2008-11-24 20:36:26 +01:00
|
|
|
|
2011-12-19 12:18:13 +01:00
|
|
|
kvm_get_dirty_pages_log_range(section, d.dirty_bitmap);
|
2010-04-23 19:04:14 +02:00
|
|
|
start_addr = mem->start_addr + mem->memory_size;
|
2008-11-24 20:36:26 +01:00
|
|
|
}
|
2011-08-21 05:09:37 +02:00
|
|
|
g_free(d.dirty_bitmap);
|
2009-05-01 20:52:47 +02:00
|
|
|
|
|
|
|
return ret;
|
2008-11-24 20:36:26 +01:00
|
|
|
}
|
|
|
|
|
2012-10-02 18:21:54 +02:00
|
|
|
static void kvm_coalesce_mmio_region(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *secion,
|
2012-10-23 12:30:10 +02:00
|
|
|
hwaddr start, hwaddr size)
|
2008-12-09 21:09:57 +01:00
|
|
|
{
|
|
|
|
KVMState *s = kvm_state;
|
|
|
|
|
|
|
|
if (s->coalesced_mmio) {
|
|
|
|
struct kvm_coalesced_mmio_zone zone;
|
|
|
|
|
|
|
|
zone.addr = start;
|
|
|
|
zone.size = size;
|
2012-02-29 16:54:29 +01:00
|
|
|
zone.pad = 0;
|
2008-12-09 21:09:57 +01:00
|
|
|
|
2012-10-02 18:21:54 +02:00
|
|
|
(void)kvm_vm_ioctl(s, KVM_REGISTER_COALESCED_MMIO, &zone);
|
2008-12-09 21:09:57 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-10-02 18:21:54 +02:00
|
|
|
static void kvm_uncoalesce_mmio_region(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *secion,
|
2012-10-23 12:30:10 +02:00
|
|
|
hwaddr start, hwaddr size)
|
2008-12-09 21:09:57 +01:00
|
|
|
{
|
|
|
|
KVMState *s = kvm_state;
|
|
|
|
|
|
|
|
if (s->coalesced_mmio) {
|
|
|
|
struct kvm_coalesced_mmio_zone zone;
|
|
|
|
|
|
|
|
zone.addr = start;
|
|
|
|
zone.size = size;
|
2012-02-29 16:54:29 +01:00
|
|
|
zone.pad = 0;
|
2008-12-09 21:09:57 +01:00
|
|
|
|
2012-10-02 18:21:54 +02:00
|
|
|
(void)kvm_vm_ioctl(s, KVM_UNREGISTER_COALESCED_MMIO, &zone);
|
2008-12-09 21:09:57 +01:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-05-08 22:33:24 +02:00
|
|
|
/*
 * Query KVM_CHECK_EXTENSION for @extension.
 *
 * Returns the value reported by the ioctl; a failing ioctl is reported
 * as 0, so the result is never negative.
 */
int kvm_check_extension(KVMState *s, unsigned int extension)
{
    int ret = kvm_ioctl(s, KVM_CHECK_EXTENSION, extension);

    return ret < 0 ? 0 : ret;
}
|
|
|
|
|
2014-01-10 08:20:18 +01:00
|
|
|
/*
 * Attach or detach an eventfd to a guest MMIO address.
 *
 * @fd:        eventfd to bind.
 * @addr:      guest physical address being watched.
 * @val:       value to match; only used when @datamatch is true.
 * @assign:    true to bind, false to request removal of the binding.
 * @size:      access width passed to the kernel as the length.
 * @datamatch: true to request matching on @val.
 *
 * Returns 0 on success, -ENOSYS when KVM is not enabled, and a negative
 * errno value when the ioctl fails.
 */
static int kvm_set_ioeventfd_mmio(int fd, hwaddr addr, uint32_t val,
                                  bool assign, uint32_t size, bool datamatch)
{
    int ret;
    /* Use a designated initializer, as kvm_set_ioeventfd_pio() does, so
     * that every member not named here is zeroed instead of reaching the
     * kernel as uninitialized stack contents. */
    struct kvm_ioeventfd iofd = {
        .datamatch = datamatch ? val : 0,
        .addr = addr,
        .len = size,
        .flags = 0,
        .fd = fd,
    };

    if (!kvm_enabled()) {
        return -ENOSYS;
    }

    if (datamatch) {
        iofd.flags |= KVM_IOEVENTFD_FLAG_DATAMATCH;
    }
    if (!assign) {
        iofd.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
    }

    ret = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &iofd);

    if (ret < 0) {
        return -errno;
    }

    return 0;
}
|
|
|
|
|
2013-04-01 23:54:45 +02:00
|
|
|
static int kvm_set_ioeventfd_pio(int fd, uint16_t addr, uint16_t val,
|
2013-04-02 15:52:25 +02:00
|
|
|
bool assign, uint32_t size, bool datamatch)
|
2013-04-01 23:05:21 +02:00
|
|
|
{
|
|
|
|
struct kvm_ioeventfd kick = {
|
2013-04-02 15:52:25 +02:00
|
|
|
.datamatch = datamatch ? val : 0,
|
2013-04-01 23:05:21 +02:00
|
|
|
.addr = addr,
|
2013-04-02 15:52:25 +02:00
|
|
|
.flags = KVM_IOEVENTFD_FLAG_PIO,
|
2013-04-01 23:54:45 +02:00
|
|
|
.len = size,
|
2013-04-01 23:05:21 +02:00
|
|
|
.fd = fd,
|
|
|
|
};
|
|
|
|
int r;
|
|
|
|
if (!kvm_enabled()) {
|
|
|
|
return -ENOSYS;
|
|
|
|
}
|
2013-04-02 15:52:25 +02:00
|
|
|
if (datamatch) {
|
|
|
|
kick.flags |= KVM_IOEVENTFD_FLAG_DATAMATCH;
|
|
|
|
}
|
2013-04-01 23:05:21 +02:00
|
|
|
if (!assign) {
|
|
|
|
kick.flags |= KVM_IOEVENTFD_FLAG_DEASSIGN;
|
|
|
|
}
|
|
|
|
r = kvm_vm_ioctl(kvm_state, KVM_IOEVENTFD, &kick);
|
|
|
|
if (r < 0) {
|
|
|
|
return r;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2011-01-10 12:50:05 +01:00
|
|
|
/*
 * Probe whether the host accepts more than a handful of ioeventfds.
 *
 * Returns 1 when seven port I/O ioeventfds could be registered at once
 * and 0 otherwise, including builds without CONFIG_EVENTFD.  Everything
 * registered during the probe is removed again before returning.
 */
static int kvm_check_many_ioeventfds(void)
{
    /* Userspace can use ioeventfd for io notification.  This requires a host
     * that supports eventfd(2) and an I/O thread; since eventfd does not
     * support SIGIO it cannot interrupt the vcpu.
     *
     * Older kernels have a 6 device limit on the KVM io bus.  Find out so we
     * can avoid creating too many ioeventfds.
     */
#if defined(CONFIG_EVENTFD)
    int fds[7];
    int created = 0;
    int all_registered;

    while (created < ARRAY_SIZE(fds)) {
        int fd = eventfd(0, EFD_CLOEXEC);

        if (fd < 0) {
            break;
        }
        if (kvm_set_ioeventfd_pio(fd, 0, created, true, 2, true) < 0) {
            close(fd);
            break;
        }
        fds[created++] = fd;
    }

    /* Decide whether many devices are supported or not */
    all_registered = created == ARRAY_SIZE(fds);

    /* Tear down in reverse order of creation. */
    for (created--; created >= 0; created--) {
        kvm_set_ioeventfd_pio(fds[created], 0, created, false, 2, true);
        close(fds[created]);
    }
    return all_registered;
#else
    return 0;
#endif
}
|
|
|
|
|
2011-01-21 21:48:17 +01:00
|
|
|
static const KVMCapabilityInfo *
|
|
|
|
kvm_check_extension_list(KVMState *s, const KVMCapabilityInfo *list)
|
|
|
|
{
|
|
|
|
while (list->name) {
|
|
|
|
if (!kvm_check_extension(s, list->value)) {
|
|
|
|
return list;
|
|
|
|
}
|
|
|
|
list++;
|
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2011-12-18 13:06:05 +01:00
|
|
|
/*
 * Add or remove the guest memory described by @section from the KVM slot
 * table.
 *
 * @section: region section being added or removed.
 * @add:     true to register the memory, false to unregister it.
 *
 * The range is first trimmed to whole target pages.  Sections that are not
 * RAM are ignored unless they are read-only and read-only slots are
 * allowed; such a section is treated as a removal while the device is not
 * in romd mode.  Every existing slot overlapping the range is then
 * unregistered, and whatever part of it lies outside the range is
 * registered again as a prefix and/or suffix slot.  Finally, for an add,
 * the new range gets a slot of its own.
 *
 * Any failure reported while registering or unregistering a slot is fatal:
 * a message is printed to stderr and the process aborts.
 */
static void kvm_set_phys_mem(MemoryRegionSection *section, bool add)
{
    KVMState *s = kvm_state;
    KVMSlot *mem, old;
    int err;
    MemoryRegion *mr = section->mr;
    bool log_dirty = memory_region_is_logging(mr);
    bool writeable = !mr->readonly && !mr->rom_device;
    bool readonly_flag = mr->readonly || memory_region_is_romd(mr);
    hwaddr start_addr = section->offset_within_address_space;
    ram_addr_t size = int128_get64(section->size);
    void *ram = NULL;
    unsigned delta;

    /* kvm works in page size chunks, but the function may be called
       with sub-page size and unaligned start address. Pad the start
       address to the next page boundary and truncate the size to the
       previous one.  The padding has to be derived from the start address:
       deriving it from the size moves an aligned start off alignment
       whenever only the size is unaligned. */
    delta = TARGET_PAGE_ALIGN(start_addr) - start_addr;
    delta &= ~TARGET_PAGE_MASK;
    if (delta > size) {
        return;
    }
    start_addr += delta;
    size -= delta;
    size &= TARGET_PAGE_MASK;
    if (!size || (start_addr & ~TARGET_PAGE_MASK)) {
        return;
    }

    if (!memory_region_is_ram(mr)) {
        if (writeable || !kvm_readonly_mem_allowed) {
            return;
        } else if (!mr->romd_mode) {
            /* If the memory device is not in romd_mode, then we actually want
             * to remove the kvm memory slot so all accesses will trap. */
            add = false;
        }
    }

    /* Host address matching the (possibly padded) start address. */
    ram = memory_region_get_ram_ptr(mr) + section->offset_within_region + delta;

    while (1) {
        mem = kvm_lookup_overlapping_slot(s, start_addr, start_addr + size);
        if (!mem) {
            break;
        }

        if (add && start_addr >= mem->start_addr &&
            (start_addr + size <= mem->start_addr + mem->memory_size) &&
            (ram - start_addr == mem->ram - mem->start_addr)) {
            /* The new slot fits into the existing one and comes with
             * identical parameters - update flags and done. */
            kvm_slot_dirty_pages_log_change(mem, log_dirty);
            return;
        }

        /* Keep a copy: the slot entry is about to be recycled. */
        old = *mem;

        if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) {
            kvm_physical_sync_dirty_bitmap(section);
        }

        /* unregister the overlapping slot */
        mem->memory_size = 0;
        err = kvm_set_user_memory_region(s, mem);
        if (err) {
            fprintf(stderr, "%s: error unregistering overlapping slot: %s\n",
                    __func__, strerror(-err));
            abort();
        }

        /* Workaround for older KVM versions: we can't join slots, even not by
         * unregistering the previous ones and then registering the larger
         * slot. We have to maintain the existing fragmentation. Sigh.
         *
         * This workaround assumes that the new slot starts at the same
         * address as the first existing one. If not or if some overlapping
         * slot comes around later, we will fail (not seen in practice so far)
         * - and actually require a recent KVM version. */
        if (s->broken_set_mem_region &&
            old.start_addr == start_addr && old.memory_size < size && add) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = old.memory_size;
            mem->start_addr = old.start_addr;
            mem->ram = old.ram;
            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error updating slot: %s\n", __func__,
                        strerror(-err));
                abort();
            }

            /* Carry on with what is left of the new range. */
            start_addr += old.memory_size;
            ram += old.memory_size;
            size -= old.memory_size;
            continue;
        }

        /* register prefix slot */
        if (old.start_addr < start_addr) {
            mem = kvm_alloc_slot(s);
            mem->memory_size = start_addr - old.start_addr;
            mem->start_addr = old.start_addr;
            mem->ram = old.ram;
            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering prefix slot: %s\n",
                        __func__, strerror(-err));
#ifdef TARGET_PPC
                fprintf(stderr, "%s: This is probably because your kernel's " \
                                "PAGE_SIZE is too big. Please try to use 4k " \
                                "PAGE_SIZE!\n", __func__);
#endif
                abort();
            }
        }

        /* register suffix slot */
        if (old.start_addr + old.memory_size > start_addr + size) {
            ram_addr_t size_delta;

            mem = kvm_alloc_slot(s);
            mem->start_addr = start_addr + size;
            size_delta = mem->start_addr - old.start_addr;
            mem->memory_size = old.memory_size - size_delta;
            mem->ram = old.ram + size_delta;
            mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);

            err = kvm_set_user_memory_region(s, mem);
            if (err) {
                fprintf(stderr, "%s: error registering suffix slot: %s\n",
                        __func__, strerror(-err));
                abort();
            }
        }
    }

    /* in case the KVM bug workaround already "consumed" the new slot */
    if (!size) {
        return;
    }
    if (!add) {
        return;
    }
    mem = kvm_alloc_slot(s);
    mem->memory_size = size;
    mem->start_addr = start_addr;
    mem->ram = ram;
    mem->flags = kvm_mem_flags(s, log_dirty, readonly_flag);

    err = kvm_set_user_memory_region(s, mem);
    if (err) {
        fprintf(stderr, "%s: error registering slot: %s\n", __func__,
                strerror(-err));
        abort();
    }
}
|
|
|
|
|
2011-12-18 13:06:05 +01:00
|
|
|
static void kvm_region_add(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
|
|
|
{
|
2013-05-06 10:46:11 +02:00
|
|
|
memory_region_ref(section->mr);
|
2011-12-18 13:06:05 +01:00
|
|
|
kvm_set_phys_mem(section, true);
|
|
|
|
}
|
|
|
|
|
|
|
|
static void kvm_region_del(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
|
|
|
{
|
|
|
|
kvm_set_phys_mem(section, false);
|
2013-05-06 10:46:11 +02:00
|
|
|
memory_region_unref(section->mr);
|
2011-12-18 13:06:05 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
static void kvm_log_sync(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section)
|
2010-01-27 21:07:21 +01:00
|
|
|
{
|
2011-12-18 13:06:05 +01:00
|
|
|
int r;
|
|
|
|
|
2011-12-19 12:18:13 +01:00
|
|
|
r = kvm_physical_sync_dirty_bitmap(section);
|
2011-12-18 13:06:05 +01:00
|
|
|
if (r < 0) {
|
|
|
|
abort();
|
|
|
|
}
|
2010-01-27 21:07:21 +01:00
|
|
|
}
|
|
|
|
|
2011-12-18 13:06:05 +01:00
|
|
|
/*
 * log_global_start hook of kvm_memory_listener: calls
 * kvm_set_migration_log(1) and asserts that it did not fail.
 */
static void kvm_log_global_start(struct MemoryListener *listener)
{
    int rc = kvm_set_migration_log(1);

    assert(rc >= 0);
}
|
|
|
|
|
2011-12-18 13:06:05 +01:00
|
|
|
/*
 * log_global_stop hook of kvm_memory_listener: calls
 * kvm_set_migration_log(0) and asserts that it did not fail.
 */
static void kvm_log_global_stop(struct MemoryListener *listener)
{
    int rc = kvm_set_migration_log(0);

    assert(rc >= 0);
}
|
|
|
|
|
2012-09-30 22:21:11 +02:00
|
|
|
static void kvm_mem_ioeventfd_add(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
|
|
|
bool match_data, uint64_t data,
|
|
|
|
EventNotifier *e)
|
|
|
|
{
|
|
|
|
int fd = event_notifier_get_fd(e);
|
2012-02-08 15:39:06 +01:00
|
|
|
int r;
|
|
|
|
|
2012-03-20 13:31:38 +01:00
|
|
|
r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
|
2013-05-27 10:08:27 +02:00
|
|
|
data, true, int128_get64(section->size),
|
|
|
|
match_data);
|
2012-02-08 15:39:06 +01:00
|
|
|
if (r < 0) {
|
2013-05-22 06:57:35 +02:00
|
|
|
fprintf(stderr, "%s: error adding ioeventfd: %s\n",
|
|
|
|
__func__, strerror(-r));
|
2012-02-08 15:39:06 +01:00
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-09-30 22:21:11 +02:00
|
|
|
static void kvm_mem_ioeventfd_del(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
|
|
|
bool match_data, uint64_t data,
|
|
|
|
EventNotifier *e)
|
2012-02-08 15:39:06 +01:00
|
|
|
{
|
2012-09-30 22:21:11 +02:00
|
|
|
int fd = event_notifier_get_fd(e);
|
2012-02-08 15:39:06 +01:00
|
|
|
int r;
|
|
|
|
|
2012-03-20 13:31:38 +01:00
|
|
|
r = kvm_set_ioeventfd_mmio(fd, section->offset_within_address_space,
|
2013-05-27 10:08:27 +02:00
|
|
|
data, false, int128_get64(section->size),
|
|
|
|
match_data);
|
2012-02-08 15:39:06 +01:00
|
|
|
if (r < 0) {
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-09-30 22:21:11 +02:00
|
|
|
static void kvm_io_ioeventfd_add(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
|
|
|
bool match_data, uint64_t data,
|
|
|
|
EventNotifier *e)
|
2012-02-08 15:39:06 +01:00
|
|
|
{
|
2012-09-30 22:21:11 +02:00
|
|
|
int fd = event_notifier_get_fd(e);
|
2012-02-08 15:39:06 +01:00
|
|
|
int r;
|
|
|
|
|
2013-04-01 23:54:45 +02:00
|
|
|
r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
|
2013-05-27 10:08:27 +02:00
|
|
|
data, true, int128_get64(section->size),
|
|
|
|
match_data);
|
2012-02-08 15:39:06 +01:00
|
|
|
if (r < 0) {
|
2013-05-22 06:57:35 +02:00
|
|
|
fprintf(stderr, "%s: error adding ioeventfd: %s\n",
|
|
|
|
__func__, strerror(-r));
|
2012-02-08 15:39:06 +01:00
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2012-09-30 22:21:11 +02:00
|
|
|
static void kvm_io_ioeventfd_del(MemoryListener *listener,
|
|
|
|
MemoryRegionSection *section,
|
|
|
|
bool match_data, uint64_t data,
|
|
|
|
EventNotifier *e)
|
2012-02-08 15:39:06 +01:00
|
|
|
|
|
|
|
{
|
2012-09-30 22:21:11 +02:00
|
|
|
int fd = event_notifier_get_fd(e);
|
2012-02-08 15:39:06 +01:00
|
|
|
int r;
|
|
|
|
|
2013-04-01 23:54:45 +02:00
|
|
|
r = kvm_set_ioeventfd_pio(fd, section->offset_within_address_space,
|
2013-05-27 10:08:27 +02:00
|
|
|
data, false, int128_get64(section->size),
|
|
|
|
match_data);
|
2012-02-08 15:39:06 +01:00
|
|
|
if (r < 0) {
|
|
|
|
abort();
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2011-12-18 13:06:05 +01:00
|
|
|
static MemoryListener kvm_memory_listener = {
|
|
|
|
.region_add = kvm_region_add,
|
|
|
|
.region_del = kvm_region_del,
|
2011-02-07 12:19:23 +01:00
|
|
|
.log_start = kvm_log_start,
|
|
|
|
.log_stop = kvm_log_stop,
|
2011-12-18 13:06:05 +01:00
|
|
|
.log_sync = kvm_log_sync,
|
|
|
|
.log_global_start = kvm_log_global_start,
|
|
|
|
.log_global_stop = kvm_log_global_stop,
|
2012-09-30 22:21:11 +02:00
|
|
|
.eventfd_add = kvm_mem_ioeventfd_add,
|
|
|
|
.eventfd_del = kvm_mem_ioeventfd_del,
|
2012-10-02 18:21:54 +02:00
|
|
|
.coalesced_mmio_add = kvm_coalesce_mmio_region,
|
|
|
|
.coalesced_mmio_del = kvm_uncoalesce_mmio_region,
|
2012-09-30 22:21:11 +02:00
|
|
|
.priority = 10,
|
|
|
|
};
|
|
|
|
|
|
|
|
static MemoryListener kvm_io_listener = {
|
|
|
|
.eventfd_add = kvm_io_ioeventfd_add,
|
|
|
|
.eventfd_del = kvm_io_ioeventfd_del,
|
2012-02-08 14:05:50 +01:00
|
|
|
.priority = 10,
|
2010-01-27 21:07:21 +01:00
|
|
|
};
|
|
|
|
|
2013-01-18 15:03:43 +01:00
|
|
|
/*
 * Interrupt handler installed by kvm_init() as cpu_interrupt_handler.
 *
 * ORs @mask into @cpu's interrupt_request and, unless the caller is
 * already running as that CPU, kicks it.
 */
static void kvm_handle_interrupt(CPUState *cpu, int mask)
{
    cpu->interrupt_request |= mask;

    if (qemu_cpu_is_self(cpu)) {
        /* No kick needed when we are the target CPU ourselves. */
        return;
    }

    qemu_cpu_kick(cpu);
}
|
|
|
|
|
2012-07-26 16:35:12 +02:00
|
|
|
/*
 * Set interrupt line @irq to @level through the VM ioctl selected in
 * s->irq_set_ioctl.
 *
 * Asserts that asynchronous interrupt delivery is enabled.  An ioctl
 * failure is fatal.
 *
 * Returns 1 when s->irq_set_ioctl is KVM_IRQ_LINE; otherwise returns the
 * status field of the request structure after the ioctl.
 */
int kvm_set_irq(KVMState *s, int irq, int level)
{
    struct kvm_irq_level req;
    int rc;

    assert(kvm_async_interrupts_enabled());

    req.irq = irq;
    req.level = level;

    rc = kvm_vm_ioctl(s, s->irq_set_ioctl, &req);
    if (rc < 0) {
        perror("kvm_set_irq");
        abort();
    }

    if (s->irq_set_ioctl == KVM_IRQ_LINE) {
        return 1;
    }
    return req.status;
}
|
|
|
|
|
|
|
|
#ifdef KVM_CAP_IRQ_ROUTING
|
2012-06-05 21:03:57 +02:00
|
|
|
typedef struct KVMMSIRoute {
|
|
|
|
struct kvm_irq_routing_entry kroute;
|
|
|
|
QTAILQ_ENTRY(KVMMSIRoute) entry;
|
|
|
|
} KVMMSIRoute;
|
|
|
|
|
2011-10-15 11:49:47 +02:00
|
|
|
/*
 * Mark @gsi as used: set its bit in s->used_gsi_bitmap, which stores
 * 32 GSIs per word.
 */
static void set_gsi(KVMState *s, unsigned int gsi)
{
    const unsigned int word = gsi / 32;
    const unsigned int mask = 1U << (gsi % 32);

    s->used_gsi_bitmap[word] |= mask;
}
|
|
|
|
|
2012-05-16 20:41:10 +02:00
|
|
|
/*
 * Mark @gsi as free: clear its bit in s->used_gsi_bitmap, which stores
 * 32 GSIs per word.
 */
static void clear_gsi(KVMState *s, unsigned int gsi)
{
    const unsigned int word = gsi / 32;
    const unsigned int mask = 1U << (gsi % 32);

    s->used_gsi_bitmap[word] &= ~mask;
}
|
|
|
|
|
2013-04-16 15:58:13 +02:00
|
|
|
/*
 * Set up the IRQ routing bookkeeping in @s:
 *  - the used-GSI bitmap, sized from the value reported for
 *    KVM_CAP_IRQ_ROUTING (left untouched when that value is not positive),
 *  - an empty routing table,
 *  - the MSI route hash table, when direct MSI injection is unavailable,
 * and then runs the architecture hook kvm_arch_init_irq_routing().
 */
void kvm_init_irq_routing(KVMState *s)
{
    int gsi_count, i;

    gsi_count = kvm_check_extension(s, KVM_CAP_IRQ_ROUTING);
    if (gsi_count > 0) {
        /*
         * Use a distinct name for the bitmap index: the original declared
         * a second 'i' here that shadowed the outer loop counter.
         */
        unsigned int gsi_bits, gsi;

        /* Round up so we can search ints using ffs */
        gsi_bits = ALIGN(gsi_count, 32);
        s->used_gsi_bitmap = g_malloc0(gsi_bits / 8);
        s->gsi_count = gsi_count;

        /* Mark any over-allocated bits as already in use */
        for (gsi = gsi_count; gsi < gsi_bits; gsi++) {
            set_gsi(s, gsi);
        }
    }

    /* Start with a header-only table; entries are added on demand. */
    s->irq_routes = g_malloc0(sizeof(*s->irq_routes));
    s->nr_allocated_irq_routes = 0;

    if (!s->direct_msi) {
        for (i = 0; i < KVM_MSI_HASHTAB_SIZE; i++) {
            QTAILQ_INIT(&s->msi_hashtab[i]);
        }
    }

    kvm_arch_init_irq_routing(s);
}
|
|
|
|
|
2013-04-17 01:11:55 +02:00
|
|
|
/*
 * Submit the routing table held in s->irq_routes with the
 * KVM_SET_GSI_ROUTING VM ioctl.  The table's flags are reset to 0 first,
 * and the ioctl is asserted to return 0.
 */
void kvm_irqchip_commit_routes(KVMState *s)
{
    int rc;

    s->irq_routes->flags = 0;

    rc = kvm_vm_ioctl(s, KVM_SET_GSI_ROUTING, s->irq_routes);
    assert(rc == 0);
}
|
|
|
|
|
2011-10-15 11:49:47 +02:00
|
|
|
/*
 * Append a copy of @entry to the routing table in @s and mark its GSI
 * as used.
 *
 * When the table is full its capacity is doubled, with a minimum of 64
 * entries.  The table is not committed here; callers do that separately.
 */
static void kvm_add_routing_entry(KVMState *s,
                                  struct kvm_irq_routing_entry *entry)
{
    struct kvm_irq_routing_entry *slot;
    int idx;

    if (s->irq_routes->nr == s->nr_allocated_irq_routes) {
        int capacity = s->nr_allocated_irq_routes * 2;
        int bytes;

        if (capacity < 64) {
            capacity = 64;
        }

        /* header plus 'capacity' trailing entries */
        bytes = sizeof(struct kvm_irq_routing);
        bytes += capacity * sizeof(*slot);

        s->irq_routes = g_realloc(s->irq_routes, bytes);
        s->nr_allocated_irq_routes = capacity;
    }

    idx = s->irq_routes->nr++;
    slot = &s->irq_routes->entries[idx];
    *slot = *entry;

    set_gsi(s, entry->gsi);
}
|
|
|
|
|
2012-08-27 08:28:38 +02:00
|
|
|
/*
 * Replace the first routing entry whose GSI equals new_entry->gsi with
 * @new_entry.
 *
 * The table is committed only when the stored entry actually differs
 * (compared bytewise).  Returns 0 when a matching GSI was found, and
 * -ESRCH when the table has no entry for that GSI.
 */
static int kvm_update_routing_entry(KVMState *s,
                                    struct kvm_irq_routing_entry *new_entry)
{
    struct kvm_irq_routing_entry *cur;
    int idx;

    for (idx = 0; idx < s->irq_routes->nr; idx++) {
        cur = &s->irq_routes->entries[idx];

        if (cur->gsi == new_entry->gsi) {
            if (memcmp(cur, new_entry, sizeof(*cur)) != 0) {
                *cur = *new_entry;
                kvm_irqchip_commit_routes(s);
            }
            return 0;
        }
    }

    return -ESRCH;
}
|
|
|
|
|
2012-05-17 15:32:32 +02:00
|
|
|
/*
 * Add a routing entry that maps GSI @irq to pin @pin of in-kernel
 * irqchip @irqchip.  @pin is asserted to be below s->gsi_count.
 *
 * The entry is only added to the table; it is not committed here.
 */
void kvm_irqchip_add_irq_route(KVMState *s, int irq, int irqchip, int pin)
{
    /* start from an all-zero entry */
    struct kvm_irq_routing_entry route = {};

    assert(pin < s->gsi_count);

    route.type = KVM_IRQ_ROUTING_IRQCHIP;
    route.gsi = irq;
    route.flags = 0;
    route.u.irqchip.irqchip = irqchip;
    route.u.irqchip.pin = pin;

    kvm_add_routing_entry(s, &route);
}
|
|
|
|
|
2012-05-17 15:32:34 +02:00
|
|
|
/*
 * Remove every routing entry whose GSI equals @virq from the routing
 * table in @s and mark the GSI as free again.
 *
 * Does nothing when GSI direct mapping is in use.  The table is not
 * committed here.
 */
void kvm_irqchip_release_virq(KVMState *s, int virq)
{
    struct kvm_irq_routing_entry *e;
    int i;

    if (kvm_gsi_direct_mapping()) {
        return;
    }

    i = 0;
    while (i < s->irq_routes->nr) {
        e = &s->irq_routes->entries[i];
        if (e->gsi == virq) {
            /*
             * Remove by moving the last entry into this slot.  Do not
             * advance: the moved entry has not been examined yet and may
             * itself route the same GSI.
             */
            s->irq_routes->nr--;
            *e = s->irq_routes->entries[s->irq_routes->nr];
        } else {
            i++;
        }
    }
    clear_gsi(s, virq);
}
|
|
|
|
|
|
|
|
/*
 * Hash an MSI data word into a bucket index for the MSI route hash table:
 * the low 8 bits of @data.
 *
 * This is optimized for IA32 MSI layout. However, no other arch shall
 * repeat the mistake of not providing a direct MSI injection API.
 */
static unsigned int kvm_hash_msi(uint32_t data)
{
    const uint32_t low_byte_mask = 0xff;

    return data & low_byte_mask;
}
|
|
|
|
|
|
|
|
/*
 * Drop every route held in the MSI hash table of @s: release its virq,
 * unlink it from its bucket and free it.
 */
static void kvm_flush_dynamic_msi_routes(KVMState *s)
{
    unsigned int bucket;

    for (bucket = 0; bucket < KVM_MSI_HASHTAB_SIZE; bucket++) {
        KVMMSIRoute *cur, *following;

        /* _SAFE variant: the current element is removed while iterating */
        QTAILQ_FOREACH_SAFE(cur, &s->msi_hashtab[bucket], entry, following) {
            kvm_irqchip_release_virq(s, cur->kroute.gsi);
            QTAILQ_REMOVE(&s->msi_hashtab[bucket], cur, entry);
            g_free(cur);
        }
    }
}
|
|
|
|
|
|
|
|
/*
 * Find the lowest GSI whose bit is clear in s->used_gsi_bitmap.
 *
 * If none is free and direct MSI injection is unavailable, the dynamic
 * MSI routes are flushed once and the search is repeated.
 *
 * Returns the GSI number, or -ENOSPC when no free GSI could be found.
 * The bit is not set here; kvm_add_routing_entry() does that.
 */
static int kvm_irqchip_get_virq(KVMState *s)
{
    uint32_t *bitmap = s->used_gsi_bitmap;
    int nwords = ALIGN(s->gsi_count, 32) / 32;
    int attempt;

    for (attempt = 0; ; attempt++) {
        int w;

        /* Return the lowest unused GSI in the bitmap */
        for (w = 0; w < nwords; w++) {
            /* ffs() is 1-based and yields 0 when the word is full */
            int pos = ffs(~bitmap[w]);

            if (pos) {
                return pos - 1 + w * 32;
            }
        }

        /* Flushing only helps, and is only tried, once. */
        if (s->direct_msi || attempt > 0) {
            break;
        }
        kvm_flush_dynamic_msi_routes(s);
    }

    return -ENOSPC;
}
|
|
|
|
|
|
|
|
/*
 * Look up the cached route for @msg in the MSI hash table of @s.
 *
 * A route matches when both address halves and the data word
 * (after le32_to_cpu) are equal.  Returns the route, or NULL if there
 * is none.
 */
static KVMMSIRoute *kvm_lookup_msi_route(KVMState *s, MSIMessage msg)
{
    unsigned int bucket = kvm_hash_msi(msg.data);
    KVMMSIRoute *cand;

    QTAILQ_FOREACH(cand, &s->msi_hashtab[bucket], entry) {
        if (cand->kroute.u.msi.address_lo != (uint32_t)msg.address) {
            continue;
        }
        if (cand->kroute.u.msi.address_hi != (msg.address >> 32)) {
            continue;
        }
        if (cand->kroute.u.msi.data != le32_to_cpu(msg.data)) {
            continue;
        }
        return cand;
    }

    return NULL;
}
|
|
|
|
|
|
|
|
/*
 * Deliver MSI message @msg to the guest.
 *
 * With direct MSI support the message is sent with the KVM_SIGNAL_MSI
 * VM ioctl and that ioctl's result is returned.
 *
 * Otherwise a cached route for the message is looked up, or created,
 * committed and cached if missing, and the route's GSI is raised through
 * kvm_set_irq(), whose result is returned.  If no virq can be allocated
 * for a new route, the negative error from kvm_irqchip_get_virq() is
 * returned.
 */
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
{
    KVMMSIRoute *route;

    if (s->direct_msi) {
        struct kvm_msi msi;

        msi.address_lo = (uint32_t)msg.address;
        msi.address_hi = msg.address >> 32;
        msi.data = le32_to_cpu(msg.data);
        msi.flags = 0;
        memset(msi.pad, 0, sizeof(msi.pad));

        return kvm_vm_ioctl(s, KVM_SIGNAL_MSI, &msi);
    }

    route = kvm_lookup_msi_route(s, msg);
    if (route == NULL) {
        int virq = kvm_irqchip_get_virq(s);

        if (virq < 0) {
            return virq;
        }

        /* zero-allocated, so fields not set below stay 0 */
        route = g_malloc0(sizeof(KVMMSIRoute));
        route->kroute.gsi = virq;
        route->kroute.type = KVM_IRQ_ROUTING_MSI;
        route->kroute.flags = 0;
        route->kroute.u.msi.address_lo = (uint32_t)msg.address;
        route->kroute.u.msi.address_hi = msg.address >> 32;
        route->kroute.u.msi.data = le32_to_cpu(msg.data);

        kvm_add_routing_entry(s, &route->kroute);
        kvm_irqchip_commit_routes(s);

        /* cache under the same hash kvm_lookup_msi_route() uses */
        QTAILQ_INSERT_TAIL(&s->msi_hashtab[kvm_hash_msi(msg.data)], route,
                           entry);
    }

    assert(route->kroute.type == KVM_IRQ_ROUTING_MSI);

    return kvm_set_irq(s, route->kroute.gsi, 1);
}
|
|
|
|
|
2012-05-17 15:32:33 +02:00
|
|
|
/*
 * Allocate a virq and install a committed MSI route for @msg on it.
 *
 * Returns:
 *  - msg.data & 0xffff when GSI direct mapping is in use (no route added),
 *  - -ENOSYS when GSI routing is not enabled,
 *  - the negative error from kvm_irqchip_get_virq() if no virq is free,
 *  - otherwise the newly allocated virq.
 */
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
{
    /* start from an all-zero entry */
    struct kvm_irq_routing_entry route = {};
    int virq;

    if (kvm_gsi_direct_mapping()) {
        return msg.data & 0xffff;
    }

    if (!kvm_gsi_routing_enabled()) {
        return -ENOSYS;
    }

    virq = kvm_irqchip_get_virq(s);
    if (virq < 0) {
        return virq;
    }

    route.type = KVM_IRQ_ROUTING_MSI;
    route.gsi = virq;
    route.flags = 0;
    route.u.msi.address_lo = (uint32_t)msg.address;
    route.u.msi.address_hi = msg.address >> 32;
    route.u.msi.data = le32_to_cpu(msg.data);

    kvm_add_routing_entry(s, &route);
    kvm_irqchip_commit_routes(s);

    return virq;
}
|
|
|
|
|
2012-08-27 08:28:38 +02:00
|
|
|
/*
 * Point the existing route for @virq at MSI message @msg.
 *
 * Returns 0 without doing anything when GSI direct mapping is in use,
 * -ENOSYS when there is no in-kernel irqchip, and otherwise the result
 * of kvm_update_routing_entry().
 */
int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
{
    /* start from an all-zero entry */
    struct kvm_irq_routing_entry route = {};

    if (kvm_gsi_direct_mapping()) {
        return 0;
    }

    if (!kvm_irqchip_in_kernel()) {
        return -ENOSYS;
    }

    route.type = KVM_IRQ_ROUTING_MSI;
    route.gsi = virq;
    route.flags = 0;
    route.u.msi.address_lo = (uint32_t)msg.address;
    route.u.msi.address_hi = msg.address >> 32;
    route.u.msi.data = le32_to_cpu(msg.data);

    return kvm_update_routing_entry(s, &route);
}
|
|
|
|
|
2013-07-22 11:51:33 +02:00
|
|
|
/*
 * Issue a KVM_IRQFD VM ioctl for eventfd @fd and GSI @virq.
 *
 * @assign selects between flags 0 and KVM_IRQFD_FLAG_DEASSIGN.
 * @rfd, when not -1, is passed as the resample fd together with
 * KVM_IRQFD_FLAG_RESAMPLE.
 *
 * Returns -ENOSYS when irqfds are not enabled, otherwise the ioctl result.
 */
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq,
                                    bool assign)
{
    /* members not named here, including flags, start out as zero */
    struct kvm_irqfd req = {
        .fd = fd,
        .gsi = virq,
    };

    if (!assign) {
        req.flags = KVM_IRQFD_FLAG_DEASSIGN;
    }

    if (rfd != -1) {
        req.flags |= KVM_IRQFD_FLAG_RESAMPLE;
        req.resamplefd = rfd;
    }

    if (!kvm_irqfds_enabled()) {
        return -ENOSYS;
    }

    return kvm_vm_ioctl(s, KVM_IRQFD, &req);
}
|
|
|
|
|
2011-10-15 11:49:47 +02:00
|
|
|
#else /* !KVM_CAP_IRQ_ROUTING */
|
|
|
|
|
2013-04-16 15:58:13 +02:00
|
|
|
/* Variant used when KVM_CAP_IRQ_ROUTING is not defined: nothing to set up. */
void kvm_init_irq_routing(KVMState *s)
{
    /* intentionally empty */
}
|
2012-05-16 20:41:10 +02:00
|
|
|
|
2012-06-05 21:03:57 +02:00
|
|
|
/* Variant used when KVM_CAP_IRQ_ROUTING is not defined: nothing to release. */
void kvm_irqchip_release_virq(KVMState *s, int virq)
{
    /* intentionally empty */
}
|
|
|
|
|
2012-05-16 20:41:10 +02:00
|
|
|
/*
 * Variant used when KVM_CAP_IRQ_ROUTING is not defined: calling it is
 * treated as a fatal error.
 */
int kvm_irqchip_send_msi(KVMState *s, MSIMessage msg)
{
    /* never returns */
    abort();
}
|
2012-05-17 15:32:33 +02:00
|
|
|
|
|
|
|
/*
 * Variant used when KVM_CAP_IRQ_ROUTING is not defined: always reports
 * -ENOSYS.
 */
int kvm_irqchip_add_msi_route(KVMState *s, MSIMessage msg)
{
    return -ENOSYS;
}
|
2012-05-17 15:32:36 +02:00
|
|
|
|
|
|
|
/*
 * Variant used when KVM_CAP_IRQ_ROUTING is not defined: calling it is
 * treated as a fatal error.
 *
 * The parameter list must match the KVM_CAP_IRQ_ROUTING variant,
 * including @rfd, because kvm_irqchip_add_irqfd_notifier() and
 * kvm_irqchip_remove_irqfd_notifier() call it with five arguments in
 * both configurations.
 */
static int kvm_irqchip_assign_irqfd(KVMState *s, int fd, int rfd, int virq,
                                    bool assign)
{
    /* never returns */
    abort();
}
|
2013-01-15 18:50:13 +01:00
|
|
|
|
|
|
|
/*
 * Variant used when KVM_CAP_IRQ_ROUTING is not defined: always reports
 * -ENOSYS.
 */
int kvm_irqchip_update_msi_route(KVMState *s, int virq, MSIMessage msg)
{
    return -ENOSYS;
}
|
2011-10-15 11:49:47 +02:00
|
|
|
#endif /* !KVM_CAP_IRQ_ROUTING */
|
|
|
|
|
2013-07-22 11:51:33 +02:00
|
|
|
/*
 * Attach the eventfd behind @n to GSI @virq.  When @rn is non-NULL its
 * eventfd is passed along as the resample fd; otherwise -1 is passed.
 *
 * Returns the result of kvm_irqchip_assign_irqfd().
 */
int kvm_irqchip_add_irqfd_notifier(KVMState *s, EventNotifier *n,
                                   EventNotifier *rn, int virq)
{
    int fd = event_notifier_get_fd(n);
    int resample_fd = -1;

    if (rn) {
        resample_fd = event_notifier_get_fd(rn);
    }

    return kvm_irqchip_assign_irqfd(s, fd, resample_fd, virq, true);
}
|
|
|
|
|
2012-08-20 10:55:56 +02:00
|
|
|
/*
 * Detach the eventfd behind @n from GSI @virq.  No resample fd is
 * passed (-1).
 *
 * Returns the result of kvm_irqchip_assign_irqfd().
 */
int kvm_irqchip_remove_irqfd_notifier(KVMState *s, EventNotifier *n, int virq)
{
    int fd = event_notifier_get_fd(n);

    return kvm_irqchip_assign_irqfd(s, fd, -1, virq, false);
}
|
|
|
|
|
2011-10-15 11:49:47 +02:00
|
|
|
/*
 * Create the in-kernel irqchip if requested and available.
 *
 * Returns 0 without creating anything when the "kernel_irqchip" machine
 * option is off (it defaults to on) or KVM_CAP_IRQCHIP is not reported.
 * Returns a negative value if the arch hook or KVM_CREATE_IRQCHIP fails.
 * On success the kernel-irqchip related globals are enabled, IRQ routing
 * is initialised and 0 is returned.
 */
static int kvm_irqchip_create(KVMState *s)
{
    int rc;

    if (!qemu_opt_get_bool(qemu_get_machine_opts(), "kernel_irqchip", true)) {
        return 0;
    }
    if (!kvm_check_extension(s, KVM_CAP_IRQCHIP)) {
        return 0;
    }

    /*
     * Give the arch-specific hook the first chance to create the
     * in-kernel irqchip; a result of 0 means we fall back to the generic
     * KVM_CREATE_IRQCHIP ioctl.
     */
    rc = kvm_arch_irqchip_create(s);
    if (rc < 0) {
        return rc;
    }
    if (rc == 0) {
        rc = kvm_vm_ioctl(s, KVM_CREATE_IRQCHIP);
        if (rc < 0) {
            fprintf(stderr, "Create kernel irqchip failed\n");
            return rc;
        }
    }

    kvm_kernel_irqchip = true;
    /*
     * If we have an in-kernel IRQ chip then we must have asynchronous
     * interrupt delivery (though the reverse is not necessarily true)
     */
    kvm_async_interrupts_allowed = true;
    kvm_halt_in_kernel_allowed = true;

    kvm_init_irq_routing(s);

    return 0;
}
|
|
|
|
|
2013-08-23 15:24:37 +02:00
|
|
|
/* Find number of supported CPUs using the recommended
|
|
|
|
* procedure from the kernel API documentation to cope with
|
|
|
|
* older kernels that may be missing capabilities.
|
|
|
|
*/
|
|
|
|
static int kvm_recommended_vcpus(KVMState *s)
|
2012-07-31 13:18:17 +02:00
|
|
|
{
|
2013-08-23 15:24:37 +02:00
|
|
|
int ret = kvm_check_extension(s, KVM_CAP_NR_VCPUS);
|
|
|
|
return (ret) ? ret : 4;
|
|
|
|
}
|
2012-07-31 13:18:17 +02:00
|
|
|
|
2013-08-23 15:24:37 +02:00
|
|
|
/*
 * Returns the value reported for KVM_CAP_MAX_VCPUS, or the result of
 * kvm_recommended_vcpus() when that value is 0.
 */
static int kvm_max_vcpus(KVMState *s)
{
    int reported = kvm_check_extension(s, KVM_CAP_MAX_VCPUS);

    if (reported == 0) {
        return kvm_recommended_vcpus(s);
    }
    return reported;
}
|
|
|
|
|
2013-12-23 16:40:40 +01:00
|
|
|
int kvm_init(QEMUMachine *machine)
|
2008-11-05 17:29:27 +01:00
|
|
|
{
|
2009-06-07 11:30:25 +02:00
|
|
|
static const char upgrade_note[] =
|
|
|
|
"Please upgrade to at least kernel 2.6.29 or recent kvm-kmod\n"
|
|
|
|
"(see http://sourceforge.net/projects/kvm).\n";
|
2013-08-23 15:24:37 +02:00
|
|
|
struct {
|
|
|
|
const char *name;
|
|
|
|
int num;
|
|
|
|
} num_cpus[] = {
|
|
|
|
{ "SMP", smp_cpus },
|
|
|
|
{ "hotpluggable", max_cpus },
|
|
|
|
{ NULL, }
|
|
|
|
}, *nc = num_cpus;
|
|
|
|
int soft_vcpus_limit, hard_vcpus_limit;
|
2008-11-05 17:29:27 +01:00
|
|
|
KVMState *s;
|
2011-01-21 21:48:17 +01:00
|
|
|
const KVMCapabilityInfo *missing_cap;
|
2008-11-05 17:29:27 +01:00
|
|
|
int ret;
|
2013-12-23 16:40:40 +01:00
|
|
|
int i, type = 0;
|
|
|
|
const char *kvm_type;
|
2008-11-05 17:29:27 +01:00
|
|
|
|
2011-08-21 05:09:37 +02:00
|
|
|
s = g_malloc0(sizeof(KVMState));
|
2008-11-05 17:29:27 +01:00
|
|
|
|
2012-04-04 03:15:54 +02:00
|
|
|
/*
|
|
|
|
* On systems where the kernel can support different base page
|
|
|
|
* sizes, host page size may be different from TARGET_PAGE_SIZE,
|
|
|
|
* even with KVM. TARGET_PAGE_SIZE is assumed to be the minimum
|
|
|
|
* page size for the system though.
|
|
|
|
*/
|
|
|
|
assert(TARGET_PAGE_SIZE <= getpagesize());
|
2014-01-17 19:12:07 +01:00
|
|
|
page_size_init();
|
2012-04-04 03:15:54 +02:00
|
|
|
|
2009-03-12 21:12:48 +01:00
|
|
|
#ifdef KVM_CAP_SET_GUEST_DEBUG
|
2009-09-12 09:36:22 +02:00
|
|
|
QTAILQ_INIT(&s->kvm_sw_breakpoints);
|
2009-03-12 21:12:48 +01:00
|
|
|
#endif
|
2008-11-05 17:29:27 +01:00
|
|
|
s->vmfd = -1;
|
2009-12-02 12:24:42 +01:00
|
|
|
s->fd = qemu_open("/dev/kvm", O_RDWR);
|
2008-11-05 17:29:27 +01:00
|
|
|
if (s->fd == -1) {
|
|
|
|
fprintf(stderr, "Could not access KVM kernel module: %m\n");
|
|
|
|
ret = -errno;
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
ret = kvm_ioctl(s, KVM_GET_API_VERSION, 0);
|
|
|
|
if (ret < KVM_API_VERSION) {
|
2011-01-04 09:32:13 +01:00
|
|
|
if (ret > 0) {
|
2008-11-05 17:29:27 +01:00
|
|
|
ret = -EINVAL;
|
2011-01-04 09:32:13 +01:00
|
|
|
}
|
2008-11-05 17:29:27 +01:00
|
|
|
fprintf(stderr, "kvm version too old\n");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ret > KVM_API_VERSION) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
fprintf(stderr, "kvm version not supported\n");
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2013-11-22 20:12:44 +01:00
|
|
|
s->nr_slots = kvm_check_extension(s, KVM_CAP_NR_MEMSLOTS);
|
|
|
|
|
|
|
|
/* If unspecified, use the default value */
|
|
|
|
if (!s->nr_slots) {
|
|
|
|
s->nr_slots = 32;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->slots = g_malloc0(s->nr_slots * sizeof(KVMSlot));
|
|
|
|
|
|
|
|
for (i = 0; i < s->nr_slots; i++) {
|
|
|
|
s->slots[i].slot = i;
|
|
|
|
}
|
|
|
|
|
2013-08-23 15:24:37 +02:00
|
|
|
/* check the vcpu limits */
|
|
|
|
soft_vcpus_limit = kvm_recommended_vcpus(s);
|
|
|
|
hard_vcpus_limit = kvm_max_vcpus(s);
|
2012-07-31 13:18:17 +02:00
|
|
|
|
2013-08-23 15:24:37 +02:00
|
|
|
while (nc->name) {
|
|
|
|
if (nc->num > soft_vcpus_limit) {
|
|
|
|
fprintf(stderr,
|
|
|
|
"Warning: Number of %s cpus requested (%d) exceeds "
|
|
|
|
"the recommended cpus supported by KVM (%d)\n",
|
|
|
|
nc->name, nc->num, soft_vcpus_limit);
|
|
|
|
|
|
|
|
if (nc->num > hard_vcpus_limit) {
|
|
|
|
ret = -EINVAL;
|
|
|
|
fprintf(stderr, "Number of %s cpus requested (%d) exceeds "
|
|
|
|
"the maximum cpus supported by KVM (%d)\n",
|
|
|
|
nc->name, nc->num, hard_vcpus_limit);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
nc++;
|
2013-08-12 21:56:31 +02:00
|
|
|
}
|
|
|
|
|
2013-12-23 16:40:40 +01:00
|
|
|
kvm_type = qemu_opt_get(qemu_get_machine_opts(), "kvm-type");
|
|
|
|
if (machine->kvm_type) {
|
|
|
|
type = machine->kvm_type(kvm_type);
|
|
|
|
} else if (kvm_type) {
|
|
|
|
fprintf(stderr, "Invalid argument kvm-type=%s\n", kvm_type);
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2014-01-09 22:14:23 +01:00
|
|
|
do {
|
2013-12-23 16:40:40 +01:00
|
|
|
ret = kvm_ioctl(s, KVM_CREATE_VM, type);
|
2014-01-09 22:14:23 +01:00
|
|
|
} while (ret == -EINTR);
|
|
|
|
|
|
|
|
if (ret < 0) {
|
2014-01-27 15:18:09 +01:00
|
|
|
fprintf(stderr, "ioctl(KVM_CREATE_VM) failed: %d %s\n", -ret,
|
2014-01-09 22:14:23 +01:00
|
|
|
strerror(-ret));
|
|
|
|
|
2010-04-01 18:42:37 +02:00
|
|
|
#ifdef TARGET_S390X
|
|
|
|
fprintf(stderr, "Please add the 'switch_amode' kernel parameter to "
|
|
|
|
"your host kernel command line\n");
|
|
|
|
#endif
|
2008-11-05 17:29:27 +01:00
|
|
|
goto err;
|
2010-04-01 18:42:37 +02:00
|
|
|
}
|
2008-11-05 17:29:27 +01:00
|
|
|
|
2014-01-09 22:14:23 +01:00
|
|
|
s->vmfd = ret;
|
2011-01-21 21:48:17 +01:00
|
|
|
missing_cap = kvm_check_extension_list(s, kvm_required_capabilites);
|
|
|
|
if (!missing_cap) {
|
|
|
|
missing_cap =
|
|
|
|
kvm_check_extension_list(s, kvm_arch_required_capabilities);
|
2008-11-05 17:29:27 +01:00
|
|
|
}
|
2011-01-21 21:48:17 +01:00
|
|
|
if (missing_cap) {
|
2009-05-08 22:33:24 +02:00
|
|
|
ret = -EINVAL;
|
2011-01-21 21:48:17 +01:00
|
|
|
fprintf(stderr, "kvm does not support %s\n%s",
|
|
|
|
missing_cap->name, upgrade_note);
|
2008-12-09 20:59:09 +01:00
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2009-05-08 22:33:24 +02:00
|
|
|
s->coalesced_mmio = kvm_check_extension(s, KVM_CAP_COALESCED_MMIO);
|
2008-12-09 21:09:57 +01:00
|
|
|
|
2009-05-01 20:42:15 +02:00
|
|
|
s->broken_set_mem_region = 1;
|
2010-12-10 08:52:36 +01:00
|
|
|
ret = kvm_check_extension(s, KVM_CAP_JOIN_MEMORY_REGIONS_WORKS);
|
2009-05-01 20:42:15 +02:00
|
|
|
if (ret > 0) {
|
|
|
|
s->broken_set_mem_region = 0;
|
|
|
|
}
|
|
|
|
|
2009-11-25 00:33:03 +01:00
|
|
|
#ifdef KVM_CAP_VCPU_EVENTS
|
|
|
|
s->vcpu_events = kvm_check_extension(s, KVM_CAP_VCPU_EVENTS);
|
|
|
|
#endif
|
|
|
|
|
2010-03-01 19:10:29 +01:00
|
|
|
s->robust_singlestep =
|
|
|
|
kvm_check_extension(s, KVM_CAP_X86_ROBUST_SINGLESTEP);
|
|
|
|
|
2010-03-12 15:20:49 +01:00
|
|
|
#ifdef KVM_CAP_DEBUGREGS
|
|
|
|
s->debugregs = kvm_check_extension(s, KVM_CAP_DEBUGREGS);
|
|
|
|
#endif
|
|
|
|
|
2010-06-17 11:53:07 +02:00
|
|
|
#ifdef KVM_CAP_XSAVE
|
|
|
|
s->xsave = kvm_check_extension(s, KVM_CAP_XSAVE);
|
|
|
|
#endif
|
|
|
|
|
|
|
|
#ifdef KVM_CAP_XCRS
|
|
|
|
s->xcrs = kvm_check_extension(s, KVM_CAP_XCRS);
|
|
|
|
#endif
|
|
|
|
|
2012-03-02 20:28:48 +01:00
|
|
|
#ifdef KVM_CAP_PIT_STATE2
|
|
|
|
s->pit_state2 = kvm_check_extension(s, KVM_CAP_PIT_STATE2);
|
|
|
|
#endif
|
|
|
|
|
2012-06-05 21:03:57 +02:00
|
|
|
#ifdef KVM_CAP_IRQ_ROUTING
|
2012-05-16 20:41:14 +02:00
|
|
|
s->direct_msi = (kvm_check_extension(s, KVM_CAP_SIGNAL_MSI) > 0);
|
2012-06-05 21:03:57 +02:00
|
|
|
#endif
|
2012-05-16 20:41:14 +02:00
|
|
|
|
2012-08-27 08:28:39 +02:00
|
|
|
s->intx_set_mask = kvm_check_extension(s, KVM_CAP_PCI_2_3);
|
|
|
|
|
2012-08-24 13:34:47 +02:00
|
|
|
s->irq_set_ioctl = KVM_IRQ_LINE;
|
2012-08-15 13:08:13 +02:00
|
|
|
if (kvm_check_extension(s, KVM_CAP_IRQ_INJECT_STATUS)) {
|
2012-08-24 13:34:47 +02:00
|
|
|
s->irq_set_ioctl = KVM_IRQ_LINE_STATUS;
|
2012-08-15 13:08:13 +02:00
|
|
|
}
|
|
|
|
|
2013-05-29 10:27:25 +02:00
|
|
|
#ifdef KVM_CAP_READONLY_MEM
|
|
|
|
kvm_readonly_mem_allowed =
|
|
|
|
(kvm_check_extension(s, KVM_CAP_READONLY_MEM) > 0);
|
|
|
|
#endif
|
|
|
|
|
2011-01-21 21:48:16 +01:00
|
|
|
ret = kvm_arch_init(s);
|
2011-01-04 09:32:13 +01:00
|
|
|
if (ret < 0) {
|
2008-11-05 17:29:27 +01:00
|
|
|
goto err;
|
2011-01-04 09:32:13 +01:00
|
|
|
}
|
2008-11-05 17:29:27 +01:00
|
|
|
|
2011-10-15 11:49:47 +02:00
|
|
|
ret = kvm_irqchip_create(s);
|
|
|
|
if (ret < 0) {
|
|
|
|
goto err;
|
|
|
|
}
|
|
|
|
|
2008-11-05 17:29:27 +01:00
|
|
|
kvm_state = s;
|
2012-10-02 20:13:51 +02:00
|
|
|
memory_listener_register(&kvm_memory_listener, &address_space_memory);
|
|
|
|
memory_listener_register(&kvm_io_listener, &address_space_io);
|
2008-11-05 17:29:27 +01:00
|
|
|
|
2011-01-10 12:50:05 +01:00
|
|
|
s->many_ioeventfds = kvm_check_many_ioeventfds();
|
|
|
|
|
2011-04-13 01:32:56 +02:00
|
|
|
cpu_interrupt_handler = kvm_handle_interrupt;
|
|
|
|
|
2008-11-05 17:29:27 +01:00
|
|
|
return 0;
|
|
|
|
|
|
|
|
err:
|
2012-09-03 22:40:40 +02:00
|
|
|
if (s->vmfd >= 0) {
|
|
|
|
close(s->vmfd);
|
|
|
|
}
|
|
|
|
if (s->fd != -1) {
|
|
|
|
close(s->fd);
|
2008-11-05 17:29:27 +01:00
|
|
|
}
|
2013-11-22 20:12:44 +01:00
|
|
|
g_free(s->slots);
|
2011-08-21 05:09:37 +02:00
|
|
|
g_free(s);
|
2008-11-05 17:29:27 +01:00
|
|
|
|
|
|
|
return ret;
|
|
|
|
}
|
|
|
|
|
2011-02-01 22:16:01 +01:00
|
|
|
static void kvm_handle_io(uint16_t port, void *data, int direction, int size,
|
|
|
|
uint32_t count)
|
2008-11-05 17:29:27 +01:00
|
|
|
{
|
|
|
|
int i;
|
|
|
|
uint8_t *ptr = data;
|
|
|
|
|
|
|
|
for (i = 0; i < count; i++) {
|
2013-08-13 14:43:57 +02:00
|
|
|
address_space_rw(&address_space_io, port, ptr, size,
|
|
|
|
direction == KVM_EXIT_IO_OUT);
|
2008-11-05 17:29:27 +01:00
|
|
|
ptr += size;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-05-27 01:55:29 +02:00
|
|
|
/*
 * Report a KVM_EXIT_INTERNAL_ERROR exit on stderr.
 *
 * Prints the suberror code and, when KVM_CAP_INTERNAL_ERROR_DATA is
 * available, each extra data word supplied in @run.
 *
 * Returns EXCP_INTERRUPT for an emulation failure that
 * kvm_arch_stop_on_emulation_error() says should not stop the guest
 * (the CPU state is dumped first); returns -1 in every other case.
 */
static int kvm_handle_internal_error(CPUState *cpu, struct kvm_run *run)
{
    int idx;

    fprintf(stderr, "KVM internal error. Suberror: %d\n",
            run->internal.suberror);

    if (kvm_check_extension(kvm_state, KVM_CAP_INTERNAL_ERROR_DATA)) {
        for (idx = 0; idx < run->internal.ndata; ++idx) {
            fprintf(stderr, "extra data[%d]: %"PRIx64"\n",
                    idx, (uint64_t)run->internal.data[idx]);
        }
    }

    if (run->internal.suberror == KVM_INTERNAL_ERROR_EMULATION) {
        fprintf(stderr, "emulation failure\n");
        if (!kvm_arch_stop_on_emulation_error(cpu)) {
            cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
            return EXCP_INTERRUPT;
        }
    }

    /* FIXME: Should trigger a qmp message to let management know
     * something went wrong.
     */
    return -1;
}
|
|
|
|
|
2010-01-26 12:21:16 +01:00
|
|
|
void kvm_flush_coalesced_mmio_buffer(void)
|
2008-12-09 21:09:57 +01:00
|
|
|
{
|
|
|
|
KVMState *s = kvm_state;
|
2011-10-18 19:43:12 +02:00
|
|
|
|
|
|
|
if (s->coalesced_flush_in_progress) {
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
s->coalesced_flush_in_progress = true;
|
|
|
|
|
2010-01-26 12:21:16 +01:00
|
|
|
if (s->coalesced_mmio_ring) {
|
|
|
|
struct kvm_coalesced_mmio_ring *ring = s->coalesced_mmio_ring;
|
2008-12-09 21:09:57 +01:00
|
|
|
while (ring->first != ring->last) {
|
|
|
|
struct kvm_coalesced_mmio *ent;
|
|
|
|
|
|
|
|
ent = &ring->coalesced_mmio[ring->first];
|
|
|
|
|
|
|
|
cpu_physical_memory_write(ent->phys_addr, ent->data, ent->len);
|
2010-02-22 17:57:54 +01:00
|
|
|
smp_wmb();
|
2008-12-09 21:09:57 +01:00
|
|
|
ring->first = (ring->first + 1) % KVM_COALESCED_MMIO_MAX;
|
|
|
|
}
|
|
|
|
}
|
2011-10-18 19:43:12 +02:00
|
|
|
|
|
|
|
s->coalesced_flush_in_progress = false;
|
2008-12-09 21:09:57 +01:00
|
|
|
}
|
|
|
|
|
2012-10-31 06:57:49 +01:00
|
|
|
static void do_kvm_cpu_synchronize_state(void *arg)
|
2009-08-17 22:19:53 +02:00
|
|
|
{
|
2012-10-31 06:57:49 +01:00
|
|
|
CPUState *cpu = arg;
|
2010-05-04 14:45:23 +02:00
|
|
|
|
2012-10-31 06:57:49 +01:00
|
|
|
if (!cpu->kvm_vcpu_dirty) {
|
|
|
|
kvm_arch_get_registers(cpu);
|
|
|
|
cpu->kvm_vcpu_dirty = true;
|
2009-08-17 22:19:53 +02:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2013-05-01 13:45:44 +02:00
|
|
|
/*
 * Schedule do_kvm_cpu_synchronize_state() on @cpu through run_on_cpu(),
 * unless the vCPU state is already marked dirty.
 */
void kvm_cpu_synchronize_state(CPUState *cpu)
{
    if (cpu->kvm_vcpu_dirty) {
        return;
    }

    run_on_cpu(cpu, do_kvm_cpu_synchronize_state, cpu);
}
|
|
|
|
|
2013-04-11 16:51:41 +02:00
|
|
|
/*
 * Write the registers of @cpu to KVM at the KVM_PUT_RESET_STATE level
 * and clear the dirty flag.
 */
void kvm_cpu_synchronize_post_reset(CPUState *cpu)
{
    kvm_arch_put_registers(cpu, KVM_PUT_RESET_STATE);

    cpu->kvm_vcpu_dirty = false;
}
|
|
|
|
|
2013-04-11 16:51:41 +02:00
|
|
|
/*
 * Write the registers of @cpu to KVM at the KVM_PUT_FULL_STATE level
 * and clear the dirty flag.
 */
void kvm_cpu_synchronize_post_init(CPUState *cpu)
{
    kvm_arch_put_registers(cpu, KVM_PUT_FULL_STATE);

    cpu->kvm_vcpu_dirty = false;
}
|
|
|
|
|
2013-05-26 23:46:55 +02:00
|
|
|
/*
 * Run @cpu inside KVM until an exit needs attention outside this loop.
 *
 * Returns EXCP_HLT right away when kvm_arch_process_async_events()
 * reports a non-zero result.  Otherwise KVM_RUN is issued repeatedly for
 * as long as the exit handler returns 0.  A negative handler result dumps
 * the CPU state and stops the VM with RUN_STATE_INTERNAL_ERROR.  The
 * handler's final result is returned and cpu->exit_request is cleared
 * before returning.
 *
 * The iothread mutex is released around the KVM_RUN ioctl and re-taken
 * afterwards.  A KVM_RUN failure other than -EINTR/-EAGAIN aborts.
 */
int kvm_cpu_exec(CPUState *cpu)
{
    struct kvm_run *run = cpu->kvm_run;
    int ret, run_ret;

    DPRINTF("kvm_cpu_exec()\n");

    if (kvm_arch_process_async_events(cpu)) {
        cpu->exit_request = 0;
        return EXCP_HLT;
    }

    do {
        /* Push modified register state back to KVM before entering. */
        if (cpu->kvm_vcpu_dirty) {
            kvm_arch_put_registers(cpu, KVM_PUT_RUNTIME_STATE);
            cpu->kvm_vcpu_dirty = false;
        }

        kvm_arch_pre_run(cpu, run);
        if (cpu->exit_request) {
            DPRINTF("interrupt exit requested\n");
            /*
             * KVM requires us to reenter the kernel after IO exits to complete
             * instruction emulation. This self-signal will ensure that we
             * leave ASAP again.
             */
            qemu_cpu_kick_self();
        }
        qemu_mutex_unlock_iothread();

        /*
         * Pass a real null pointer: kvm_vcpu_ioctl() fetches its variadic
         * argument with va_arg(ap, void *), so a plain int 0 would be a
         * type mismatch.
         */
        run_ret = kvm_vcpu_ioctl(cpu, KVM_RUN, NULL);

        qemu_mutex_lock_iothread();
        kvm_arch_post_run(cpu, run);

        if (run_ret == -EINTR || run_ret == -EAGAIN) {
            DPRINTF("io window exit\n");
            ret = EXCP_INTERRUPT;
            break;
        }
        if (run_ret < 0) {
            fprintf(stderr, "error: kvm run failed %s\n",
                    strerror(-run_ret));
            abort();
        }

        trace_kvm_run_exit(cpu->cpu_index, run->exit_reason);
        switch (run->exit_reason) {
        case KVM_EXIT_IO:
            DPRINTF("handle_io\n");
            /* The I/O payload lives inside the kvm_run area itself. */
            kvm_handle_io(run->io.port,
                          (uint8_t *)run + run->io.data_offset,
                          run->io.direction,
                          run->io.size,
                          run->io.count);
            ret = 0;
            break;
        case KVM_EXIT_MMIO:
            DPRINTF("handle_mmio\n");
            cpu_physical_memory_rw(run->mmio.phys_addr,
                                   run->mmio.data,
                                   run->mmio.len,
                                   run->mmio.is_write);
            ret = 0;
            break;
        case KVM_EXIT_IRQ_WINDOW_OPEN:
            DPRINTF("irq_window_open\n");
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_SHUTDOWN:
            DPRINTF("shutdown\n");
            qemu_system_reset_request();
            ret = EXCP_INTERRUPT;
            break;
        case KVM_EXIT_UNKNOWN:
            fprintf(stderr, "KVM: unknown exit, hardware reason %" PRIx64 "\n",
                    (uint64_t)run->hw.hardware_exit_reason);
            ret = -1;
            break;
        case KVM_EXIT_INTERNAL_ERROR:
            ret = kvm_handle_internal_error(cpu, run);
            break;
        default:
            /* Anything else is left to the architecture-specific code. */
            DPRINTF("kvm_arch_handle_exit\n");
            ret = kvm_arch_handle_exit(cpu, run);
            break;
        }
    } while (ret == 0);

    if (ret < 0) {
        cpu_dump_state(cpu, stderr, fprintf, CPU_DUMP_CODE);
        vm_stop(RUN_STATE_INTERNAL_ERROR);
    }

    cpu->exit_request = 0;
    return ret;
}
|
|
|
|
|
2008-11-13 20:21:00 +01:00
|
|
|
/*
 * Issue an ioctl on s->fd.
 *
 * Exactly one variadic argument is read, as a void *, and forwarded to
 * ioctl().  Returns the ioctl() result, or -errno when ioctl() returns -1.
 */
int kvm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    trace_kvm_ioctl(type, param);

    rc = ioctl(s->fd, type, param);
    return rc == -1 ? -errno : rc;
}
|
|
|
|
|
2008-11-13 20:21:00 +01:00
|
|
|
/*
 * Issue an ioctl on s->vmfd.
 *
 * Exactly one variadic argument is read, as a void *, and forwarded to
 * ioctl().  Returns the ioctl() result, or -errno when ioctl() returns -1.
 */
int kvm_vm_ioctl(KVMState *s, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    trace_kvm_vm_ioctl(type, param);

    rc = ioctl(s->vmfd, type, param);
    return rc == -1 ? -errno : rc;
}
|
|
|
|
|
2012-10-31 06:06:49 +01:00
|
|
|
/*
 * Issue an ioctl on cpu->kvm_fd.
 *
 * Exactly one variadic argument is read, as a void *, and forwarded to
 * ioctl().  Returns the ioctl() result, or -errno when ioctl() returns -1.
 */
int kvm_vcpu_ioctl(CPUState *cpu, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    trace_kvm_vcpu_ioctl(cpu->cpu_index, type, param);

    rc = ioctl(cpu->kvm_fd, type, param);
    return rc == -1 ? -errno : rc;
}
|
2008-12-04 21:33:06 +01:00
|
|
|
|
2014-02-26 18:20:00 +01:00
|
|
|
/*
 * Issue an ioctl on the file descriptor @fd.
 *
 * Exactly one variadic argument is read, as a void *, and forwarded to
 * ioctl().  Returns the ioctl() result, or -errno when ioctl() returns -1.
 */
int kvm_device_ioctl(int fd, int type, ...)
{
    va_list args;
    void *param;
    int rc;

    va_start(args, type);
    param = va_arg(args, void *);
    va_end(args);

    trace_kvm_device_ioctl(fd, type, param);

    rc = ioctl(fd, type, param);
    return rc == -1 ? -errno : rc;
}
|
|
|
|
|
2008-12-04 21:33:06 +01:00
|
|
|
int kvm_has_sync_mmu(void)
|
|
|
|
{
|
2011-01-21 21:48:17 +01:00
|
|
|
return kvm_check_extension(kvm_state, KVM_CAP_SYNC_MMU);
|
2008-12-04 21:33:06 +01:00
|
|
|
}
|
2009-03-12 21:12:48 +01:00
|
|
|
|
2009-11-25 00:33:03 +01:00
|
|
|
int kvm_has_vcpu_events(void)
|
|
|
|
{
|
|
|
|
return kvm_state->vcpu_events;
|
|
|
|
}
|
|
|
|
|
2010-03-01 19:10:29 +01:00
|
|
|
int kvm_has_robust_singlestep(void)
|
|
|
|
{
|
|
|
|
return kvm_state->robust_singlestep;
|
|
|
|
}
|
|
|
|
|
2010-03-12 15:20:49 +01:00
|
|
|
int kvm_has_debugregs(void)
|
|
|
|
{
|
|
|
|
return kvm_state->debugregs;
|
|
|
|
}
|
|
|
|
|
2010-06-17 11:53:07 +02:00
|
|
|
int kvm_has_xsave(void)
|
|
|
|
{
|
|
|
|
return kvm_state->xsave;
|
|
|
|
}
|
|
|
|
|
|
|
|
int kvm_has_xcrs(void)
|
|
|
|
{
|
|
|
|
return kvm_state->xcrs;
|
|
|
|
}
|
|
|
|
|
2012-03-02 20:28:48 +01:00
|
|
|
int kvm_has_pit_state2(void)
|
|
|
|
{
|
|
|
|
return kvm_state->pit_state2;
|
|
|
|
}
|
|
|
|
|
2011-01-10 12:50:05 +01:00
|
|
|
int kvm_has_many_ioeventfds(void)
|
|
|
|
{
|
|
|
|
if (!kvm_enabled()) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
return kvm_state->many_ioeventfds;
|
|
|
|
}
|
|
|
|
|
2011-10-15 11:49:47 +02:00
|
|
|
/*
 * Query KVM_CAP_IRQ_ROUTING on the global kvm_state.  When the headers
 * do not define that capability the answer is always false.
 */
int kvm_has_gsi_routing(void)
{
#ifndef KVM_CAP_IRQ_ROUTING
    return false;
#else
    return kvm_check_extension(kvm_state, KVM_CAP_IRQ_ROUTING);
#endif
}
|
|
|
|
|
2012-08-27 08:28:39 +02:00
|
|
|
int kvm_has_intx_set_mask(void)
|
|
|
|
{
|
|
|
|
return kvm_state->intx_set_mask;
|
|
|
|
}
|
|
|
|
|
2009-04-26 18:03:40 +02:00
|
|
|
void kvm_setup_guest_memory(void *start, size_t size)
|
|
|
|
{
|
2012-08-10 15:11:45 +02:00
|
|
|
#ifdef CONFIG_VALGRIND_H
|
|
|
|
VALGRIND_MAKE_MEM_DEFINED(start, size);
|
|
|
|
#endif
|
2009-04-26 18:03:40 +02:00
|
|
|
if (!kvm_has_sync_mmu()) {
|
2010-09-25 13:26:05 +02:00
|
|
|
int ret = qemu_madvise(start, size, QEMU_MADV_DONTFORK);
|
2009-04-26 18:03:40 +02:00
|
|
|
|
|
|
|
if (ret) {
|
2010-09-25 13:26:05 +02:00
|
|
|
perror("qemu_madvise");
|
|
|
|
fprintf(stderr,
|
|
|
|
"Need MADV_DONTFORK in absence of synchronous KVM MMU\n");
|
2009-04-26 18:03:40 +02:00
|
|
|
exit(1);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2009-03-12 21:12:48 +01:00
|
|
|
#ifdef KVM_CAP_SET_GUEST_DEBUG
|
2012-12-01 05:35:08 +01:00
|
|
|
struct kvm_sw_breakpoint *kvm_find_sw_breakpoint(CPUState *cpu,
|
2009-03-12 21:12:48 +01:00
|
|
|
target_ulong pc)
|
|
|
|
{
|
|
|
|
struct kvm_sw_breakpoint *bp;
|
|
|
|
|
2012-12-01 05:35:08 +01:00
|
|
|
QTAILQ_FOREACH(bp, &cpu->kvm_state->kvm_sw_breakpoints, entry) {
|
2011-01-04 09:32:13 +01:00
|
|
|
if (bp->pc == pc) {
|
2009-03-12 21:12:48 +01:00
|
|
|
return bp;
|
2011-01-04 09:32:13 +01:00
|
|
|
}
|
2009-03-12 21:12:48 +01:00
|
|
|
}
|
|
|
|
return NULL;
|
|
|
|
}
|
|
|
|
|
2012-12-01 05:35:08 +01:00
|
|
|
/*
 * Return non-zero when the kvm_sw_breakpoints list of @cpu's KVM state
 * holds at least one entry.
 */
int kvm_sw_breakpoints_active(CPUState *cpu)
{
    return !QTAILQ_EMPTY(&cpu->kvm_state->kvm_sw_breakpoints);
}
|
|
|
|
|
2009-07-16 23:55:28 +02:00
|
|
|
struct kvm_set_guest_debug_data {
|
|
|
|
struct kvm_guest_debug dbg;
|
2012-12-01 05:35:08 +01:00
|
|
|
CPUState *cpu;
|
2009-07-16 23:55:28 +02:00
|
|
|
int err;
|
|
|
|
};
|
|
|
|
|
|
|
|
static void kvm_invoke_set_guest_debug(void *data)
|
|
|
|
{
|
|
|
|
struct kvm_set_guest_debug_data *dbg_data = data;
|
2009-09-17 20:05:58 +02:00
|
|
|
|
2012-12-01 05:35:08 +01:00
|
|
|
dbg_data->err = kvm_vcpu_ioctl(dbg_data->cpu, KVM_SET_GUEST_DEBUG,
|
|
|
|
&dbg_data->dbg);
|
2009-07-16 23:55:28 +02:00
|
|
|
}
|
|
|
|
|
2013-07-25 20:50:21 +02:00
|
|
|
/*
 * Recompute the guest debug settings of @cpu and push them to KVM.
 *
 * @reinject_trap: initial value of the debug control word
 *
 * Single-stepping flags are added when cpu->singlestep_enabled is set,
 * then kvm_arch_update_guest_debug() gets to adjust the request before
 * it is submitted on the vCPU via run_on_cpu().
 *
 * Returns the result of the KVM_SET_GUEST_DEBUG ioctl.
 */
int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
{
    /*
     * Zero the whole request first: only some members are assigned below
     * and the structure is passed to the kernel as-is, so nothing in it
     * may be left as indeterminate stack contents.
     */
    struct kvm_set_guest_debug_data data = { .cpu = cpu };

    data.dbg.control = reinject_trap;

    if (cpu->singlestep_enabled) {
        data.dbg.control |= KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_SINGLESTEP;
    }
    kvm_arch_update_guest_debug(cpu, &data.dbg);

    run_on_cpu(cpu, kvm_invoke_set_guest_debug, &data);
    return data.err;
}
|
|
|
|
|
2013-06-27 17:12:06 +02:00
|
|
|
/*
 * Insert a breakpoint and refresh the debug state of every CPU.
 *
 * @cpu:  CPU used to install a software breakpoint
 * @addr: breakpoint address
 * @len:  length, forwarded to the hardware breakpoint hook
 * @type: GDB_BREAKPOINT_SW for a software breakpoint; any other value is
 *        forwarded to kvm_arch_insert_hw_breakpoint()
 *
 * A software breakpoint that already exists at @addr only has its use
 * count incremented.
 *
 * Returns 0 on success, or the first non-zero error reported by the
 * architecture hooks or by kvm_update_guest_debug().
 */
int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    int err;

    if (type == GDB_BREAKPOINT_SW) {
        bp = kvm_find_sw_breakpoint(cpu, addr);
        if (bp) {
            bp->use_count++;
            return 0;
        }

        /* g_malloc() terminates the program on allocation failure, so
         * the result needs no NULL check. */
        bp = g_malloc(sizeof(*bp));
        bp->pc = addr;
        bp->use_count = 1;

        err = kvm_arch_insert_sw_breakpoint(cpu, bp);
        if (err) {
            g_free(bp);
            return err;
        }

        QTAILQ_INSERT_HEAD(&cpu->kvm_state->kvm_sw_breakpoints, bp, entry);
    } else {
        err = kvm_arch_insert_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    }

    /* From here on @cpu is reused as the iterator over all CPUs. */
    CPU_FOREACH(cpu) {
        err = kvm_update_guest_debug(cpu, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}
|
|
|
|
|
2013-06-27 17:12:06 +02:00
|
|
|
/*
 * Remove a breakpoint and refresh the debug state of every CPU.
 *
 * @cpu:  CPU used to remove a software breakpoint
 * @addr: breakpoint address
 * @len:  length, forwarded to the hardware breakpoint hook
 * @type: GDB_BREAKPOINT_SW for a software breakpoint; any other value is
 *        forwarded to kvm_arch_remove_hw_breakpoint()
 *
 * A software breakpoint whose use count is above one is only
 * decremented.
 *
 * Returns 0 on success, -ENOENT when no software breakpoint exists at
 * @addr, or the first non-zero error reported by the architecture hooks
 * or by kvm_update_guest_debug().
 */
int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    struct kvm_sw_breakpoint *bp;
    int err;

    if (type != GDB_BREAKPOINT_SW) {
        err = kvm_arch_remove_hw_breakpoint(addr, len, type);
        if (err) {
            return err;
        }
    } else {
        bp = kvm_find_sw_breakpoint(cpu, addr);
        if (!bp) {
            return -ENOENT;
        }

        /* Still referenced by another user: just drop one reference. */
        if (bp->use_count > 1) {
            bp->use_count--;
            return 0;
        }

        err = kvm_arch_remove_sw_breakpoint(cpu, bp);
        if (err) {
            return err;
        }

        QTAILQ_REMOVE(&cpu->kvm_state->kvm_sw_breakpoints, bp, entry);
        g_free(bp);
    }

    /* From here on @cpu is reused as the iterator over all CPUs. */
    CPU_FOREACH(cpu) {
        err = kvm_update_guest_debug(cpu, 0);
        if (err) {
            return err;
        }
    }
    return 0;
}
|
|
|
|
|
2013-05-27 14:40:48 +02:00
|
|
|
/*
 * Remove every software breakpoint recorded in @cpu's KVM state, remove
 * all hardware breakpoints, and refresh the debug state of every CPU.
 *
 * Removal of each software breakpoint is first attempted through @cpu;
 * if that fails, every CPU is tried in turn until one succeeds.  The
 * list entry is freed regardless of whether any attempt succeeded.
 */
void kvm_remove_all_breakpoints(CPUState *cpu)
{
    struct kvm_sw_breakpoint *bp, *next;
    KVMState *s = cpu->kvm_state;
    /*
     * Separate iterator for the CPU loops: iterating with @cpu itself
     * would overwrite the parameter, and a loop that ran to completion
     * would leave it NULL for the next breakpoint's first attempt.
     */
    CPUState *tmpcpu;

    QTAILQ_FOREACH_SAFE(bp, &s->kvm_sw_breakpoints, entry, next) {
        if (kvm_arch_remove_sw_breakpoint(cpu, bp) != 0) {
            /* Try harder to find a CPU that currently sees the breakpoint. */
            CPU_FOREACH(tmpcpu) {
                if (kvm_arch_remove_sw_breakpoint(tmpcpu, bp) == 0) {
                    break;
                }
            }
        }
        QTAILQ_REMOVE(&s->kvm_sw_breakpoints, bp, entry);
        g_free(bp);
    }
    kvm_arch_remove_all_hw_breakpoints();

    CPU_FOREACH(tmpcpu) {
        kvm_update_guest_debug(tmpcpu, 0);
    }
}
|
|
|
|
|
|
|
|
#else /* !KVM_CAP_SET_GUEST_DEBUG */
|
|
|
|
|
2013-07-25 20:50:21 +02:00
|
|
|
/*
 * Variant built when KVM_CAP_SET_GUEST_DEBUG is not defined:
 * always fails with -EINVAL.
 */
int kvm_update_guest_debug(CPUState *cpu, unsigned long reinject_trap)
{
    return -EINVAL;
}
|
|
|
|
|
2013-06-27 17:12:06 +02:00
|
|
|
/*
 * Variant built when KVM_CAP_SET_GUEST_DEBUG is not defined:
 * always fails with -EINVAL.
 */
int kvm_insert_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}
|
|
|
|
|
2013-06-27 17:12:06 +02:00
|
|
|
/*
 * Variant built when KVM_CAP_SET_GUEST_DEBUG is not defined:
 * always fails with -EINVAL.
 */
int kvm_remove_breakpoint(CPUState *cpu, target_ulong addr,
                          target_ulong len, int type)
{
    return -EINVAL;
}
|
|
|
|
|
2013-05-27 14:40:48 +02:00
|
|
|
/*
 * Variant built when KVM_CAP_SET_GUEST_DEBUG is not defined:
 * there is nothing to remove, so this does nothing.
 */
void kvm_remove_all_breakpoints(CPUState *cpu)
{
}
|
|
|
|
#endif /* !KVM_CAP_SET_GUEST_DEBUG */
|
2010-02-17 23:14:42 +01:00
|
|
|
|
2013-05-26 23:38:10 +02:00
|
|
|
/*
 * Set the signal mask of @cpu via KVM_SET_SIGNAL_MASK.
 *
 * @sigset: mask to install; NULL issues the ioctl with a NULL argument
 *
 * Returns the result of the ioctl.
 */
int kvm_set_signal_mask(CPUState *cpu, const sigset_t *sigset)
{
    struct kvm_signal_mask *mask;
    int rc;

    if (sigset == NULL) {
        return kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, NULL);
    }

    /* Header followed by room for a full sigset_t. */
    mask = g_malloc(sizeof(*mask) + sizeof(*sigset));

    /* NOTE(review): len is fixed at 8 although sizeof(*sigset) bytes are
     * copied below; presumably the size the kernel expects - confirm. */
    mask->len = 8;
    memcpy(mask->sigset, sigset, sizeof(*sigset));

    rc = kvm_vcpu_ioctl(cpu, KVM_SET_SIGNAL_MASK, mask);
    g_free(mask);

    return rc;
}
|
2013-01-17 09:30:27 +01:00
|
|
|
/*
 * Forward a SIGBUS report for @cpu to kvm_arch_on_sigbus_vcpu() and
 * return its result.
 */
int kvm_on_sigbus_vcpu(CPUState *cpu, int code, void *addr)
{
    return kvm_arch_on_sigbus_vcpu(cpu, code, addr);
}
|
|
|
|
|
|
|
|
/* Forward a SIGBUS report to kvm_arch_on_sigbus() and return its result. */
int kvm_on_sigbus(int code, void *addr)
{
    return kvm_arch_on_sigbus(code, addr);
}
|
2014-02-26 18:20:00 +01:00
|
|
|
|
|
|
|
/*
 * Create an in-kernel device through KVM_CREATE_DEVICE, or only probe
 * for it.
 *
 * @s:    KVM state whose VM file descriptor receives the ioctl
 * @type: device type placed in the request
 * @test: when true, KVM_CREATE_DEVICE_TEST is set in the request flags
 *
 * Returns -ENOTSUP when KVM_CAP_DEVICE_CTRL is unavailable, the non-zero
 * ioctl result on failure, 0 for a successful test, and otherwise the
 * file descriptor reported in the request.
 */
int kvm_create_device(KVMState *s, uint64_t type, bool test)
{
    struct kvm_create_device create_dev;
    int ret;

    if (!kvm_check_extension(s, KVM_CAP_DEVICE_CTRL)) {
        return -ENOTSUP;
    }

    create_dev.type = type;
    create_dev.fd = -1;
    create_dev.flags = test ? KVM_CREATE_DEVICE_TEST : 0;

    ret = kvm_vm_ioctl(s, KVM_CREATE_DEVICE, &create_dev);
    if (ret != 0) {
        return ret;
    }

    if (test) {
        return 0;
    }
    return create_dev.fd;
}
|