Add KVM support to QEMU

This patch adds very basic KVM support.  KVM is a kernel module for Linux that
allows userspace programs to make use of hardware virtualization support.  It
currently supports x86 hardware virtualization using Intel VT-x or AMD-V.  It
also supports IA64 VT-i, PPC 440, and S390.

This patch only implements the bare minimum support to get a guest booting.  It
has very little impact on the rest of QEMU and attempts to integrate nicely
with it.

Even though this implementation is basic, it is significantly faster than TCG.
Booting and shutting down a Linux guest:

w/TCG:  1:32.36 elapsed  84% CPU

w/KVM:  0:31.14 elapsed  59% CPU

Right now, KVM is disabled by default and must be explicitly enabled with
 -enable-kvm.  We can enable it by default later when we have had better
testing.

Signed-off-by: Anthony Liguori <aliguori@us.ibm.com>



git-svn-id: svn://svn.savannah.nongnu.org/qemu/trunk@5627 c046a42c-6fe2-441c-8c8c-71466251a162
This commit is contained in:
aliguori 2008-11-05 16:04:33 +00:00
parent 6fd805e1d4
commit 7ba1e61953
10 changed files with 215 additions and 1 deletions

View File

@ -183,6 +183,9 @@ CFLAGS+=-I/opt/SUNWspro/prod/include/cc
endif endif
endif endif
kvm.o: CFLAGS+=$(KVM_CFLAGS)
kvm-all.o: CFLAGS+=$(KVM_CFLAGS)
all: $(PROGS) all: $(PROGS)
######################################################### #########################################################
@ -581,6 +584,9 @@ ifndef CONFIG_USER_ONLY
OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o OBJS=vl.o osdep.o monitor.o pci.o loader.o isa_mmio.o machine.o net-checksum.o
OBJS+=fw_cfg.o aio.o buffered_file.o migration.o migration-tcp.o qemu-char.o OBJS+=fw_cfg.o aio.o buffered_file.o migration.o migration-tcp.o qemu-char.o
OBJS+=net.o OBJS+=net.o
ifdef CONFIG_KVM
OBJS+=kvm.o kvm-all.o
endif
ifdef CONFIG_WIN32 ifdef CONFIG_WIN32
OBJS+=block-raw-win32.o OBJS+=block-raw-win32.o
else else

48
configure vendored
View File

@ -115,6 +115,7 @@ aio="yes"
nptl="yes" nptl="yes"
mixemu="no" mixemu="no"
bluez="yes" bluez="yes"
kvm="yes"
# OS specific # OS specific
targetos=`uname -s` targetos=`uname -s`
@ -303,6 +304,8 @@ for opt do
;; ;;
--disable-bluez) bluez="no" --disable-bluez) bluez="no"
;; ;;
--disable-kvm) kvm="no"
;;
--enable-profiler) profiler="yes" --enable-profiler) profiler="yes"
;; ;;
--enable-cocoa) --enable-cocoa)
@ -448,6 +451,7 @@ echo " --disable-brlapi disable BrlAPI"
echo " --disable-vnc-tls disable TLS encryption for VNC server" echo " --disable-vnc-tls disable TLS encryption for VNC server"
echo " --disable-curses disable curses output" echo " --disable-curses disable curses output"
echo " --disable-bluez disable bluez stack connectivity" echo " --disable-bluez disable bluez stack connectivity"
echo " --disable-kvm disable KVM acceleration support"
echo " --disable-nptl disable usermode NPTL support" echo " --disable-nptl disable usermode NPTL support"
echo " --enable-system enable all system emulation targets" echo " --enable-system enable all system emulation targets"
echo " --disable-system disable all system emulation targets" echo " --disable-system disable all system emulation targets"
@ -950,6 +954,30 @@ EOF
fi fi
fi fi
##########################################
# kvm probe
#
# Only runs while $kvm is still "yes" (the default unless --disable-kvm was
# given).  A small C program is compiled against <linux/kvm.h>; if the
# compile fails for any reason, $kvm is turned off.
if test "$kvm" = "yes" ; then
# The test program refuses to compile (#error) unless KVM_API_VERSION is
# exactly 12 and both KVM_CAP_USER_MEMORY and KVM_CAP_SET_TSS_ADDR are
# defined by the header.
cat > $TMPC <<EOF
#include <linux/kvm.h>
#if !defined(KVM_API_VERSION) || \
KVM_API_VERSION < 12 || \
KVM_API_VERSION > 12 || \
!defined(KVM_CAP_USER_MEMORY) || \
!defined(KVM_CAP_SET_TSS_ADDR)
#error Invalid KVM version
#endif
int main(void) { return 0; }
EOF
# FIXME make this configurable
# The include path points at the build tree of the currently running kernel
# (as reported by `uname -r`).  kvm_cflags stays set even if the probe below
# fails; later code only writes it out when $kvm is "yes".
kvm_cflags=-I/lib/modules/`uname -r`/build/include
# Compiler diagnostics are discarded; only the exit status matters.
if $cc $ARCH_CFLAGS -o $TMPE ${OS_CFLAGS} $kvm_cflags $TMPC \
2>/dev/null ; then
:
else
kvm="no"
fi
fi
########################################## ##########################################
# AIO probe # AIO probe
if test "$aio" = "yes" ; then if test "$aio" = "yes" ; then
@ -1036,6 +1064,7 @@ echo "uname -r $uname_release"
echo "NPTL support $nptl" echo "NPTL support $nptl"
echo "vde support $vde" echo "vde support $vde"
echo "AIO support $aio" echo "AIO support $aio"
echo "KVM support $kvm"
if test $sdl_too_old = "yes"; then if test $sdl_too_old = "yes"; then
echo "-> Your SDL version is too old - please upgrade to have SDL support" echo "-> Your SDL version is too old - please upgrade to have SDL support"
@ -1411,6 +1440,15 @@ interp_prefix1=`echo "$interp_prefix" | sed "s/%M/$target_cpu/g"`
echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h echo "#define CONFIG_QEMU_PREFIX \"$interp_prefix1\"" >> $config_h
gdb_xml_files="" gdb_xml_files=""
# FIXME allow i386 to build on x86_64 and vice versa
if test "$kvm" = "yes" -a "$target_cpu" != "$cpu" ; then
kvm="no"
fi
# Disable KVM for linux-user
if test "$kvm" = "yes" -a "$target_softmmu" = "no" ; then
kvm="no"
fi
case "$target_cpu" in case "$target_cpu" in
i386) i386)
echo "TARGET_ARCH=i386" >> $config_mak echo "TARGET_ARCH=i386" >> $config_mak
@ -1420,6 +1458,11 @@ case "$target_cpu" in
then then
echo "#define USE_KQEMU 1" >> $config_h echo "#define USE_KQEMU 1" >> $config_h
fi fi
# Enable KVM for the i386 target: export CONFIG_KVM and the KVM include
# flags to the makefile, and define CONFIG_KVM in the config header.
# The macro is given the explicit value 1, matching what the x86_64 target
# emits, so that both `#ifdef CONFIG_KVM` and `#if CONFIG_KVM` work.
if test "$kvm" = "yes" ; then
    echo "CONFIG_KVM=yes" >> $config_mak
    echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak
    echo "#define CONFIG_KVM 1" >> $config_h
fi
gcc3minver=`$cc --version 2> /dev/null| fgrep "(GCC) 3." | awk '{ print $3 }' | cut -f2 -d.` gcc3minver=`$cc --version 2> /dev/null| fgrep "(GCC) 3." | awk '{ print $3 }' | cut -f2 -d.`
if test -n "$gcc3minver" && test $gcc3minver -gt 3 if test -n "$gcc3minver" && test $gcc3minver -gt 3
then then
@ -1437,6 +1480,11 @@ case "$target_cpu" in
then then
echo "#define USE_KQEMU 1" >> $config_h echo "#define USE_KQEMU 1" >> $config_h
fi fi
if test "$kvm" = "yes" ; then
echo "CONFIG_KVM=yes" >> $config_mak
echo "KVM_CFLAGS=$kvm_cflags" >> $config_mak
echo "#define CONFIG_KVM 1" >> $config_h
fi
;; ;;
alpha) alpha)
echo "TARGET_ARCH=alpha" >> $config_mak echo "TARGET_ARCH=alpha" >> $config_mak

View File

@ -142,6 +142,9 @@ typedef struct icount_decr_u16 {
} icount_decr_u16; } icount_decr_u16;
#endif #endif
struct kvm_run;
struct KVMState;
#define CPU_TEMP_BUF_NLONGS 128 #define CPU_TEMP_BUF_NLONGS 128
#define CPU_COMMON \ #define CPU_COMMON \
struct TranslationBlock *current_tb; /* currently executing TB */ \ struct TranslationBlock *current_tb; /* currently executing TB */ \
@ -199,6 +202,9 @@ typedef struct icount_decr_u16 {
/* user data */ \ /* user data */ \
void *opaque; \ void *opaque; \
\ \
const char *cpu_model_str; const char *cpu_model_str; \
struct KVMState *kvm_state; \
struct kvm_run *kvm_run; \
int kvm_fd;
#endif #endif

View File

@ -22,6 +22,7 @@
#include "exec.h" #include "exec.h"
#include "disas.h" #include "disas.h"
#include "tcg.h" #include "tcg.h"
#include "kvm.h"
#if !defined(CONFIG_SOFTMMU) #if !defined(CONFIG_SOFTMMU)
#undef EAX #undef EAX
@ -371,6 +372,19 @@ int cpu_exec(CPUState *env1)
} }
#endif #endif
if (kvm_enabled()) {
    /* Run the guest through KVM instead of the translated-code loop below.
     * Every branch after the call leaves this point via cpu_loop_exit() or
     * longjmp(), so the TCG loop is never entered when KVM is enabled.
     *
     * NOTE(review): the return value of kvm_cpu_exec() is not examined
     * (it was stored in an otherwise unused local before); confirm whether
     * failures need to be handled here. */
    kvm_cpu_exec(env);

    if (env->interrupt_request & CPU_INTERRUPT_EXIT) {
        /* An exit was requested: acknowledge it and report it as an
         * interrupt exception. */
        env->interrupt_request &= ~CPU_INTERRUPT_EXIT;
        env->exception_index = EXCP_INTERRUPT;
        cpu_loop_exit();
    } else if (env->halted) {
        cpu_loop_exit();
    } else {
        /* Nothing to report: jump back to the setjmp point in env->jmp_env. */
        longjmp(env->jmp_env, 1);
    }
}
next_tb = 0; /* force lookup of first TB */ next_tb = 0; /* force lookup of first TB */
for(;;) { for(;;) {
interrupt_request = env->interrupt_request; interrupt_request = env->interrupt_request;

4
exec.c
View File

@ -39,6 +39,7 @@
#include "tcg.h" #include "tcg.h"
#include "hw/hw.h" #include "hw/hw.h"
#include "osdep.h" #include "osdep.h"
#include "kvm.h"
#if defined(CONFIG_USER_ONLY) #if defined(CONFIG_USER_ONLY)
#include <qemu.h> #include <qemu.h>
#endif #endif
@ -2212,6 +2213,9 @@ void cpu_register_physical_memory(target_phys_addr_t start_addr,
kqemu_set_phys_mem(start_addr, size, phys_offset); kqemu_set_phys_mem(start_addr, size, phys_offset);
} }
#endif #endif
if (kvm_enabled())
kvm_set_phys_mem(start_addr, size, phys_offset);
size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK; size = (size + TARGET_PAGE_SIZE - 1) & TARGET_PAGE_MASK;
end_addr = start_addr + (target_phys_addr_t)size; end_addr = start_addr + (target_phys_addr_t)size;
for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) { for(addr = start_addr; addr != end_addr; addr += TARGET_PAGE_SIZE) {

View File

@ -23,6 +23,7 @@
#include "sysemu.h" #include "sysemu.h"
#include "i2c.h" #include "i2c.h"
#include "smbus.h" #include "smbus.h"
#include "kvm.h"
//#define DEBUG //#define DEBUG
@ -501,6 +502,12 @@ i2c_bus *piix4_pm_init(PCIBus *bus, int devfn, uint32_t smb_io_base,
register_ioport_write(ACPI_DBG_IO_ADDR, 4, 4, acpi_dbg_writel, s); register_ioport_write(ACPI_DBG_IO_ADDR, 4, 4, acpi_dbg_writel, s);
if (kvm_enabled()) {
/* Mark SMM as already inited to prevent SMM from running. KVM does not
* support SMM mode. */
pci_conf[0x5B] = 0x02;
}
/* XXX: which specification is used ? The i82731AB has different /* XXX: which specification is used ? The i82731AB has different
mappings */ mappings */
pci_conf[0x5f] = (parallel_hds[0] != NULL ? 0x80 : 0) | 0x10; pci_conf[0x5f] = (parallel_hds[0] != NULL ? 0x80 : 0) | 0x10;

View File

@ -37,6 +37,7 @@
#include <dirent.h> #include <dirent.h>
#include "qemu-timer.h" #include "qemu-timer.h"
#include "migration.h" #include "migration.h"
#include "kvm.h"
//#define DEBUG //#define DEBUG
//#define DEBUG_COMPLETION //#define DEBUG_COMPLETION
@ -1263,6 +1264,19 @@ static void do_info_kqemu(void)
#endif #endif
} }
/* Monitor handler: report whether KVM support was compiled in and, if so,
 * whether it is currently enabled. */
static void do_info_kvm(void)
{
#ifndef CONFIG_KVM
    term_printf("kvm support: not compiled\n");
#else
    term_printf("kvm support: ");
    if (!kvm_enabled()) {
        term_printf("disabled\n");
    } else {
        term_printf("enabled\n");
    }
#endif
}
#ifdef CONFIG_PROFILER #ifdef CONFIG_PROFILER
int64_t kqemu_time; int64_t kqemu_time;
@ -1497,6 +1511,8 @@ static const term_cmd_t info_cmds[] = {
"", "show dynamic compiler info", }, "", "show dynamic compiler info", },
{ "kqemu", "", do_info_kqemu, { "kqemu", "", do_info_kqemu,
"", "show kqemu information", }, "", "show kqemu information", },
{ "kvm", "", do_info_kvm,
"", "show kvm information", },
{ "usb", "", usb_info, { "usb", "", usb_info,
"", "show guest USB devices", }, "", "show guest USB devices", },
{ "usbhost", "", usb_host_info, { "usbhost", "", usb_host_info,

View File

@ -587,6 +587,8 @@ typedef struct CPUX86State {
target_ulong kernelgsbase; target_ulong kernelgsbase;
#endif #endif
uint64_t tsc;
uint64_t pat; uint64_t pat;
/* exception/interrupt handling */ /* exception/interrupt handling */
@ -617,6 +619,10 @@ typedef struct CPUX86State {
int kqemu_enabled; int kqemu_enabled;
int last_io_time; int last_io_time;
#endif #endif
/* For KVM */
uint64_t interrupt_bitmap[256 / 64];
/* in order to simplify APIC support, we leave this pointer to the /* in order to simplify APIC support, we leave this pointer to the
user */ user */
struct APICState *apic_state; struct APICState *apic_state;

View File

@ -29,6 +29,7 @@
#include "exec-all.h" #include "exec-all.h"
#include "svm.h" #include "svm.h"
#include "qemu-common.h" #include "qemu-common.h"
#include "kvm.h"
//#define DEBUG_MMU //#define DEBUG_MMU
@ -115,6 +116,8 @@ CPUX86State *cpu_x86_init(const char *cpu_model)
#ifdef USE_KQEMU #ifdef USE_KQEMU
kqemu_init(env); kqemu_init(env);
#endif #endif
if (kvm_enabled())
kvm_init_vcpu(env);
return env; return env;
} }
@ -1288,6 +1291,40 @@ target_phys_addr_t cpu_get_phys_page_debug(CPUState *env, target_ulong addr)
} }
#endif /* !CONFIG_USER_ONLY */ #endif /* !CONFIG_USER_ONLY */
#if defined(CONFIG_KVM)
/*
 * Execute the CPUID instruction on the host CPU.
 *
 * function: value loaded into EAX before CPUID is executed (the leaf).
 * eax, ebx, ecx, edx: optional outputs; each non-NULL pointer receives the
 *                     corresponding result register, NULL pointers are
 *                     skipped.
 *
 * NOTE(review): ECX is not set as an input, so leaves that take a sub-leaf
 * index in ECX would see whatever value happens to be in the register.
 */
static void host_cpuid(uint32_t function, uint32_t *eax, uint32_t *ebx,
uint32_t *ecx, uint32_t *edx)
{
/* Results in the order EAX, EBX, ECX, EDX. */
uint32_t vec[4];
#ifdef __x86_64__
/* 64-bit build: bind the four result registers directly as asm outputs;
 * "0"(function) places the leaf in the same register as output 0 (EAX). */
asm volatile("cpuid"
: "=a"(vec[0]), "=b"(vec[1]),
"=c"(vec[2]), "=d"(vec[3])
: "0"(function) : "cc");
#else
/* 32-bit build: save and restore all general registers with pusha/popa
 * around CPUID and store the results through the pointer held in ESI,
 * so no register is declared as an output or clobber (presumably because
 * EBX may be reserved, e.g. for PIC -- TODO confirm).  The "memory"
 * clobber covers the stores into vec. */
asm volatile("pusha \n\t"
"cpuid \n\t"
"mov %%eax, 0(%1) \n\t"
"mov %%ebx, 4(%1) \n\t"
"mov %%ecx, 8(%1) \n\t"
"mov %%edx, 12(%1) \n\t"
"popa"
: : "a"(function), "S"(vec)
: "memory", "cc");
#endif
/* Copy out only the registers the caller asked for. */
if (eax)
*eax = vec[0];
if (ebx)
*ebx = vec[1];
if (ecx)
*ecx = vec[2];
if (edx)
*edx = vec[3];
}
#endif
void cpu_x86_cpuid(CPUX86State *env, uint32_t index, void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
uint32_t *eax, uint32_t *ebx, uint32_t *eax, uint32_t *ebx,
uint32_t *ecx, uint32_t *edx) uint32_t *ecx, uint32_t *edx)
@ -1307,12 +1344,23 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
*ebx = env->cpuid_vendor1; *ebx = env->cpuid_vendor1;
*edx = env->cpuid_vendor2; *edx = env->cpuid_vendor2;
*ecx = env->cpuid_vendor3; *ecx = env->cpuid_vendor3;
/* sysenter isn't supported in compatibility mode on AMD, and syscall
* isn't supported in compatibility mode on Intel, so advertise the
* actual cpu, and say goodbye to migration between different vendors
* if you use compatibility mode. */
if (kvm_enabled())
host_cpuid(0, NULL, ebx, ecx, edx);
break; break;
case 1: case 1:
*eax = env->cpuid_version; *eax = env->cpuid_version;
*ebx = (env->cpuid_apic_id << 24) | 8 << 8; /* CLFLUSH size in quad words, Linux wants it. */ *ebx = (env->cpuid_apic_id << 24) | 8 << 8; /* CLFLUSH size in quad words, Linux wants it. */
*ecx = env->cpuid_ext_features; *ecx = env->cpuid_ext_features;
*edx = env->cpuid_features; *edx = env->cpuid_features;
/* "Hypervisor present" bit (ECX bit 31) required for Microsoft SVVP.
 * The constant is unsigned because shifting a signed 1 into the sign
 * bit is undefined behaviour in C. */
if (kvm_enabled())
    *ecx |= (1U << 31);
break; break;
case 2: case 2:
/* cache info: needed for Pentium Pro compatibility */ /* cache info: needed for Pentium Pro compatibility */
@ -1390,6 +1438,31 @@ void cpu_x86_cpuid(CPUX86State *env, uint32_t index,
*ebx = 0; *ebx = 0;
*ecx = env->cpuid_ext3_features; *ecx = env->cpuid_ext3_features;
*edx = env->cpuid_ext2_features; *edx = env->cpuid_ext2_features;
if (kvm_enabled()) {
    uint32_t h_edx;

    /* Query the host's extended feature leaf; only EDX is consulted,
     * so the other outputs are not requested. */
    host_cpuid(0x80000001, NULL, NULL, NULL, &h_edx);

    /* disable CPU features that the host does not support */

    /* long mode */
    if ((h_edx & 0x20000000) == 0 /* || !lm_capable_kernel */)
        *edx &= ~0x20000000;
    /* syscall */
    if ((h_edx & 0x00000800) == 0)
        *edx &= ~0x00000800;
    /* nx */
    if ((h_edx & 0x00100000) == 0)
        *edx &= ~0x00100000;

    /* disable CPU features that KVM cannot support */

    /* svm */
    *ecx &= ~4UL;
    /* 3dnow: clear only bits 31:30.  This must be "&=": a plain
     * assignment would replace every guest feature bit in EDX with
     * the inverted mask, discarding the masking done above. */
    *edx &= ~0xc0000000;
}
break; break;
case 0x80000002: case 0x80000002:
case 0x80000003: case 0x80000003:

34
vl.c
View File

@ -39,6 +39,7 @@
#include "block.h" #include "block.h"
#include "audio/audio.h" #include "audio/audio.h"
#include "migration.h" #include "migration.h"
#include "kvm.h"
#include <unistd.h> #include <unistd.h>
#include <fcntl.h> #include <fcntl.h>
@ -4782,6 +4783,9 @@ static void help(int exitcode)
"-kernel-kqemu enable KQEMU full virtualization (default is user mode only)\n" "-kernel-kqemu enable KQEMU full virtualization (default is user mode only)\n"
"-no-kqemu disable KQEMU kernel module usage\n" "-no-kqemu disable KQEMU kernel module usage\n"
#endif #endif
#ifdef CONFIG_KVM
"-enable-kvm enable KVM full virtualization support\n"
#endif
#ifdef TARGET_I386 #ifdef TARGET_I386
"-no-acpi disable ACPI\n" "-no-acpi disable ACPI\n"
#endif #endif
@ -4887,6 +4891,7 @@ enum {
QEMU_OPTION_pidfile, QEMU_OPTION_pidfile,
QEMU_OPTION_no_kqemu, QEMU_OPTION_no_kqemu,
QEMU_OPTION_kernel_kqemu, QEMU_OPTION_kernel_kqemu,
QEMU_OPTION_enable_kvm,
QEMU_OPTION_win2k_hack, QEMU_OPTION_win2k_hack,
QEMU_OPTION_usb, QEMU_OPTION_usb,
QEMU_OPTION_usbdevice, QEMU_OPTION_usbdevice,
@ -4973,6 +4978,9 @@ static const QEMUOption qemu_options[] = {
{ "no-kqemu", 0, QEMU_OPTION_no_kqemu }, { "no-kqemu", 0, QEMU_OPTION_no_kqemu },
{ "kernel-kqemu", 0, QEMU_OPTION_kernel_kqemu }, { "kernel-kqemu", 0, QEMU_OPTION_kernel_kqemu },
#endif #endif
#ifdef CONFIG_KVM
{ "enable-kvm", 0, QEMU_OPTION_enable_kvm },
#endif
#if defined(TARGET_PPC) || defined(TARGET_SPARC) #if defined(TARGET_PPC) || defined(TARGET_SPARC)
{ "g", 1, QEMU_OPTION_g }, { "g", 1, QEMU_OPTION_g },
#endif #endif
@ -5793,6 +5801,14 @@ int main(int argc, char **argv)
case QEMU_OPTION_kernel_kqemu: case QEMU_OPTION_kernel_kqemu:
kqemu_allowed = 2; kqemu_allowed = 2;
break; break;
#endif
#ifdef CONFIG_KVM
case QEMU_OPTION_enable_kvm:
kvm_allowed = 1;
#ifdef USE_KQEMU
kqemu_allowed = 0;
#endif
break;
#endif #endif
case QEMU_OPTION_usb: case QEMU_OPTION_usb:
usb_enabled = 1; usb_enabled = 1;
@ -5928,6 +5944,14 @@ int main(int argc, char **argv)
} }
} }
#if defined(CONFIG_KVM) && defined(USE_KQEMU)
if (kvm_allowed && kqemu_allowed) {
fprintf(stderr,
"You can not enable both KVM and kqemu at the same time\n");
exit(1);
}
#endif
machine->max_cpus = machine->max_cpus ?: 1; /* Default to UP */ machine->max_cpus = machine->max_cpus ?: 1; /* Default to UP */
if (smp_cpus > machine->max_cpus) { if (smp_cpus > machine->max_cpus) {
fprintf(stderr, "Number of SMP cpus requested (%d), exceeds max cpus " fprintf(stderr, "Number of SMP cpus requested (%d), exceeds max cpus "
@ -6229,6 +6253,16 @@ int main(int argc, char **argv)
} }
} }
/* When KVM is enabled, initialize it for the requested number of CPUs
 * before the machine's init hook runs; a negative result is fatal. */
if (kvm_enabled() && kvm_init(smp_cpus) < 0) {
    fprintf(stderr, "failed to initialize KVM\n");
    exit(1);
}
machine->init(ram_size, vga_ram_size, boot_devices, ds, machine->init(ram_size, vga_ram_size, boot_devices, ds,
kernel_filename, kernel_cmdline, initrd_filename, cpu_model); kernel_filename, kernel_cmdline, initrd_filename, cpu_model);