diff --git a/Makefile b/Makefile index fc9d74f9c6..15506a19c0 100644 --- a/Makefile +++ b/Makefile @@ -11,7 +11,7 @@ ifndef CONFIG_WIN32 TOOLS=qemu-mkcow endif -all: dyngen$(EXESUF) $(TOOLS) qemu-doc.html qemu.1 +all: dyngen$(EXESUF) $(TOOLS) qemu-doc.html qemu-tech.html qemu.1 for d in $(TARGET_DIRS); do \ make -C $$d $@ || exit 1 ; \ done @@ -61,7 +61,7 @@ TAGS: etags *.[ch] tests/*.[ch] # documentation -qemu-doc.html: qemu-doc.texi +%.html: %.texi texi2html -monolithic -number $< qemu.1: qemu-doc.texi diff --git a/TODO b/TODO index 8f66ee5c0e..3d8b0b8058 100644 --- a/TODO +++ b/TODO @@ -2,7 +2,6 @@ short term: ---------- - handle fast timers + add explicit clocks - OS/2 install bug -- win 95 install bug - handle Self Modifying Code even if modifying current TB (BE OS 5 install) - physical memory cache (reduce qemu-fast address space size to about 32 MB) - better code fetch diff --git a/linux-2.6-qemu-fast.patch b/linux-2.6-qemu-fast.patch new file mode 100644 index 0000000000..34ca5a232e --- /dev/null +++ b/linux-2.6-qemu-fast.patch @@ -0,0 +1,305 @@ +diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32324-linux-2.6.0/arch/i386/Kconfig .32324-linux-2.6.0.updated/arch/i386/Kconfig +--- .32324-linux-2.6.0/arch/i386/Kconfig 2003-10-09 18:02:48.000000000 +1000 ++++ .32324-linux-2.6.0.updated/arch/i386/Kconfig 2003-12-26 16:46:49.000000000 +1100 +@@ -307,6 +307,14 @@ config X86_GENERIC + when it has moderate overhead. This is intended for generic + distributions kernels. + ++config QEMU ++ bool "Kernel to run under QEMU" ++ depends on EXPERIMENTAL ++ help ++ Select this if you want to boot the kernel inside qemu-fast, ++ the non-mmu version of the x86 emulator. See ++ . Say N. 
++ + # + # Define implied options from the CPU selection here + # +diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32324-linux-2.6.0/arch/i386/kernel/Makefile .32324-linux-2.6.0.updated/arch/i386/kernel/Makefile +--- .32324-linux-2.6.0/arch/i386/kernel/Makefile 2003-09-29 10:25:15.000000000 +1000 ++++ .32324-linux-2.6.0.updated/arch/i386/kernel/Makefile 2003-12-26 16:46:49.000000000 +1100 +@@ -46,12 +46,14 @@ quiet_cmd_syscall = SYSCALL $@ + cmd_syscall = $(CC) -nostdlib $(SYSCFLAGS_$(@F)) \ + -Wl,-T,$(filter-out FORCE,$^) -o $@ + ++export AFLAGS_vsyscall.lds.o += -P -C -U$(ARCH) ++ + vsyscall-flags = -shared -s -Wl,-soname=linux-gate.so.1 + SYSCFLAGS_vsyscall-sysenter.so = $(vsyscall-flags) + SYSCFLAGS_vsyscall-int80.so = $(vsyscall-flags) + + $(obj)/vsyscall-int80.so $(obj)/vsyscall-sysenter.so: \ +-$(obj)/vsyscall-%.so: $(src)/vsyscall.lds $(obj)/vsyscall-%.o FORCE ++$(obj)/vsyscall-%.so: $(src)/vsyscall.lds.s $(obj)/vsyscall-%.o FORCE + $(call if_changed,syscall) + + # We also create a special relocatable object that should mirror the symbol +@@ -62,5 +64,5 @@ $(obj)/built-in.o: $(obj)/vsyscall-syms. 
+ $(obj)/built-in.o: ld_flags += -R $(obj)/vsyscall-syms.o + + SYSCFLAGS_vsyscall-syms.o = -r +-$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds $(obj)/vsyscall-sysenter.o FORCE ++$(obj)/vsyscall-syms.o: $(src)/vsyscall.lds.s $(obj)/vsyscall-sysenter.o FORCE + $(call if_changed,syscall) +diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32324-linux-2.6.0/arch/i386/kernel/vmlinux.lds.S .32324-linux-2.6.0.updated/arch/i386/kernel/vmlinux.lds.S +--- .32324-linux-2.6.0/arch/i386/kernel/vmlinux.lds.S 2003-09-22 10:27:28.000000000 +1000 ++++ .32324-linux-2.6.0.updated/arch/i386/kernel/vmlinux.lds.S 2003-12-26 16:46:49.000000000 +1100 +@@ -3,6 +3,7 @@ + */ + + #include ++#include + + OUTPUT_FORMAT("elf32-i386", "elf32-i386", "elf32-i386") + OUTPUT_ARCH(i386) +@@ -10,7 +11,7 @@ ENTRY(startup_32) + jiffies = jiffies_64; + SECTIONS + { +- . = 0xC0000000 + 0x100000; ++ . = __PAGE_OFFSET + 0x100000; + /* read-only */ + _text = .; /* Text and read-only data */ + .text : { +diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32324-linux-2.6.0/arch/i386/kernel/vsyscall.lds .32324-linux-2.6.0.updated/arch/i386/kernel/vsyscall.lds +--- .32324-linux-2.6.0/arch/i386/kernel/vsyscall.lds 2003-09-22 10:07:26.000000000 +1000 ++++ .32324-linux-2.6.0.updated/arch/i386/kernel/vsyscall.lds 1970-01-01 10:00:00.000000000 +1000 +@@ -1,67 +0,0 @@ +-/* +- * Linker script for vsyscall DSO. The vsyscall page is an ELF shared +- * object prelinked to its virtual address, and with only one read-only +- * segment (that fits in one page). This script controls its layout. +- */ +- +-/* This must match . */ +-VSYSCALL_BASE = 0xffffe000; +- +-SECTIONS +-{ +- . 
= VSYSCALL_BASE + SIZEOF_HEADERS; +- +- .hash : { *(.hash) } :text +- .dynsym : { *(.dynsym) } +- .dynstr : { *(.dynstr) } +- .gnu.version : { *(.gnu.version) } +- .gnu.version_d : { *(.gnu.version_d) } +- .gnu.version_r : { *(.gnu.version_r) } +- +- /* This linker script is used both with -r and with -shared. +- For the layouts to match, we need to skip more than enough +- space for the dynamic symbol table et al. If this amount +- is insufficient, ld -shared will barf. Just increase it here. */ +- . = VSYSCALL_BASE + 0x400; +- +- .text : { *(.text) } :text =0x90909090 +- +- .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr +- .eh_frame : { KEEP (*(.eh_frame)) } :text +- .dynamic : { *(.dynamic) } :text :dynamic +- .useless : { +- *(.got.plt) *(.got) +- *(.data .data.* .gnu.linkonce.d.*) +- *(.dynbss) +- *(.bss .bss.* .gnu.linkonce.b.*) +- } :text +-} +- +-/* +- * We must supply the ELF program headers explicitly to get just one +- * PT_LOAD segment, and set the flags explicitly to make segments read-only. +- */ +-PHDRS +-{ +- text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ +- dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ +- eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ +-} +- +-/* +- * This controls what symbols we export from the DSO. +- */ +-VERSION +-{ +- LINUX_2.5 { +- global: +- __kernel_vsyscall; +- __kernel_sigreturn; +- __kernel_rt_sigreturn; +- +- local: *; +- }; +-} +- +-/* The ELF entry point can be used to set the AT_SYSINFO value. */ +-ENTRY(__kernel_vsyscall); +diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32324-linux-2.6.0/arch/i386/kernel/vsyscall.lds.S .32324-linux-2.6.0.updated/arch/i386/kernel/vsyscall.lds.S +--- .32324-linux-2.6.0/arch/i386/kernel/vsyscall.lds.S 1970-01-01 10:00:00.000000000 +1000 ++++ .32324-linux-2.6.0.updated/arch/i386/kernel/vsyscall.lds.S 2003-12-26 16:46:49.000000000 +1100 +@@ -0,0 +1,67 @@ ++/* ++ * Linker script for vsyscall DSO. 
The vsyscall page is an ELF shared ++ * object prelinked to its virtual address, and with only one read-only ++ * segment (that fits in one page). This script controls its layout. ++ */ ++#include ++ ++VSYSCALL_BASE = __FIXADDR_TOP - 0x1000; ++ ++SECTIONS ++{ ++ . = VSYSCALL_BASE + SIZEOF_HEADERS; ++ ++ .hash : { *(.hash) } :text ++ .dynsym : { *(.dynsym) } ++ .dynstr : { *(.dynstr) } ++ .gnu.version : { *(.gnu.version) } ++ .gnu.version_d : { *(.gnu.version_d) } ++ .gnu.version_r : { *(.gnu.version_r) } ++ ++ /* This linker script is used both with -r and with -shared. ++ For the layouts to match, we need to skip more than enough ++ space for the dynamic symbol table et al. If this amount ++ is insufficient, ld -shared will barf. Just increase it here. */ ++ . = VSYSCALL_BASE + 0x400; ++ ++ .text : { *(.text) } :text =0x90909090 ++ ++ .eh_frame_hdr : { *(.eh_frame_hdr) } :text :eh_frame_hdr ++ .eh_frame : { KEEP (*(.eh_frame)) } :text ++ .dynamic : { *(.dynamic) } :text :dynamic ++ .useless : { ++ *(.got.plt) *(.got) ++ *(.data .data.* .gnu.linkonce.d.*) ++ *(.dynbss) ++ *(.bss .bss.* .gnu.linkonce.b.*) ++ } :text ++} ++ ++/* ++ * We must supply the ELF program headers explicitly to get just one ++ * PT_LOAD segment, and set the flags explicitly to make segments read-only. ++ */ ++PHDRS ++{ ++ text PT_LOAD FILEHDR PHDRS FLAGS(5); /* PF_R|PF_X */ ++ dynamic PT_DYNAMIC FLAGS(4); /* PF_R */ ++ eh_frame_hdr 0x6474e550; /* PT_GNU_EH_FRAME, but ld doesn't match the name */ ++} ++ ++/* ++ * This controls what symbols we export from the DSO. ++ */ ++VERSION ++{ ++ LINUX_2.5 { ++ global: ++ __kernel_vsyscall; ++ __kernel_sigreturn; ++ __kernel_rt_sigreturn; ++ ++ local: *; ++ }; ++} ++ ++/* The ELF entry point can be used to set the AT_SYSINFO value. 
*/ ++ENTRY(__kernel_vsyscall); +diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32324-linux-2.6.0/include/asm-i386/fixmap.h .32324-linux-2.6.0.updated/include/asm-i386/fixmap.h +--- .32324-linux-2.6.0/include/asm-i386/fixmap.h 2003-09-22 10:09:12.000000000 +1000 ++++ .32324-linux-2.6.0.updated/include/asm-i386/fixmap.h 2003-12-26 16:46:49.000000000 +1100 +@@ -14,6 +14,19 @@ + #define _ASM_FIXMAP_H + + #include ++ ++/* used by vmalloc.c, vsyscall.lds.S. ++ * ++ * Leave one empty page between vmalloc'ed areas and ++ * the start of the fixmap. ++ */ ++#ifdef CONFIG_QEMU ++#define __FIXADDR_TOP 0xa7fff000 ++#else ++#define __FIXADDR_TOP 0xfffff000 ++#endif ++ ++#ifndef __ASSEMBLY__ + #include + #include + #include +@@ -94,13 +107,8 @@ extern void __set_fixmap (enum fixed_add + #define clear_fixmap(idx) \ + __set_fixmap(idx, 0, __pgprot(0)) + +-/* +- * used by vmalloc.c. +- * +- * Leave one empty page between vmalloc'ed areas and +- * the start of the fixmap. 
+- */ +-#define FIXADDR_TOP (0xfffff000UL) ++#define FIXADDR_TOP ((unsigned long)__FIXADDR_TOP) ++ + #define __FIXADDR_SIZE (__end_of_permanent_fixed_addresses << PAGE_SHIFT) + #define FIXADDR_START (FIXADDR_TOP - __FIXADDR_SIZE) + +@@ -145,4 +153,5 @@ static inline unsigned long virt_to_fix( + return __virt_to_fix(vaddr); + } + ++#endif /* !__ASSEMBLY__ */ + #endif +diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32324-linux-2.6.0/include/asm-i386/page.h .32324-linux-2.6.0.updated/include/asm-i386/page.h +--- .32324-linux-2.6.0/include/asm-i386/page.h 2003-09-22 10:06:42.000000000 +1000 ++++ .32324-linux-2.6.0.updated/include/asm-i386/page.h 2003-12-26 16:46:49.000000000 +1100 +@@ -10,10 +10,10 @@ + #define LARGE_PAGE_SIZE (1UL << PMD_SHIFT) + + #ifdef __KERNEL__ +-#ifndef __ASSEMBLY__ +- + #include + ++#ifndef __ASSEMBLY__ ++ + #ifdef CONFIG_X86_USE_3DNOW + + #include +@@ -115,12 +115,19 @@ static __inline__ int get_order(unsigned + #endif /* __ASSEMBLY__ */ + + #ifdef __ASSEMBLY__ ++#ifdef CONFIG_QEMU ++#define __PAGE_OFFSET (0x90000000) ++#else + #define __PAGE_OFFSET (0xC0000000) ++#endif /* QEMU */ ++#else ++#ifdef CONFIG_QEMU ++#define __PAGE_OFFSET (0x90000000UL) + #else + #define __PAGE_OFFSET (0xC0000000UL) ++#endif /* QEMU */ + #endif + +- + #define PAGE_OFFSET ((unsigned long)__PAGE_OFFSET) + #define VMALLOC_RESERVE ((unsigned long)__VMALLOC_RESERVE) + #define MAXMEM (-__PAGE_OFFSET-__VMALLOC_RESERVE) +diff -urpN --exclude TAGS -X /home/rusty/devel/kernel/kernel-patches/current-dontdiff --minimal .32324-linux-2.6.0/include/asm-i386/param.h .32324-linux-2.6.0.updated/include/asm-i386/param.h +--- .32324-linux-2.6.0/include/asm-i386/param.h 2003-09-21 17:26:06.000000000 +1000 ++++ .32324-linux-2.6.0.updated/include/asm-i386/param.h 2003-12-26 16:46:49.000000000 +1100 +@@ -2,7 +2,12 @@ + #define _ASMi386_PARAM_H + + #ifdef __KERNEL__ +-# define HZ 1000 /* Internal kernel timer frequency */ ++# include ++# 
ifdef CONFIG_QEMU ++# define HZ 100 ++# else ++# define HZ 1000 /* Internal kernel timer frequency */ ++# endif + # define USER_HZ 100 /* .. some user interfaces are in "ticks" */ + # define CLOCKS_PER_SEC (USER_HZ) /* like times() */ + #endif diff --git a/qemu-doc.texi b/qemu-doc.texi index 5ca8e8f3bb..1f056065bd 100644 --- a/qemu-doc.texi +++ b/qemu-doc.texi @@ -1,10 +1,10 @@ \input texinfo @c -*- texinfo -*- @iftex -@settitle QEMU CPU Emulator Reference Documentation +@settitle QEMU CPU Emulator User Documentation @titlepage @sp 7 -@center @titlefont{QEMU CPU Emulator Reference Documentation} +@center @titlefont{QEMU CPU Emulator User Documentation} @sp 3 @end titlepage @end iftex @@ -13,126 +13,39 @@ @section Features -QEMU is a FAST! processor emulator. By using dynamic translation it -achieves a reasonnable speed while being easy to port on new host -CPUs. +QEMU is a FAST! processor emulator using dynamic translation to +achieve good emulation speed. QEMU has two operating modes: @itemize @minus @item -User mode emulation. In this mode, QEMU can launch Linux processes -compiled for one CPU on another CPU. Linux system calls are converted -because of endianness and 32/64 bit mismatches. The Wine Windows API -emulator (@url{http://www.winehq.org}) and the DOSEMU DOS emulator -(@url{http://www.dosemu.org}) are the main targets for QEMU. +Full system emulation. In this mode, QEMU emulates a full system (for +example a PC), including a processor and various peripherials. It can +be used to launch different Operating Systems without rebooting the +PC or to debug system code. @item -Full system emulation. In this mode, QEMU emulates a full -system, including a processor and various peripherials. Currently, it -is only used to launch an x86 Linux kernel on an x86 Linux system. It -enables easier testing and debugging of system code. It can also be -used to provide virtual hosting of several virtual PCs on a single -server. +User mode emulation (Linux host only). 
In this mode, QEMU can launch +Linux processes compiled for one CPU on another CPU. It can be used to +launch the Wine Windows API emulator (@url{http://www.winehq.org}) or +to ease cross-compilation and cross-debugging. @end itemize -As QEMU requires no host kernel patches to run, it is very safe and +As QEMU requires no host kernel driver to run, it is very safe and easy to use. -QEMU generic features: +For system emulation, only the x86 PC emulator is currently +usable. The PowerPC system emulator is being developed. -@itemize - -@item User space only or full system emulation. - -@item Using dynamic translation to native code for reasonnable speed. - -@item Working on x86 and PowerPC hosts. Being tested on ARM, Sparc32, Alpha and S390. - -@item Self-modifying code support. - -@item Precise exceptions support. - -@item The virtual CPU is a library (@code{libqemu}) which can be used -in other projects. - -@end itemize - -QEMU user mode emulation features: -@itemize -@item Generic Linux system call converter, including most ioctls. - -@item clone() emulation using native CPU clone() to use Linux scheduler for threads. - -@item Accurate signal handling by remapping host signals to target signals. -@end itemize -@end itemize - -QEMU full system emulation features: -@itemize -@item QEMU can either use a full software MMU for maximum portability or use the host system call mmap() to simulate the target MMU. -@end itemize - -@section x86 emulation - -QEMU x86 target features: - -@itemize - -@item The virtual x86 CPU supports 16 bit and 32 bit addressing with segmentation. -LDT/GDT and IDT are emulated. VM86 mode is also supported to run DOSEMU. - -@item Support of host page sizes bigger than 4KB in user mode emulation. - -@item QEMU can emulate itself on x86. - -@item An extensive Linux x86 CPU test program is included @file{tests/test-i386}. -It can be used to test other x86 virtual CPUs. 
- -@end itemize - -Current QEMU limitations: - -@itemize - -@item No SSE/MMX support (yet). - -@item No x86-64 support. - -@item IPC syscalls are missing. - -@item The x86 segment limits and access rights are not tested at every -memory access. - -@item On non x86 host CPUs, @code{double}s are used instead of the non standard -10 byte @code{long double}s of x86 for floating point emulation to get -maximum performances. - -@item Some priviledged instructions or behaviors are missing, especially for segment protection testing (yet). - -@end itemize - -@section ARM emulation - -@itemize - -@item ARM emulation can currently launch small programs while using the -generic dynamic code generation architecture of QEMU. - -@item No FPU support (yet). - -@item No automatic regression testing (yet). - -@end itemize - -@section SPARC emulation - -The SPARC emulation is currently in development. +For user emulation, x86, PowerPC, ARM, and SPARC CPUs are supported. @chapter Installation +@section Linux + If you want to compile QEMU, please read the @file{README} which gives the related information. Otherwise just download the binary distribution (@file{qemu-XXX-i386.tar.gz}) and untar it as root in @@ -144,106 +57,69 @@ cd / tar zxvf /tmp/qemu-XXX-i386.tar.gz @end example -@chapter QEMU User space emulator invocation - -@section Quick Start - -In order to launch a Linux process, QEMU needs the process executable -itself and all the target (x86) dynamic libraries used by it. - +@section Windows +w @itemize +@item Install the current versions of MSYS and MinGW from +@url{http://www.mingw.org/}. You can find detailed installation +instructions in the download section and the FAQ. -@item On x86, you can just try to launch any process by using the native -libraries: +@item Download +the MinGW development library of SDL 1.2.x +(@file{SDL-devel-1.2.x-mingw32.tar.gz}) from +@url{http://www.libsdl.org}. 
Unpack it in a temporary place, and +unpack the archive @file{i386-mingw32msvc.tar.gz} in the MinGW tool +directory. Edit the @file{sdl-config} script so that it gives the +correct SDL directory when invoked. -@example -qemu-i386 -L / /bin/ls -@end example +@item Extract the current version of QEMU. + +@item Start the MSYS shell (file @file{msys.bat}). -@code{-L /} tells that the x86 dynamic linker must be searched with a -@file{/} prefix. +@item Change to the QEMU directory. Launch @file{./configure} and +@file{make}. If you have problems using SDL, verify that +@file{sdl-config} can be launched from the MSYS command line. -@item Since QEMU is also a linux process, you can launch qemu with qemu (NOTE: you can only do that if you compiled QEMU from the sources): - -@example -qemu-i386 -L / qemu-i386 -L / /bin/ls -@end example - -@item On non x86 CPUs, you need first to download at least an x86 glibc -(@file{qemu-runtime-i386-XXX-.tar.gz} on the QEMU web page). Ensure that -@code{LD_LIBRARY_PATH} is not set: - -@example -unset LD_LIBRARY_PATH -@end example - -Then you can launch the precompiled @file{ls} x86 executable: - -@example -qemu-i386 tests/i386/ls -@end example -You can look at @file{qemu-binfmt-conf.sh} so that -QEMU is automatically launched by the Linux kernel when you try to -launch x86 executables. It requires the @code{binfmt_misc} module in the -Linux kernel. - -@item The x86 version of QEMU is also included. You can try weird things such as: -@example -qemu-i386 /usr/local/qemu-i386/bin/qemu-i386 /usr/local/qemu-i386/bin/ls-i386 -@end example +@item You can install QEMU in @file{Program Files/Qemu} by typing +@file{make install}. Don't forget to copy @file{SDL.dll} in +@file{Program Files/Qemu}. @end itemize -@section Wine launch +@section Cross compilation for Windows with Linux @itemize +@item +Install the MinGW cross compilation tools available at +@url{http://www.mingw.org/}. 
-@item Ensure that you have a working QEMU with the x86 glibc -distribution (see previous section). In order to verify it, you must be -able to do: +@item +Install the Win32 version of SDL (@url{http://www.libsdl.org}) by +unpacking @file{i386-mingw32msvc.tar.gz}. Set up the PATH environment +variable so that @file{i386-mingw32msvc-sdl-config} can be launched by +the QEMU configuration script. +@item +Configure QEMU for Windows cross compilation: @example -qemu-i386 /usr/local/qemu-i386/bin/ls-i386 +./configure --enable-mingw32 @end example +If necessary, you can change the cross-prefix according to the prefix +chosen for the MinGW tools with --cross-prefix. You can also use +--prefix to set the Win32 install path. -@item Download the binary x86 Wine install -(@file{qemu-XXX-i386-wine.tar.gz} on the QEMU web page). - -@item Configure Wine on your account. Look at the provided script -@file{/usr/local/qemu-i386/bin/wine-conf.sh}. Your previous -@code{$@{HOME@}/.wine} directory is saved to @code{$@{HOME@}/.wine.org}. - -@item Then you can try the example @file{putty.exe}: - -@example -qemu-i386 /usr/local/qemu-i386/wine/bin/wine /usr/local/qemu-i386/wine/c/Program\ Files/putty.exe -@end example +@item You can install QEMU in the installation directory by typing +@file{make install}. Don't forget to copy @file{SDL.dll} in the +installation directory. @end itemize -@section Command line options +Note: Currently, Wine does not seem able to launch +QEMU for Win32. -@example -usage: qemu-i386 [-h] [-d] [-L path] [-s size] program [arguments...] 
-@end example +@section Mac OS X -@table @option -@item -h -Print the help -@item -L path -Set the x86 elf interpreter prefix (default=/usr/local/qemu-i386) -@item -s size -Set the x86 stack size in bytes (default=524288) -@end table - -Debug options: - -@table @option -@item -d -Activate log (logfile=/tmp/qemu.log) -@item -p pagesize -Act as if the host page size was 'pagesize' bytes -@end table +Mac OS X is currently not supported. @chapter QEMU System emulator invocation @@ -251,9 +127,7 @@ Act as if the host page size was 'pagesize' bytes @c man begin DESCRIPTION -The QEMU System emulator simulates a complete PC. It can either boot -directly a Linux kernel (without any BIOS or boot loader) or boot like a -real PC with the included BIOS. +The QEMU System emulator simulates a complete PC. In order to meet specific user needs, two versions of QEMU are available: @@ -282,18 +156,14 @@ VGA (hardware level, including all non standard modes) PS/2 mouse and keyboard @item 2 IDE interfaces with hard disk and CD-ROM support +@item +Floppy disk @item -NE2000 network adapter (port=0x300, irq=9) +up to 6 NE2000 network adapters @item Serial port @item Soundblaster 16 card -@item -PIC (interrupt controler) -@item -PIT (timers) -@item -CMOS memory @end itemize @c man end @@ -308,6 +178,364 @@ qemu linux.img Linux should boot and give you a prompt. +@section Invocation + +@example +@c man begin SYNOPSIS +usage: qemu [options] [disk_image] +@c man end +@end example + +@c man begin OPTIONS +@var{disk_image} is a raw hard disk image for IDE hard disk 0. + +General options: +@table @option +@item -fda file +@item -fdb file +Use @var{file} as floppy disk 0/1 image (@xref{disk_images}). + +@item -hda file +@item -hdb file +@item -hdc file +@item -hdd file +Use @var{file} as hard disk 0, 1, 2 or 3 image (@xref{disk_images}). + +@item -cdrom file +Use @var{file} as CD-ROM image (you cannot use @option{-hdc} and +@option{-cdrom} at the same time). 
+ +@item -boot [a|c|d] +Boot on floppy (a), hard disk (c) or CD-ROM (d). Hard disk boot is +the default. + +@item -snapshot +Write to temporary files instead of disk image files. In this case, +the raw disk image you use is not written back. You can however force +the write back by pressing @key{C-a s} (@xref{disk_images}). + +@item -m megs +Set virtual RAM size to @var{megs} megabytes. + +@item -initrd file +Use @var{file} as initial ram disk. + +@item -nographic + +Normally, QEMU uses SDL to display the VGA output. With this option, +you can totally disable graphical output so that QEMU is a simple +command line application. The emulated serial port is redirected on +the console. Therefore, you can still use QEMU to debug a Linux kernel +with a serial console. + +@end table + +Network options: + +@table @option + +@item -n script +Set network init script [default=/etc/qemu-ifup]. This script is +launched to configure the host network interface (usually tun0) +corresponding to the virtual NE2000 card. + +@item -nics n +Simulate @var{n} network interfaces (default=1). + +@item -macaddr addr + +Set the mac address of the first interface (the format is +aa:bb:cc:dd:ee:ff in hexa). The mac address is incremented for each +new network interface. + +@item -tun-fd fd1,... +Assumes @var{fd} talks to tap/tun and use it. Read +@url{http://bellard.org/qemu/tetrinet.html} to have an example of its +use. + +@end table + +Linux boot specific. When using these options, you can use a given +Linux kernel without installing it in the disk image. It can be useful +for easier testing of various kernels. + +@table @option + +@item -kernel bzImage +Use @var{bzImage} as kernel image. + +@item -append cmdline +Use @var{cmdline} as kernel command line + +@item -initrd file +Use @var{file} as initial ram disk. + +@end table + +Debug options: +@table @option +@item -s +Wait gdb connection to port 1234 (@xref{gdb_usage}). +@item -p port +Change gdb connection port. 
+@item -d +Output log in /tmp/qemu.log +@end table + +During emulation, if you are using the serial console, use @key{C-a h} +to get terminal commands: + +@table @key +@item C-a h +Print this help +@item C-a x +Exit emulator +@item C-a s +Save disk data back to file (if -snapshot) +@item C-a b +Send break (magic sysrq in Linux) +@item C-a c +Switch between console and monitor +@item C-a C-a +Send C-a +@end table +@c man end + +@ignore + +@setfilename qemu +@settitle QEMU System Emulator + +@c man begin SEEALSO +The HTML documentation of QEMU for more precise information and Linux +user mode emulator invocation. +@c man end + +@c man begin AUTHOR +Fabrice Bellard +@c man end + +@end ignore + +@end ignore + + +@section QEMU Monitor + +The QEMU monitor is used to give complex commands to the QEMU +emulator. You can use it to: + +@itemize @minus + +@item +Remove or insert removable media images +(such as CD-ROM or floppies) + +@item +Freeze/unfreeze the Virtual Machine (VM) and save or restore its state +from a disk file. + +@item Inspect the VM state without an external debugger. + +@end itemize + +@subsection Commands + +The following commands are available: + +@table @option + +@item help or ? [cmd] +Show the help for all commands or just for command @var{cmd}. + +@item commit +Commit changes to the disk images (if -snapshot is used) + +@item info subcommand +show various information about the system state + +@table @option +@item info network +show the network state +@item info block +show the block devices +@item info registers +show the cpu registers +@item info history +show the command line history +@end table + +@item q or quit +Quit the emulator. + +@item eject [-f] device +Eject a removable media (use -f to force it). + +@item change device filename +Change a removable media. + +@item screendump filename +Save screen into PPM image @var{filename}. + +@item log item1[,...] +Activate logging of the specified items to @file{/tmp/qemu.log}. 
+ +@item savevm filename +Save the whole virtual machine state to @var{filename}. + +@item loadvm filename +Restore the whole virtual machine state from @var{filename}. + +@item stop +Stop emulation. + +@item c or cont +Resume emulation. + +@item gdbserver [port] +Start gdbserver session (default port=1234) + +@item x/fmt addr +Virtual memory dump starting at @var{addr}. + +@item xp /fmt addr +Physical memory dump starting at @var{addr}. + +@var{fmt} is a format which tells the command how to format the +data. Its syntax is: @option{/@{count@}@{format@}@{size@}} + +@table @var +@item count +is the number of items to be dumped. + +@item format +can be x (hexa), d (signed decimal), u (unsigned decimal), o (octal), +c (char) or i (asm instruction). + +@item size +can be b (8 bits), h (16 bits), w (32 bits) or g (64 bits) + +@end table + +Examples: +@itemize +@item +Dump 10 instructions at the current instruction pointer: +@example +(qemu) x/10i $eip +0x90107063: ret +0x90107064: sti +0x90107065: lea 0x0(%esi,1),%esi +0x90107069: lea 0x0(%edi,1),%edi +0x90107070: ret +0x90107071: jmp 0x90107080 +0x90107073: nop +0x90107074: nop +0x90107075: nop +0x90107076: nop +@end example + +@item +Dump 80 16 bit values at the start of the video memory. 
+@example +(qemu) xp/80hx 0xb8000 +0x000b8000: 0x0b50 0x0b6c 0x0b65 0x0b78 0x0b38 0x0b36 0x0b2f 0x0b42 +0x000b8010: 0x0b6f 0x0b63 0x0b68 0x0b73 0x0b20 0x0b56 0x0b47 0x0b41 +0x000b8020: 0x0b42 0x0b69 0x0b6f 0x0b73 0x0b20 0x0b63 0x0b75 0x0b72 +0x000b8030: 0x0b72 0x0b65 0x0b6e 0x0b74 0x0b2d 0x0b63 0x0b76 0x0b73 +0x000b8040: 0x0b20 0x0b30 0x0b35 0x0b20 0x0b4e 0x0b6f 0x0b76 0x0b20 +0x000b8050: 0x0b32 0x0b30 0x0b30 0x0b33 0x0720 0x0720 0x0720 0x0720 +0x000b8060: 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 +0x000b8070: 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 +0x000b8080: 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 +0x000b8090: 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 0x0720 +@end example +@end itemize + +@item p or print/fmt expr + +Print expression value. Only the @var{format} part of @var{fmt} is +used. + +@end table + +@subsection Integer expressions + +The monitor understands integer expressions for every integer +argument. You can use register names to get the value of specific +CPU registers by prefixing them with @emph{$}. + +@node disk_images +@section Disk Images + +@subsection Raw disk images + +The disk images can simply be raw images of the hard disk. You can +create them with the command: +@example +dd if=/dev/zero of=myimage bs=1024 count=mysize +@end example +where @var{myimage} is the image filename and @var{mysize} is its size +in kilobytes. + +@subsection Snapshot mode + +If you use the option @option{-snapshot}, all disk images are +considered as read only. When sectors are written, they are written in +a temporary file created in @file{/tmp}. You can however force the +write back to the raw disk images by pressing @key{C-a s}. + +NOTE: The snapshot mode only works with raw disk images. + +@subsection Copy On Write disk images + +QEMU also supports user mode Linux +(@url{http://user-mode-linux.sourceforge.net/}) Copy On Write (COW) +disk images. 
The COW disk images are much smaller than normal images +as they store only modified sectors. They also permit the use of the +same disk image template for many users. + +To create a COW disk image, use the command: + +@example +qemu-mkcow -f myrawimage.bin mycowimage.cow +@end example + +@file{myrawimage.bin} is a raw image you want to use as original disk +image. It will never be written to. + +@file{mycowimage.cow} is the COW disk image which is created by +@code{qemu-mkcow}. You can use it directly with the @option{-hdx} +options. You must not modify the original raw disk image if you use +COW images, as COW images only store the modified sectors from the raw +disk image. QEMU stores the original raw disk image name and its +modified time in the COW disk image so that chances of mistakes are +reduced. + +If the raw disk image is not read-only, by pressing @key{C-a s} you +can flush the COW disk image back into the raw disk image, as in +snapshot mode. + +COW disk images can also be created without a corresponding raw disk +image. It is useful to have a big initial virtual disk image without +using much disk space. Use: + +@example +qemu-mkcow mycowimage.cow 1024 +@end example + +to create a 1 gigabyte empty COW disk image. + +NOTES: +@enumerate +@item +COW disk images must be created on file systems supporting +@emph{holes} such as ext2 or ext3. +@item +Since holes are used, the displayed size of the COW disk image is not +the real one. To know it, use the @code{ls -ls} command. +@end enumerate + @section Direct Linux Boot and Network emulation This section explains how to launch a Linux kernel inside QEMU without @@ -436,7 +664,7 @@ A 2.5.74 kernel is also included in the archive. Just replace the bzImage in qemu.sh to try it. @item -qemu creates a temporary file in @var{$QEMU_TMPDIR} (@file{/tmp} is the +qemu-fast creates a temporary file in @var{$QEMU_TMPDIR} (@file{/tmp} is the default) containing all the simulated PC memory. 
If possible, try to use a temporary directory using the tmpfs filesystem to avoid too many unnecessary disk accesses. @@ -459,203 +687,15 @@ Lawton for the plex86 Project (@url{www.plex86.org}). @end enumerate -@section Invocation - -@example -@c man begin SYNOPSIS -usage: qemu [options] [disk_image] -@c man end -@end example - -@c man begin OPTIONS -@var{disk_image} is a raw hard disk image for IDE hard disk 0. - -General options: -@table @option -@item -fda file -@item -fdb file -Use @var{file} as floppy disk 0/1 image (@xref{disk_images}). - -@item -hda file -@item -hdb file -@item -hdc file -@item -hdd file -Use @var{file} as hard disk 0, 1, 2 or 3 image (@xref{disk_images}). - -@item -cdrom file -Use @var{file} as CD-ROM image (you cannot use @option{-hdc} and and -@option{-cdrom} at the same time). - -@item -boot [a|b|c|d] -Boot on floppy (a, b), hard disk (c) or CD-ROM (d). Hard disk boot is -the default. - -@item -snapshot -Write to temporary files instead of disk image files. In this case, -the raw disk image you use is not written back. You can however force -the write back by pressing @key{C-a s} (@xref{disk_images}). - -@item -m megs -Set virtual RAM size to @var{megs} megabytes. - -@item -n script -Set network init script [default=/etc/qemu-ifup]. This script is -launched to configure the host network interface (usually tun0) -corresponding to the virtual NE2000 card. - -@item -initrd file -Use @var{file} as initial ram disk. - -@item -tun-fd fd -Assumes @var{fd} talks to tap/tun and use it. Read -@url{http://bellard.org/qemu/tetrinet.html} to have an example of its -use. - -@item -nographic - -Normally, QEMU uses SDL to display the VGA output. With this option, -you can totally disable graphical output so that QEMU is a simple -command line application. The emulated serial port is redirected on -the console. Therefore, you can still use QEMU to debug a Linux kernel -with a serial console. 
- -@end table - -Linux boot specific (does not require a full PC boot with a BIOS): -@table @option - -@item -kernel bzImage -Use @var{bzImage} as kernel image. - -@item -append cmdline -Use @var{cmdline} as kernel command line - -@item -initrd file -Use @var{file} as initial ram disk. - -@end table - -Debug options: -@table @option -@item -s -Wait gdb connection to port 1234 (@xref{gdb_usage}). -@item -p port -Change gdb connection port. -@item -d -Output log in /tmp/qemu.log -@end table - -During emulation, use @key{C-a h} to get terminal commands: - -@table @key -@item C-a h -Print this help -@item C-a x -Exit emulatior -@item C-a s -Save disk data back to file (if -snapshot) -@item C-a b -Send break (magic sysrq) -@item C-a C-a -Send C-a -@end table -@c man end - -@ignore - -@setfilename qemu -@settitle QEMU System Emulator - -@c man begin SEEALSO -The HTML documentation of QEMU for more precise information and Linux -user mode emulator invocation. -@c man end - -@c man begin AUTHOR -Fabrice Bellard -@c man end - -@end ignore - -@end ignore -@node disk_images -@section Disk Images - -@subsection Raw disk images - -The disk images can simply be raw images of the hard disk. You can -create them with the command: -@example -dd if=/dev/zero of=myimage bs=1024 count=mysize -@end example -where @var{myimage} is the image filename and @var{mysize} is its size -in kilobytes. - -@subsection Snapshot mode - -If you use the option @option{-snapshot}, all disk images are -considered as read only. When sectors in written, they are written in -a temporary file created in @file{/tmp}. You can however force the -write back to the raw disk images by pressing @key{C-a s}. - -NOTE: The snapshot mode only works with raw disk images. - -@subsection Copy On Write disk images - -QEMU also supports user mode Linux -(@url{http://user-mode-linux.sourceforge.net/}) Copy On Write (COW) -disk images. 
The COW disk images are much smaller than normal images -as they store only modified sectors. They also permit the use of the -same disk image template for many users. - -To create a COW disk images, use the command: - -@example -qemu-mkcow -f myrawimage.bin mycowimage.cow -@end example - -@file{myrawimage.bin} is a raw image you want to use as original disk -image. It will never be written to. - -@file{mycowimage.cow} is the COW disk image which is created by -@code{qemu-mkcow}. You can use it directly with the @option{-hdx} -options. You must not modify the original raw disk image if you use -COW images, as COW images only store the modified sectors from the raw -disk image. QEMU stores the original raw disk image name and its -modified time in the COW disk image so that chances of mistakes are -reduced. - -If the raw disk image is not read-only, by pressing @key{C-a s} you -can flush the COW disk image back into the raw disk image, as in -snapshot mode. - -COW disk images can also be created without a corresponding raw disk -image. It is useful to have a big initial virtual disk image without -using much disk space. Use: - -@example -qemu-mkcow mycowimage.cow 1024 -@end example - -to create a 1 gigabyte empty COW disk image. - -NOTES: -@enumerate -@item -COW disk images must be created on file systems supporting -@emph{holes} such as ext2 or ext3. -@item -Since holes are used, the displayed size of the COW disk image is not -the real one. To know it, use the @code{ls -ls} command. -@end enumerate - @node linux_compile @section Linux Kernel Compilation You can use any linux kernel with QEMU. However, if you want to use -@code{qemu-fast} to get maximum performances, you should make the -following changes to the Linux kernel (only 2.4.x and 2.5.x were -tested): +@code{qemu-fast} to get maximum performances, you must use a modified +guest kernel. 
If you are using a 2.6 guest kernel, you can use +directly the patch @file{linux-2.6-qemu-fast.patch} made by Rusty +Russell available in the QEMU source archive. Otherwise, you can make the +following changes @emph{by hand} to the Linux kernel: @enumerate @item @@ -694,10 +734,10 @@ by use an SMP kernel with QEMU, it only supports one CPU. @item -If you are not using a 2.5 kernel as host kernel but if you use a target -2.5 kernel, you must also ensure that the 'HZ' define is set to 100 +If you are not using a 2.6 kernel as host kernel but if you use a target +2.6 kernel, you must also ensure that the 'HZ' define is set to 100 (1000 is the default) as QEMU cannot currently emulate timers at -frequencies greater than 100 Hz on host Linux systems < 2.5. In +frequencies greater than 100 Hz on host Linux systems < 2.6. In @file{include/asm/param.h}, replace: @example @@ -762,322 +802,104 @@ Use @code{set architecture i8086} to dump 16 bit code. Then use @code{x/10i $cs*16+*eip} to dump the code at the PC position. @end enumerate -@chapter QEMU Internals +@chapter QEMU User space emulator invocation -@section QEMU compared to other emulators +@section Quick Start -Like bochs [3], QEMU emulates an x86 CPU. But QEMU is much faster than -bochs as it uses dynamic compilation and because it uses the host MMU to -simulate the x86 MMU. The downside is that currently the emulation is -not as accurate as bochs (for example, you cannot currently run Windows -inside QEMU). +In order to launch a Linux process, QEMU needs the process executable +itself and all the target (x86) dynamic libraries used by it. -Like Valgrind [2], QEMU does user space emulation and dynamic -translation. Valgrind is mainly a memory debugger while QEMU has no -support for it (QEMU could be used to detect out of bound memory -accesses as Valgrind, but it has no support to track uninitialised data -as Valgrind does). 
The Valgrind dynamic translator generates better code -than QEMU (in particular it does register allocation) but it is closely -tied to an x86 host and target and has no support for precise exceptions -and system emulation. +@itemize -EM86 [4] is the closest project to user space QEMU (and QEMU still uses -some of its code, in particular the ELF file loader). EM86 was limited -to an alpha host and used a proprietary and slow interpreter (the -interpreter part of the FX!32 Digital Win32 code translator [5]). +@item On x86, you can just try to launch any process by using the native +libraries: -TWIN [6] is a Windows API emulator like Wine. It is less accurate than -Wine but includes a protected mode x86 interpreter to launch x86 Windows -executables. Such an approach as greater potential because most of the -Windows API is executed natively but it is far more difficult to develop -because all the data structures and function parameters exchanged -between the API and the x86 code must be converted. +@example +qemu-i386 -L / /bin/ls +@end example -User mode Linux [7] was the only solution before QEMU to launch a Linux -kernel as a process while not needing any host kernel patches. However, -user mode Linux requires heavy kernel patches while QEMU accepts -unpatched Linux kernels. It would be interesting to compare the -performance of the two approaches. +@code{-L /} tells that the x86 dynamic linker must be searched with a +@file{/} prefix. -The new Plex86 [8] PC virtualizer is done in the same spirit as the QEMU -system emulator. It requires a patched Linux kernel to work (you cannot -launch the same kernel on your PC), but the patches are really small. As -it is a PC virtualizer (no emulation is done except for some priveledged -instructions), it has the potential of being faster than QEMU. The -downside is that a complicated (and potentially unsafe) host kernel -patch is needed. 
+@item Since QEMU is also a linux process, you can launch qemu with qemu (NOTE: you can only do that if you compiled QEMU from the sources): -@section Portable dynamic translation +@example +qemu-i386 -L / qemu-i386 -L / /bin/ls +@end example -QEMU is a dynamic translator. When it first encounters a piece of code, -it converts it to the host instruction set. Usually dynamic translators -are very complicated and highly CPU dependent. QEMU uses some tricks -which make it relatively easily portable and simple while achieving good -performances. +@item On non x86 CPUs, you need first to download at least an x86 glibc +(@file{qemu-runtime-i386-XXX-.tar.gz} on the QEMU web page). Ensure that +@code{LD_LIBRARY_PATH} is not set: -The basic idea is to split every x86 instruction into fewer simpler -instructions. Each simple instruction is implemented by a piece of C -code (see @file{op-i386.c}). Then a compile time tool (@file{dyngen}) -takes the corresponding object file (@file{op-i386.o}) to generate a -dynamic code generator which concatenates the simple instructions to -build a function (see @file{op-i386.h:dyngen_code()}). +@example +unset LD_LIBRARY_PATH +@end example -In essence, the process is similar to [1], but more work is done at -compile time. +Then you can launch the precompiled @file{ls} x86 executable: -A key idea to get optimal performances is that constant parameters can -be passed to the simple operations. For that purpose, dummy ELF -relocations are generated with gcc for each constant parameter. Then, -the tool (@file{dyngen}) can locate the relocations and generate the -appriopriate C code to resolve them when building the dynamic code. +@example +qemu-i386 tests/i386/ls +@end example +You can look at @file{qemu-binfmt-conf.sh} so that +QEMU is automatically launched by the Linux kernel when you try to +launch x86 executables. It requires the @code{binfmt_misc} module in the +Linux kernel. 
-That way, QEMU is no more difficult to port than a dynamic linker. +@item The x86 version of QEMU is also included. You can try weird things such as: +@example +qemu-i386 /usr/local/qemu-i386/bin/qemu-i386 /usr/local/qemu-i386/bin/ls-i386 +@end example -To go even faster, GCC static register variables are used to keep the -state of the virtual CPU. +@end itemize -@section Register allocation +@section Wine launch -Since QEMU uses fixed simple instructions, no efficient register -allocation can be done. However, because RISC CPUs have a lot of -register, most of the virtual CPU state can be put in registers without -doing complicated register allocation. +@itemize -@section Condition code optimisations +@item Ensure that you have a working QEMU with the x86 glibc +distribution (see previous section). In order to verify it, you must be +able to do: -Good CPU condition codes emulation (@code{EFLAGS} register on x86) is a -critical point to get good performances. QEMU uses lazy condition code -evaluation: instead of computing the condition codes after each x86 -instruction, it just stores one operand (called @code{CC_SRC}), the -result (called @code{CC_DST}) and the type of operation (called -@code{CC_OP}). +@example +qemu-i386 /usr/local/qemu-i386/bin/ls-i386 +@end example -@code{CC_OP} is almost never explicitely set in the generated code -because it is known at translation time. +@item Download the binary x86 Wine install +(@file{qemu-XXX-i386-wine.tar.gz} on the QEMU web page). -In order to increase performances, a backward pass is performed on the -generated simple instructions (see -@code{translate-i386.c:optimize_flags()}). When it can be proved that -the condition codes are not needed by the next instructions, no -condition codes are computed at all. +@item Configure Wine on your account. Look at the provided script +@file{/usr/local/qemu-i386/bin/wine-conf.sh}. Your previous +@code{$@{HOME@}/.wine} directory is saved to @code{$@{HOME@}/.wine.org}. 
-@section CPU state optimisations +@item Then you can try the example @file{putty.exe}: -The x86 CPU has many internal states which change the way it evaluates -instructions. In order to achieve a good speed, the translation phase -considers that some state information of the virtual x86 CPU cannot -change in it. For example, if the SS, DS and ES segments have a zero -base, then the translator does not even generate an addition for the -segment base. +@example +qemu-i386 /usr/local/qemu-i386/wine/bin/wine /usr/local/qemu-i386/wine/c/Program\ Files/putty.exe +@end example -[The FPU stack pointer register is not handled that way yet]. +@end itemize -@section Translation cache +@section Command line options -A 2MByte cache holds the most recently used translations. For -simplicity, it is completely flushed when it is full. A translation unit -contains just a single basic block (a block of x86 instructions -terminated by a jump or by a virtual CPU state change which the -translator cannot deduce statically). - -@section Direct block chaining - -After each translated basic block is executed, QEMU uses the simulated -Program Counter (PC) and other cpu state informations (such as the CS -segment base value) to find the next basic block. - -In order to accelerate the most common cases where the new simulated PC -is known, QEMU can patch a basic block so that it jumps directly to the -next one. - -The most portable code uses an indirect jump. An indirect jump makes it -easier to make the jump target modification atomic. On some -architectures (such as PowerPC), the @code{JUMP} opcode is directly -patched so that the block chaining has no overhead. - -@section Self-modifying code and translated code invalidation - -Self-modifying code is a special challenge in x86 emulation because no -instruction cache invalidation is signaled by the application when code -is modified. 
- -When translated code is generated for a basic block, the corresponding -host page is write protected if it is not already read-only (with the -system call @code{mprotect()}). Then, if a write access is done to the -page, Linux raises a SEGV signal. QEMU then invalidates all the -translated code in the page and enables write accesses to the page. - -Correct translated code invalidation is done efficiently by maintaining -a linked list of every translated block contained in a given page. Other -linked lists are also maintained to undo direct block chaining. - -Although the overhead of doing @code{mprotect()} calls is important, -most MSDOS programs can be emulated at reasonnable speed with QEMU and -DOSEMU. - -Note that QEMU also invalidates pages of translated code when it detects -that memory mappings are modified with @code{mmap()} or @code{munmap()}. - -@section Exception support - -longjmp() is used when an exception such as division by zero is -encountered. - -The host SIGSEGV and SIGBUS signal handlers are used to get invalid -memory accesses. The exact CPU state can be retrieved because all the -x86 registers are stored in fixed host registers. The simulated program -counter is found by retranslating the corresponding basic block and by -looking where the host program counter was at the exception point. - -The virtual CPU cannot retrieve the exact @code{EFLAGS} register because -in some cases it is not computed because of condition code -optimisations. It is not a big concern because the emulated code can -still be restarted in any cases. - -@section Linux system call translation - -QEMU includes a generic system call translator for Linux. It means that -the parameters of the system calls can be converted to fix the -endianness and 32/64 bit issues. The IOCTLs are converted with a generic -type description system (see @file{ioctls.h} and @file{thunk.c}). - -QEMU supports host CPUs which have pages bigger than 4KB. 
It records all -the mappings the process does and try to emulated the @code{mmap()} -system calls in cases where the host @code{mmap()} call would fail -because of bad page alignment. - -@section Linux signals - -Normal and real-time signals are queued along with their information -(@code{siginfo_t}) as it is done in the Linux kernel. Then an interrupt -request is done to the virtual CPU. When it is interrupted, one queued -signal is handled by generating a stack frame in the virtual CPU as the -Linux kernel does. The @code{sigreturn()} system call is emulated to return -from the virtual signal handler. - -Some signals (such as SIGALRM) directly come from the host. Other -signals are synthetized from the virtual CPU exceptions such as SIGFPE -when a division by zero is done (see @code{main.c:cpu_loop()}). - -The blocked signal mask is still handled by the host Linux kernel so -that most signal system calls can be redirected directly to the host -Linux kernel. Only the @code{sigaction()} and @code{sigreturn()} system -calls need to be fully emulated (see @file{signal.c}). - -@section clone() system call and threads - -The Linux clone() system call is usually used to create a thread. QEMU -uses the host clone() system call so that real host threads are created -for each emulated thread. One virtual CPU instance is created for each -thread. - -The virtual x86 CPU atomic operations are emulated with a global lock so -that their semantic is preserved. - -Note that currently there are still some locking issues in QEMU. In -particular, the translated cache flush is not protected yet against -reentrancy. - -@section Self-virtualization - -QEMU was conceived so that ultimately it can emulate itself. Although -it is not very useful, it is an important test to show the power of the -emulator. - -Achieving self-virtualization is not easy because there may be address -space conflicts. 
QEMU solves this problem by being an executable ELF -shared object as the ld-linux.so ELF interpreter. That way, it can be -relocated at load time. - -@section MMU emulation - -For system emulation, QEMU uses the mmap() system call to emulate the -target CPU MMU. It works as long the emulated OS does not use an area -reserved by the host OS (such as the area above 0xc0000000 on x86 -Linux). - -It is planned to add a slower but more precise MMU emulation -with a software MMU. - -@section Bibliography - -@table @asis - -@item [1] -@url{http://citeseer.nj.nec.com/piumarta98optimizing.html}, Optimizing -direct threaded code by selective inlining (1998) by Ian Piumarta, Fabio -Riccardi. - -@item [2] -@url{http://developer.kde.org/~sewardj/}, Valgrind, an open-source -memory debugger for x86-GNU/Linux, by Julian Seward. - -@item [3] -@url{http://bochs.sourceforge.net/}, the Bochs IA-32 Emulator Project, -by Kevin Lawton et al. - -@item [4] -@url{http://www.cs.rose-hulman.edu/~donaldlf/em86/index.html}, the EM86 -x86 emulator on Alpha-Linux. - -@item [5] -@url{http://www.usenix.org/publications/library/proceedings/usenix-nt97/full_papers/chernoff/chernoff.pdf}, -DIGITAL FX!32: Running 32-Bit x86 Applications on Alpha NT, by Anton -Chernoff and Ray Hookway. - -@item [6] -@url{http://www.willows.com/}, Windows API library emulation from -Willows Software. - -@item [7] -@url{http://user-mode-linux.sourceforge.net/}, -The User-mode Linux Kernel. - -@item [8] -@url{http://www.plex86.org/}, -The new Plex86 project. +@example +usage: qemu-i386 [-h] [-d] [-L path] [-s size] program [arguments...] +@end example +@table @option +@item -h +Print the help +@item -L path +Set the x86 elf interpreter prefix (default=/usr/local/qemu-i386) +@item -s size +Set the x86 stack size in bytes (default=524288) @end table -@chapter Regression Tests +Debug options: -In the directory @file{tests/}, various interesting testing programs -are available. There are used for regression testing. 
- -@section @file{test-i386} - -This program executes most of the 16 bit and 32 bit x86 instructions and -generates a text output. It can be compared with the output obtained with -a real CPU or another emulator. The target @code{make test} runs this -program and a @code{diff} on the generated output. - -The Linux system call @code{modify_ldt()} is used to create x86 selectors -to test some 16 bit addressing and 32 bit with segmentation cases. - -The Linux system call @code{vm86()} is used to test vm86 emulation. - -Various exceptions are raised to test most of the x86 user space -exception reporting. - -@section @file{linux-test} - -This program tests various Linux system calls. It is used to verify -that the system call parameters are correctly converted between target -and host CPUs. - -@section @file{hello-i386} - -Very simple statically linked x86 program, just to test QEMU during a -port to a new host CPU. - -@section @file{hello-arm} - -Very simple statically linked ARM program, just to test QEMU during a -port to a new host CPU. - -@section @file{sha1} - -It is a simple benchmark. Care must be taken to interpret the results -because it mostly tests the ability of the virtual CPU to optimize the -@code{rol} x86 instruction and the condition code computations. +@table @option +@item -d +Activate log (logfile=/tmp/qemu.log) +@item -p pagesize +Act as if the host page size was 'pagesize' bytes +@end table diff --git a/qemu-tech.texi b/qemu-tech.texi new file mode 100644 index 0000000000..0185934756 --- /dev/null +++ b/qemu-tech.texi @@ -0,0 +1,506 @@ +\input texinfo @c -*- texinfo -*- + +@iftex +@settitle QEMU Internals +@titlepage +@sp 7 +@center @titlefont{QEMU Internals} +@sp 3 +@end titlepage +@end iftex + +@chapter Introduction + +@section Features + +QEMU is a FAST! processor emulator using a portable dynamic +translator. + +QEMU has two operating modes: + +@itemize @minus + +@item +Full system emulation. 
In this mode, QEMU emulates a full system +(usually a PC), including a processor and various peripherals. It can +be used to launch a different Operating System without rebooting the +PC or to debug system code. + +@item +User mode emulation (Linux host only). In this mode, QEMU can launch +Linux processes compiled for one CPU on another CPU. It can be used to +launch the Wine Windows API emulator (@url{http://www.winehq.org}) or +to ease cross-compilation and cross-debugging. + +@end itemize + +As QEMU requires no host kernel driver to run, it is very safe and +easy to use. + +QEMU generic features: + +@itemize + +@item User space only or full system emulation. + +@item Using dynamic translation to native code for reasonable speed. + +@item Working on x86 and PowerPC hosts. Being tested on ARM, Sparc32, Alpha and S390. + +@item Self-modifying code support. + +@item Precise exceptions support. + +@item The virtual CPU is a library (@code{libqemu}) which can be used +in other projects. + +@end itemize + +QEMU user mode emulation features: +@itemize +@item Generic Linux system call converter, including most ioctls. + +@item clone() emulation using native CPU clone() to use Linux scheduler for threads. + +@item Accurate signal handling by remapping host signals to target signals. +@end itemize +@end itemize + +QEMU full system emulation features: +@itemize +@item QEMU can either use a full software MMU for maximum portability or use the host system call mmap() to simulate the target MMU. +@end itemize + +@section x86 emulation + +QEMU x86 target features: + +@itemize + +@item The virtual x86 CPU supports 16 bit and 32 bit addressing with segmentation. +LDT/GDT and IDT are emulated. VM86 mode is also supported to run DOSEMU. + +@item Support of host page sizes bigger than 4KB in user mode emulation. + +@item QEMU can emulate itself on x86. + +@item An extensive Linux x86 CPU test program is included @file{tests/test-i386}. 
+It can be used to test other x86 virtual CPUs. + +@end itemize + +Current QEMU limitations: + +@itemize + +@item No SSE/MMX support (yet). + +@item No x86-64 support. + +@item IPC syscalls are missing. + +@item The x86 segment limits and access rights are not tested at every +memory access (yet). Fortunately, very few OSes seem to rely on that for +normal use. + +@item On non x86 host CPUs, @code{double}s are used instead of the non standard +10 byte @code{long double}s of x86 for floating point emulation to get +maximum performances. + +@end itemize + +@section ARM emulation + +@itemize + +@item Full ARM 7 user emulation. + +@item NWFPE FPU support included in user Linux emulation. + +@item Can run most ARM Linux binaries. + +@end itemize + +@section PowerPC emulation + +@itemize + +@item Full PowerPC 32 bit emulation, including privileged instructions, +FPU and MMU. + +@item Can run most PowerPC Linux binaries. + +@end itemize + +@section SPARC emulation + +@itemize + +@item SPARC V8 user support, except FPU instructions. + +@item Can run some SPARC Linux binaries. + +@end itemize + +@chapter QEMU Internals + +@section QEMU compared to other emulators + +Like bochs [3], QEMU emulates an x86 CPU. But QEMU is much faster than +bochs as it uses dynamic compilation. Bochs is closely tied to x86 PC +emulation while QEMU can emulate several processors. + +Like Valgrind [2], QEMU does user space emulation and dynamic +translation. Valgrind is mainly a memory debugger while QEMU has no +support for it (QEMU could be used to detect out of bound memory +accesses as Valgrind, but it has no support to track uninitialised data +as Valgrind does). The Valgrind dynamic translator generates better code +than QEMU (in particular it does register allocation) but it is closely +tied to an x86 host and target and has no support for precise exceptions +and system emulation. 
+ +EM86 [4] is the closest project to user space QEMU (and QEMU still uses +some of its code, in particular the ELF file loader). EM86 was limited +to an alpha host and used a proprietary and slow interpreter (the +interpreter part of the FX!32 Digital Win32 code translator [5]). + +TWIN [6] is a Windows API emulator like Wine. It is less accurate than +Wine but includes a protected mode x86 interpreter to launch x86 Windows +executables. Such an approach has greater potential because most of the +Windows API is executed natively but it is far more difficult to develop +because all the data structures and function parameters exchanged +between the API and the x86 code must be converted. + +User mode Linux [7] was the only solution before QEMU to launch a +Linux kernel as a process while not needing any host kernel +patches. However, user mode Linux requires heavy kernel patches while +QEMU accepts unpatched Linux kernels. The price to pay is that QEMU is +slower. + +The new Plex86 [8] PC virtualizer is done in the same spirit as the +qemu-fast system emulator. It requires a patched Linux kernel to work +(you cannot launch the same kernel on your PC), but the patches are +really small. As it is a PC virtualizer (no emulation is done except +for some privileged instructions), it has the potential of being +faster than QEMU. The downside is that a complicated (and potentially +unsafe) host kernel patch is needed. + +The commercial PC Virtualizers (VMWare [9], VirtualPC [10], TwoOStwo +[11]) are faster than QEMU, but they all need specific, proprietary +and potentially unsafe host drivers. Moreover, they are unable to +provide cycle exact simulation as an emulator can. + +@section Portable dynamic translation + +QEMU is a dynamic translator. When it first encounters a piece of code, +it converts it to the host instruction set. Usually dynamic translators +are very complicated and highly CPU dependent. 
QEMU uses some tricks +which make it relatively easily portable and simple while achieving good +performances. + +The basic idea is to split every x86 instruction into fewer simpler +instructions. Each simple instruction is implemented by a piece of C +code (see @file{target-i386/op.c}). Then a compile time tool +(@file{dyngen}) takes the corresponding object file (@file{op.o}) +to generate a dynamic code generator which concatenates the simple +instructions to build a function (see @file{op.h:dyngen_code()}). + +In essence, the process is similar to [1], but more work is done at +compile time. + +A key idea to get optimal performances is that constant parameters can +be passed to the simple operations. For that purpose, dummy ELF +relocations are generated with gcc for each constant parameter. Then, +the tool (@file{dyngen}) can locate the relocations and generate the +appropriate C code to resolve them when building the dynamic code. + +That way, QEMU is no more difficult to port than a dynamic linker. + +To go even faster, GCC static register variables are used to keep the +state of the virtual CPU. + +@section Register allocation + +Since QEMU uses fixed simple instructions, no efficient register +allocation can be done. However, because RISC CPUs have a lot of +registers, most of the virtual CPU state can be put in registers without +doing complicated register allocation. + +@section Condition code optimisations + +Good CPU condition codes emulation (@code{EFLAGS} register on x86) is a +critical point to get good performances. QEMU uses lazy condition code +evaluation: instead of computing the condition codes after each x86 +instruction, it just stores one operand (called @code{CC_SRC}), the +result (called @code{CC_DST}) and the type of operation (called +@code{CC_OP}). + +@code{CC_OP} is almost never explicitly set in the generated code +because it is known at translation time. 
+ +In order to increase performances, a backward pass is performed on the +generated simple instructions (see +@code{target-i386/translate.c:optimize_flags()}). When it can be proved that +the condition codes are not needed by the next instructions, no +condition codes are computed at all. + +@section CPU state optimisations + +The x86 CPU has many internal states which change the way it evaluates +instructions. In order to achieve a good speed, the translation phase +considers that some state information of the virtual x86 CPU cannot +change in it. For example, if the SS, DS and ES segments have a zero +base, then the translator does not even generate an addition for the +segment base. + +[The FPU stack pointer register is not handled that way yet]. + +@section Translation cache + +A 2MByte cache holds the most recently used translations. For +simplicity, it is completely flushed when it is full. A translation unit +contains just a single basic block (a block of x86 instructions +terminated by a jump or by a virtual CPU state change which the +translator cannot deduce statically). + +@section Direct block chaining + +After each translated basic block is executed, QEMU uses the simulated +Program Counter (PC) and other CPU state information (such as the CS +segment base value) to find the next basic block. + +In order to accelerate the most common cases where the new simulated PC +is known, QEMU can patch a basic block so that it jumps directly to the +next one. + +The most portable code uses an indirect jump. An indirect jump makes +it easier to make the jump target modification atomic. On some host +architectures (such as x86 or PowerPC), the @code{JUMP} opcode is +directly patched so that the block chaining has no overhead. + +@section Self-modifying code and translated code invalidation + +Self-modifying code is a special challenge in x86 emulation because no +instruction cache invalidation is signaled by the application when code +is modified. 
+ +When translated code is generated for a basic block, the corresponding +host page is write protected if it is not already read-only (with the +system call @code{mprotect()}). Then, if a write access is done to the +page, Linux raises a SEGV signal. QEMU then invalidates all the +translated code in the page and enables write accesses to the page. + +Correct translated code invalidation is done efficiently by maintaining +a linked list of every translated block contained in a given page. Other +linked lists are also maintained to undo direct block chaining. + +Although the overhead of doing @code{mprotect()} calls is significant, +most MSDOS programs can be emulated at reasonable speed with QEMU and +DOSEMU. + +Note that QEMU also invalidates pages of translated code when it detects +that memory mappings are modified with @code{mmap()} or @code{munmap()}. + +When using a software MMU, the code invalidation is more efficient: if +a given code page is invalidated too often because of write accesses, +then a bitmap representing all the code inside the page is +built. Every store into that page checks the bitmap to see if the code +really needs to be invalidated. It avoids invalidating the code when +only data is modified in the page. + +@section Exception support + +longjmp() is used when an exception such as division by zero is +encountered. + +The host SIGSEGV and SIGBUS signal handlers are used to catch invalid +memory accesses. The exact CPU state can be retrieved because all the +x86 registers are stored in fixed host registers. The simulated program +counter is found by retranslating the corresponding basic block and by +looking where the host program counter was at the exception point. + +The virtual CPU cannot retrieve the exact @code{EFLAGS} register because +in some cases it is not computed because of condition code +optimisations. It is not a big concern because the emulated code can +still be restarted in any case. 
+ +@section MMU emulation + +For system emulation, QEMU uses the mmap() system call to emulate the +target CPU MMU. It works as long as the emulated OS does not use an area +reserved by the host OS (such as the area above 0xc0000000 on x86 +Linux). + +In order to be able to launch any OS, QEMU also supports a soft +MMU. In that mode, the MMU virtual to physical address translation is +done at every memory access. QEMU uses an address translation cache to +speed up the translation. + +In order to avoid flushing the translated code each time the MMU +mappings change, QEMU uses a physically indexed translation cache. It +means that each basic block is indexed with its physical address. + +When MMU mappings change, only the chaining of the basic blocks is +reset (i.e. a basic block can no longer jump directly to another one). + +@section Hardware interrupts + +In order to be faster, QEMU does not check at every basic block if a +hardware interrupt is pending. Instead, the user must asynchronously +call a specific function to tell that an interrupt is pending. This +function resets the chaining of the currently executing basic +block. It ensures that the execution will return soon in the main loop +of the CPU emulator. Then the main loop can test if the interrupt is +pending and handle it. + +@section User emulation specific details + +@subsection Linux system call translation + +QEMU includes a generic system call translator for Linux. It means that +the parameters of the system calls can be converted to fix the +endianness and 32/64 bit issues. The IOCTLs are converted with a generic +type description system (see @file{ioctls.h} and @file{thunk.c}). + +QEMU supports host CPUs which have pages bigger than 4KB. It records all +the mappings the process does and tries to emulate the @code{mmap()} +system calls in cases where the host @code{mmap()} call would fail +because of bad page alignment. 
+ +@subsection Linux signals + +Normal and real-time signals are queued along with their information +(@code{siginfo_t}) as it is done in the Linux kernel. Then an interrupt +request is done to the virtual CPU. When it is interrupted, one queued +signal is handled by generating a stack frame in the virtual CPU as the +Linux kernel does. The @code{sigreturn()} system call is emulated to return +from the virtual signal handler. + +Some signals (such as SIGALRM) directly come from the host. Other +signals are synthesized from the virtual CPU exceptions such as SIGFPE +when a division by zero is done (see @code{main.c:cpu_loop()}). + +The blocked signal mask is still handled by the host Linux kernel so +that most signal system calls can be redirected directly to the host +Linux kernel. Only the @code{sigaction()} and @code{sigreturn()} system +calls need to be fully emulated (see @file{signal.c}). + +@subsection clone() system call and threads + +The Linux clone() system call is usually used to create a thread. QEMU +uses the host clone() system call so that real host threads are created +for each emulated thread. One virtual CPU instance is created for each +thread. + +The virtual x86 CPU atomic operations are emulated with a global lock so +that their semantics are preserved. + +Note that currently there are still some locking issues in QEMU. In +particular, the translated cache flush is not protected yet against +reentrancy. + +@subsection Self-virtualization + +QEMU was conceived so that ultimately it can emulate itself. Although +it is not very useful, it is an important test to show the power of the +emulator. + +Achieving self-virtualization is not easy because there may be address +space conflicts. QEMU solves this problem by being an executable ELF +shared object as the ld-linux.so ELF interpreter. That way, it can be +relocated at load time. 
+ +@section Bibliography + +@table @asis + +@item [1] +@url{http://citeseer.nj.nec.com/piumarta98optimizing.html}, Optimizing +direct threaded code by selective inlining (1998) by Ian Piumarta, Fabio +Riccardi. + +@item [2] +@url{http://developer.kde.org/~sewardj/}, Valgrind, an open-source +memory debugger for x86-GNU/Linux, by Julian Seward. + +@item [3] +@url{http://bochs.sourceforge.net/}, the Bochs IA-32 Emulator Project, +by Kevin Lawton et al. + +@item [4] +@url{http://www.cs.rose-hulman.edu/~donaldlf/em86/index.html}, the EM86 +x86 emulator on Alpha-Linux. + +@item [5] +@url{http://www.usenix.org/publications/library/proceedings/usenix-nt97/full_papers/chernoff/chernoff.pdf}, +DIGITAL FX!32: Running 32-Bit x86 Applications on Alpha NT, by Anton +Chernoff and Ray Hookway. + +@item [6] +@url{http://www.willows.com/}, Windows API library emulation from +Willows Software. + +@item [7] +@url{http://user-mode-linux.sourceforge.net/}, +The User-mode Linux Kernel. + +@item [8] +@url{http://www.plex86.org/}, +The new Plex86 project. + +@item [9] +@url{http://www.vmware.com/}, +The VMWare PC virtualizer. + +@item [10] +@url{http://www.microsoft.com/windowsxp/virtualpc/}, +The VirtualPC PC virtualizer. + +@item [11] +@url{http://www.twoostwo.org/}, +The TwoOStwo PC virtualizer. + +@end table + +@chapter Regression Tests + +In the directory @file{tests/}, various interesting testing programs +are available. They are used for regression testing. + +@section @file{test-i386} + +This program executes most of the 16 bit and 32 bit x86 instructions and +generates a text output. It can be compared with the output obtained with +a real CPU or another emulator. The target @code{make test} runs this +program and a @code{diff} on the generated output. + +The Linux system call @code{modify_ldt()} is used to create x86 selectors +to test some 16 bit addressing and 32 bit with segmentation cases. + +The Linux system call @code{vm86()} is used to test vm86 emulation. 
+ +Various exceptions are raised to test most of the x86 user space +exception reporting. + +@section @file{linux-test} + +This program tests various Linux system calls. It is used to verify +that the system call parameters are correctly converted between target +and host CPUs. + +@section @file{hello-i386} + +Very simple statically linked x86 program, just to test QEMU during a +port to a new host CPU. + +@section @file{hello-arm} + +Very simple statically linked ARM program, just to test QEMU during a +port to a new host CPU. + +@section @file{sha1} + +It is a simple benchmark. Care must be taken to interpret the results +because it mostly tests the ability of the virtual CPU to optimize the +@code{rol} x86 instruction and the condition code computations. +