From 9157eee1b1c076ff3316361b760e891dda13e9bf Mon Sep 17 00:00:00 2001 From: Miroslav Rezanina Date: Wed, 13 May 2015 11:39:30 +0200 Subject: [PATCH 01/62] Move parallel_hds_isa_init to hw/isa/isa-bus.c Disabling CONFIG_PARALLEL cause removing parallel_hds_isa_init defined in parallel.c. This function is called during initialization of some boards so disabling CONFIG_PARALLEL cause build failure. This patch moves parallel_hds_isa_init to hw/isa/isa-bus.c so it is included in case of disabled CONFIG_PARALLEL. Build is successful but qemu will abort with "Unknown device" error when function is called. Signed-off-by: Miroslav Rezanina Message-Id: <1431509970-32154-1-git-send-email-mrezanin@redhat.com> Reviewed-by: Markus Armbruster Signed-off-by: Paolo Bonzini --- hw/char/parallel.c | 25 ------------------------- hw/isa/isa-bus.c | 26 ++++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 25 deletions(-) diff --git a/hw/char/parallel.c b/hw/char/parallel.c index 4079554bb9..c2b553f0d1 100644 --- a/hw/char/parallel.c +++ b/hw/char/parallel.c @@ -641,28 +641,3 @@ static void parallel_register_types(void) } type_init(parallel_register_types) - -static void parallel_init(ISABus *bus, int index, CharDriverState *chr) -{ - DeviceState *dev; - ISADevice *isadev; - - isadev = isa_create(bus, "isa-parallel"); - dev = DEVICE(isadev); - qdev_prop_set_uint32(dev, "index", index); - qdev_prop_set_chr(dev, "chardev", chr); - qdev_init_nofail(dev); -} - -void parallel_hds_isa_init(ISABus *bus, int n) -{ - int i; - - assert(n <= MAX_PARALLEL_PORTS); - - for (i = 0; i < n; i++) { - if (parallel_hds[i]) { - parallel_init(bus, i, parallel_hds[i]); - } - } -} diff --git a/hw/isa/isa-bus.c b/hw/isa/isa-bus.c index 825aa627df..ec8e7decfc 100644 --- a/hw/isa/isa-bus.c +++ b/hw/isa/isa-bus.c @@ -21,6 +21,7 @@ #include "hw/sysbus.h" #include "sysemu/sysemu.h" #include "hw/isa/isa.h" +#include "hw/i386/pc.h" static ISABus *isabus; @@ -267,3 +268,28 @@ MemoryRegion *isa_address_space_io(ISADevice *dev) } type_init(isabus_register_types) + +static void parallel_init(ISABus *bus, int index, CharDriverState *chr) +{ + DeviceState *dev; + ISADevice *isadev; + + isadev = isa_create(bus, "isa-parallel"); + dev = DEVICE(isadev); + qdev_prop_set_uint32(dev, "index", index); + qdev_prop_set_chr(dev, "chardev", chr); + qdev_init_nofail(dev); +} + +void parallel_hds_isa_init(ISABus *bus, int n) +{ + int i; + + assert(n <= MAX_PARALLEL_PORTS); + + for (i = 0; i < n; i++) { + if (parallel_hds[i]) { + parallel_init(bus, i, parallel_hds[i]); + } + } +} From dae02ba55a66cb3194a2410c7725734e5bc6166f Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Mon, 18 May 2015 21:06:47 +0200 Subject: [PATCH 02/62] ppc: add helpful message when KVM fails to start VCPU On POWER8 systems, KVM checks if VCPU is running on primary threads, and that secondary threads are offline. If this is not the case, ioctl() fails with errno set to EBUSY. QEMU aborts with a non explicit error message: $ ./qemu-system-ppc64 --nographic -machine pseries,accel=kvm error: kvm run failed Device or resource busy To help user to diagnose the problem, this patch adds an informative error message. There is no easy way to check if SMT is enabled before starting the VCPU, and as this case is the only one setting errno to EBUSY, we just check the errno value to display a message. Signed-off-by: Laurent Vivier Message-Id: <1431976007-20503-1-git-send-email-lvivier@redhat.com> Signed-off-by: Paolo Bonzini --- kvm-all.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/kvm-all.c b/kvm-all.c index b2b1bc3359..44a34a52d8 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -1828,6 +1828,14 @@ int kvm_cpu_exec(CPUState *cpu) } fprintf(stderr, "error: kvm run failed %s\n", strerror(-run_ret)); +#ifdef TARGET_PPC + if (run_ret == -EBUSY) { + fprintf(stderr, + "This is probably because your SMT is enabled.\n" + "VCPU can only run on primary threads with all " + "secondary threads offline.\n"); + } +#endif ret = -1; break; } From e4afbf4fb4d026510700cb40bb72dea9aef14e3b Mon Sep 17 00:00:00 2001 From: Fam Zheng Date: Tue, 19 May 2015 10:50:59 +0000 Subject: [PATCH 03/62] qemu-nbd: Switch to qemu_set_fd_handler Achieved by: - Remembering the server fd with a global variable, in order to access it from nbd_client_closed. - Checking nbd_can_accept() and updating server_fd handler whenever client connects or disconnects. Signed-off-by: Fam Zheng Reviewed-by: Paolo Bonzini Message-Id: <1432032670-15124-3-git-send-email-famz@redhat.com> Signed-off-by: Paolo Bonzini --- qemu-nbd.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/qemu-nbd.c b/qemu-nbd.c index 7e690fff7e..5af6d11e33 100644 --- a/qemu-nbd.c +++ b/qemu-nbd.c @@ -53,6 +53,7 @@ static int persistent = 0; static enum { RUNNING, TERMINATE, TERMINATING, TERMINATED } state; static int shared = 1; static int nb_fds; +static int server_fd; static void usage(const char *name) { @@ -340,7 +341,7 @@ out: return (void *) EXIT_FAILURE; } -static int nbd_can_accept(void *opaque) +static int nbd_can_accept(void) { return nb_fds < shared; } @@ -351,19 +352,21 @@ static void nbd_export_closed(NBDExport *exp) state = TERMINATED; } +static void nbd_update_server_fd_handler(int fd); + static void nbd_client_closed(NBDClient *client) { nb_fds--; if (nb_fds == 0 && !persistent && state == RUNNING) { state = TERMINATE; } + nbd_update_server_fd_handler(server_fd); qemu_notify_event(); nbd_client_put(client); } static void nbd_accept(void *opaque) { - int server_fd = (uintptr_t) opaque; struct sockaddr_in addr; socklen_t addr_len = sizeof(addr); @@ -380,12 +383,22 @@ static void nbd_accept(void *opaque) if (nbd_client_new(exp, fd, nbd_client_closed)) { nb_fds++; + nbd_update_server_fd_handler(server_fd); } else { shutdown(fd, 2); close(fd); } } +static void nbd_update_server_fd_handler(int fd) +{ + if (nbd_can_accept()) { + qemu_set_fd_handler(fd, nbd_accept, NULL, (void *)(uintptr_t)fd); + } else { + qemu_set_fd_handler(fd, NULL, NULL, NULL); + } +} + int main(int argc, char **argv) { BlockBackend *blk; @@ -761,8 +774,8 @@ int main(int argc, char **argv) memset(&client_thread, 0, sizeof(client_thread)); } - qemu_set_fd_handler2(fd, nbd_can_accept, nbd_accept, NULL, - (void *)(uintptr_t)fd); + server_fd = fd; + nbd_update_server_fd_handler(fd); /* now when the initialization is (almost) complete, chdir("/") * to free any busy filesystems */ From db94604b20278c1dc227a04e4c564d80230e6c3f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 21 May 2015 15:12:29 +0200 Subject: [PATCH 04/62] exec: optimize phys_page_set_level phys_page_set_level is writing zeroes to a struct that has just been filled in by phys_map_node_alloc. Instead, tell phys_map_node_alloc whether to fill in the page "as a leaf" or "as a non-leaf". memcpy is faster than struct assignment, which copies each bitfield individually. A compiler bug (https://gcc.gnu.org/PR66391), and small memcpys like this one are special-cased anyway, and optimized to a register move, so just use the memcpy. This cuts the cost of phys_page_set_level from 25% to 5% when booting qboot. Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini --- exec.c | 24 ++++++++++-------------- 1 file changed, 10 insertions(+), 14 deletions(-) diff --git a/exec.c b/exec.c index e19ab22cd6..fc8d05d50a 100644 --- a/exec.c +++ b/exec.c @@ -173,17 +173,22 @@ static void phys_map_node_reserve(PhysPageMap *map, unsigned nodes) } } -static uint32_t phys_map_node_alloc(PhysPageMap *map) +static uint32_t phys_map_node_alloc(PhysPageMap *map, bool leaf) { unsigned i; uint32_t ret; + PhysPageEntry e; + PhysPageEntry *p; ret = map->nodes_nb++; + p = map->nodes[ret]; assert(ret != PHYS_MAP_NODE_NIL); assert(ret != map->nodes_nb_alloc); + + e.skip = leaf ? 0 : 1; + e.ptr = leaf ? PHYS_SECTION_UNASSIGNED : PHYS_MAP_NODE_NIL; for (i = 0; i < P_L2_SIZE; ++i) { - map->nodes[ret][i].skip = 1; - map->nodes[ret][i].ptr = PHYS_MAP_NODE_NIL; + memcpy(&p[i], &e, sizeof(e)); } return ret; } @@ -193,21 +198,12 @@ static void phys_page_set_level(PhysPageMap *map, PhysPageEntry *lp, int level) { PhysPageEntry *p; - int i; hwaddr step = (hwaddr)1 << (level * P_L2_BITS); if (lp->skip && lp->ptr == PHYS_MAP_NODE_NIL) { - lp->ptr = phys_map_node_alloc(map); - p = map->nodes[lp->ptr]; - if (level == 0) { - for (i = 0; i < P_L2_SIZE; i++) { - p[i].skip = 0; - p[i].ptr = PHYS_SECTION_UNASSIGNED; - } - } - } else { - p = map->nodes[lp->ptr]; + lp->ptr = phys_map_node_alloc(map, level == 0); } + p = map->nodes[lp->ptr]; lp = &p[(*index >> (level * P_L2_BITS)) & (P_L2_SIZE - 1)]; while (*nb && lp < &p[P_L2_SIZE]) { From 6b4ad3b28d4a70ad93f287b50200b04766aeb0de Mon Sep 17 00:00:00 2001 From: Peter Crosthwaite Date: Mon, 25 May 2015 22:38:06 -0700 Subject: [PATCH 05/62] Makefile.target: set master BUILD_DIR make can be invoked in the individual build dirs to build an individual target or just a single file of a target. e.g. touch translate-all.c make -C microblazeel-softmmu translate-all.o There is however a small bug when using the pixman submodule. config-host.mak will ref BUILD_DIR for the pixman -I CFLAGS: grep BUILD_DIR config-host.mak QEMU_CFLAGS=-I$(SRC_PATH)/pixman/pixman -I$(BUILD_DIR)/pixman/pixman ... This causes a build failure as -I/pixman/pixman (BUILD_DIR=="") will not be found. BUILD_DIR is usually set by the top level Makefile. Just lazy-set it in Makefile.target to the parent directory. Granted, this will not work if the pixman submodule is not prebuilt, but it at least means you can do incremental partial builds once you have done your initial full build (or attempt) from the top level. The next step would be refactor make infrastructure to rebuild pixman on a submake like the one above. Cc: Gerd Hoffmann Signed-off-by: Peter Crosthwaite Message-Id: <1432618686-16077-1-git-send-email-crosthwaite.peter@gmail.com> Signed-off-by: Paolo Bonzini --- Makefile.target | 2 ++ 1 file changed, 2 insertions(+) diff --git a/Makefile.target b/Makefile.target index 3e861c8413..ec5b92cb60 100644 --- a/Makefile.target +++ b/Makefile.target @@ -1,5 +1,7 @@ # -*- Mode: makefile -*- +BUILD_DIR?=$(CURDIR)/.. + include ../config-host.mak include config-target.mak include config-devices.mak From dbddac6da01a13c9d5d162994a0a265173acecab Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 10:31:53 +0100 Subject: [PATCH 06/62] memory: the only dirty memory flag for users is DIRTY_MEMORY_VGA DIRTY_MEMORY_MIGRATION is triggered by memory_global_dirty_log_start and memory_global_dirty_log_stop, so it cannot be used with memory_region_set_log. Specify this in the documentation and assert it. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 3 +-- memory.c | 1 + 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index b61c84f62a..7b0929d54f 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -647,8 +647,7 @@ void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, * * @mr: the memory region being updated. * @log: whether dirty logging is to be enabled or disabled. - * @client: the user of the logging information; %DIRTY_MEMORY_MIGRATION or - * %DIRTY_MEMORY_VGA. + * @client: the user of the logging information; %DIRTY_MEMORY_VGA only. */ void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client); diff --git a/memory.c b/memory.c index 03c536b857..a8f8599079 100644 --- a/memory.c +++ b/memory.c @@ -1425,6 +1425,7 @@ void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) { uint8_t mask = 1 << client; + assert(client == DIRTY_MEMORY_VGA); memory_region_transaction_begin(); mr->dirty_log_mask = (mr->dirty_log_mask & ~mask) | (log * mask); memory_region_update_pending |= mr->enabled; From 086f90e890fb25e7f12fbe72fe5a8078792398aa Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Apr 2015 12:43:24 +0200 Subject: [PATCH 07/62] g364fb: remove pointless call to memory_region_set_coalescing Coalescing work on MMIO, not RAM, thus this call has no effect. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/display/g364fb.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/display/g364fb.c b/hw/display/g364fb.c index 46f7b41211..6543f2f4a3 100644 --- a/hw/display/g364fb.c +++ b/hw/display/g364fb.c @@ -489,7 +489,6 @@ static void g364fb_init(DeviceState *dev, G364State *s) memory_region_init_ram_ptr(&s->mem_vram, NULL, "vram", s->vram_size, s->vram); vmstate_register_ram(&s->mem_vram, dev); - memory_region_set_coalescing(&s->mem_vram); } #define TYPE_G364 "sysbus-g364" From 74259ae55b15bff4ef7b26faa6431a3ff16d7c9d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 10:47:45 +0100 Subject: [PATCH 08/62] display: enable DIRTY_MEMORY_VGA tracking explicitly This will be required soon by the memory core. Tested-by: Aurelien Jarno Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/display/cg3.c | 1 + hw/display/exynos4210_fimd.c | 20 +++++++++++++------- hw/display/g364fb.c | 1 + hw/display/sm501.c | 1 + hw/display/tcx.c | 1 + 5 files changed, 17 insertions(+), 7 deletions(-) diff --git a/hw/display/cg3.c b/hw/display/cg3.c index 1e6ff2b546..cbcf518739 100644 --- a/hw/display/cg3.c +++ b/hw/display/cg3.c @@ -309,6 +309,7 @@ static void cg3_realizefn(DeviceState *dev, Error **errp) memory_region_init_ram(&s->vram_mem, NULL, "cg3.vram", s->vram_size, &error_abort); + memory_region_set_log(&s->vram_mem, true, DIRTY_MEMORY_VGA); vmstate_register_ram_global(&s->vram_mem); sysbus_init_mmio(sbd, &s->vram_mem); diff --git a/hw/display/exynos4210_fimd.c b/hw/display/exynos4210_fimd.c index 45c62afac1..72b3a1d063 100644 --- a/hw/display/exynos4210_fimd.c +++ b/hw/display/exynos4210_fimd.c @@ -1109,6 +1109,12 @@ static inline int fimd_get_buffer_id(Exynos4210fimdWindow *w) } } +static void exynos4210_fimd_invalidate(void *opaque) +{ + Exynos4210fimdState *s = (Exynos4210fimdState *)opaque; + s->invalidate = true; +} + /* Updates specified window's MemorySection based on values of WINCON, * VIDOSDA, VIDOSDB, VIDWADDx and SHADOWCON registers */ static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win) @@ -1136,7 +1142,11 @@ static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win) /* TODO: add .exit and unref the region there. Not needed yet since sysbus * does not support hot-unplug. */ - memory_region_unref(w->mem_section.mr); + if (w->mem_section.mr) { + memory_region_set_log(w->mem_section.mr, false, DIRTY_MEMORY_VGA); + memory_region_unref(w->mem_section.mr); + } + w->mem_section = memory_region_find(sysbus_address_space(sbd), fb_start_addr, w->fb_len); assert(w->mem_section.mr); @@ -1162,6 +1172,8 @@ static void fimd_update_memory_section(Exynos4210fimdState *s, unsigned win) cpu_physical_memory_unmap(w->host_fb_addr, fb_mapped_len, 0, 0); goto error_return; } + memory_region_set_log(w->mem_section.mr, true, DIRTY_MEMORY_VGA); + exynos4210_fimd_invalidate(s); return; error_return: @@ -1224,12 +1236,6 @@ static void exynos4210_fimd_update_irq(Exynos4210fimdState *s) } } -static void exynos4210_fimd_invalidate(void *opaque) -{ - Exynos4210fimdState *s = (Exynos4210fimdState *)opaque; - s->invalidate = true; -} - static void exynos4210_update_resolution(Exynos4210fimdState *s) { DisplaySurface *surface = qemu_console_surface(s->console); diff --git a/hw/display/g364fb.c b/hw/display/g364fb.c index 6543f2f4a3..be62dd6284 100644 --- a/hw/display/g364fb.c +++ b/hw/display/g364fb.c @@ -489,6 +489,7 @@ static void g364fb_init(DeviceState *dev, G364State *s) memory_region_init_ram_ptr(&s->mem_vram, NULL, "vram", s->vram_size, s->vram); vmstate_register_ram(&s->mem_vram, dev); + memory_region_set_log(&s->mem_vram, true, DIRTY_MEMORY_VGA); } #define TYPE_G364 "sysbus-g364" diff --git a/hw/display/sm501.c b/hw/display/sm501.c index c72154b6f1..43f8538f56 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -1412,6 +1412,7 @@ void sm501_init(MemoryRegion *address_space_mem, uint32_t base, memory_region_init_ram(&s->local_mem_region, NULL, "sm501.local", local_mem_bytes, &error_abort); vmstate_register_ram_global(&s->local_mem_region); + memory_region_set_log(&s->local_mem_region, true, DIRTY_MEMORY_VGA); s->local_mem = memory_region_get_ram_ptr(&s->local_mem_region); memory_region_add_subregion(address_space_mem, base, &s->local_mem_region); diff --git a/hw/display/tcx.c b/hw/display/tcx.c index a9f9f66d15..58faa96af5 100644 --- a/hw/display/tcx.c +++ b/hw/display/tcx.c @@ -1006,6 +1006,7 @@ static void tcx_realizefn(DeviceState *dev, Error **errp) memory_region_init_ram(&s->vram_mem, OBJECT(s), "tcx.vram", s->vram_size * (1 + 4 + 4), &error_abort); vmstate_register_ram_global(&s->vram_mem); + memory_region_set_log(&s->vram_mem, true, DIRTY_MEMORY_VGA); vram_base = memory_region_get_ram_ptr(&s->vram_mem); /* 10/ROM : FCode ROM */ From 5299c0f2cf951c23ec681ff87e455d1cf4ec537b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Apr 2015 13:12:40 +0200 Subject: [PATCH 09/62] display: add memory_region_sync_dirty_bitmap calls These are strictly speaking only needed for KVM and Xen, but it's still nice to be consistent. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/display/cg3.c | 1 + hw/display/g364fb.c | 1 + hw/display/sm501.c | 1 + hw/display/tcx.c | 2 ++ 4 files changed, 5 insertions(+) diff --git a/hw/display/cg3.c b/hw/display/cg3.c index cbcf518739..b94e5e0d78 100644 --- a/hw/display/cg3.c +++ b/hw/display/cg3.c @@ -106,6 +106,7 @@ static void cg3_update_display(void *opaque) pix = memory_region_get_ram_ptr(&s->vram_mem); data = (uint32_t *)surface_data(surface); + memory_region_sync_dirty_bitmap(&s->vram_mem); for (y = 0; y < height; y++) { int update = s->full_update; diff --git a/hw/display/g364fb.c b/hw/display/g364fb.c index be62dd6284..52a9733bfd 100644 --- a/hw/display/g364fb.c +++ b/hw/display/g364fb.c @@ -260,6 +260,7 @@ static void g364fb_update_display(void *opaque) qemu_console_resize(s->con, s->width, s->height); } + memory_region_sync_dirty_bitmap(&s->mem_vram); if (s->ctla & CTLA_FORCE_BLANK) { g364fb_draw_blank(s); } else if (s->depth == 8) { diff --git a/hw/display/sm501.c b/hw/display/sm501.c index 43f8538f56..15a5ba8000 100644 --- a/hw/display/sm501.c +++ b/hw/display/sm501.c @@ -1322,6 +1322,7 @@ static void sm501_draw_crt(SM501State * s) } /* draw each line according to conditions */ + memory_region_sync_dirty_bitmap(&s->local_mem_region); for (y = 0; y < height; y++) { int update_hwc = draw_hwc_line ? within_hwc_y_range(s, y, 1) : 0; int update = full_update || update_hwc; diff --git a/hw/display/tcx.c b/hw/display/tcx.c index 58faa96af5..f3faf78bf8 100644 --- a/hw/display/tcx.c +++ b/hw/display/tcx.c @@ -353,6 +353,7 @@ static void tcx_update_display(void *opaque) return; } + memory_region_sync_dirty_bitmap(&ts->vram_mem); for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE) { if (memory_region_get_dirty(&ts->vram_mem, page, TARGET_PAGE_SIZE, DIRTY_MEMORY_VGA)) { @@ -446,6 +447,7 @@ static void tcx24_update_display(void *opaque) dd = surface_stride(surface); ds = 1024; + memory_region_sync_dirty_bitmap(&ts->vram_mem); for (y = 0; y < ts->height; page += TARGET_PAGE_SIZE, page24 += TARGET_PAGE_SIZE, cpage += TARGET_PAGE_SIZE) { if (tcx24_check_dirty(ts, page, page24, cpage)) { From 2d1a35bef0ed96b3f23535e459c552414ccdbafd Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 10:50:57 +0100 Subject: [PATCH 10/62] memory: differentiate memory_region_is_logging and memory_region_get_dirty_log_mask For now memory regions only track DIRTY_MEMORY_VGA individually, but this will change soon. To support this, split memory_region_is_logging in two functions: one that returns a given bit from dirty_log_mask, and one that returns the entire mask. memory_region_is_logging gets an extra parameter so that the compiler flags misuse. While VGA-specific users (including the Xen listener!) will want to keep checking that bit, KVM and vhost check for "any bit except migration" (because migration is handled via the global start/stop listener callbacks). Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/display/vmware_vga.c | 2 +- hw/virtio/dataplane/vring.c | 2 +- hw/virtio/vhost.c | 3 ++- include/exec/memory.h | 16 ++++++++++++++-- kvm-all.c | 3 ++- memory.c | 7 ++++++- xen-hvm.c | 2 +- 7 files changed, 27 insertions(+), 8 deletions(-) diff --git a/hw/display/vmware_vga.c b/hw/display/vmware_vga.c index c17ddd1fcd..7f397d3c2e 100644 --- a/hw/display/vmware_vga.c +++ b/hw/display/vmware_vga.c @@ -1124,7 +1124,7 @@ static void vmsvga_update_display(void *opaque) * Is it more efficient to look at vram VGA-dirty bits or wait * for the driver to issue SVGA_CMD_UPDATE? */ - if (memory_region_is_logging(&s->vga.vram)) { + if (memory_region_is_logging(&s->vga.vram, DIRTY_MEMORY_VGA)) { vga_sync_dirty_bitmap(&s->vga); dirty = memory_region_get_dirty(&s->vga.vram, 0, surface_stride(surface) * surface_height(surface), diff --git a/hw/virtio/dataplane/vring.c b/hw/virtio/dataplane/vring.c index 5c7b8c20fa..e37873384f 100644 --- a/hw/virtio/dataplane/vring.c +++ b/hw/virtio/dataplane/vring.c @@ -42,7 +42,7 @@ static void *vring_map(MemoryRegion **mr, hwaddr phys, hwaddr len, } /* Ignore regions with dirty logging, we cannot mark them dirty */ - if (memory_region_is_logging(section.mr)) { + if (memory_region_get_dirty_log_mask(section.mr)) { goto out; } diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 01f1e0490f..53d23d281d 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -416,7 +416,8 @@ static void vhost_set_memory(MemoryListener *listener, memory_listener); hwaddr start_addr = section->offset_within_address_space; ram_addr_t size = int128_get64(section->size); - bool log_dirty = memory_region_is_logging(section->mr); + bool log_dirty = + memory_region_get_dirty_log_mask(section->mr) & ~(1 << DIRTY_MEMORY_MIGRATION); int s = offsetof(struct vhost_memory, regions) + (dev->mem->nregions + 1) * sizeof dev->mem->regions[0]; void *ram; diff --git a/include/exec/memory.h b/include/exec/memory.h index 7b0929d54f..156d506063 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -591,11 +591,23 @@ const char *memory_region_name(const MemoryRegion *mr); /** * memory_region_is_logging: return whether a memory region is logging writes * - * Returns %true if the memory region is logging writes + * Returns %true if the memory region is logging writes for the given client + * + * @mr: the memory region being queried + * @client: the client being queried + */ +bool memory_region_is_logging(MemoryRegion *mr, uint8_t client); + +/** + * memory_region_get_dirty_log_mask: return the clients for which a + * memory region is logging writes. + * + * Returns a bitmap of clients, which right now will be either 0 or + * (1 << DIRTY_MEMORY_VGA). * * @mr: the memory region being queried */ -bool memory_region_is_logging(MemoryRegion *mr); +uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr); /** * memory_region_is_rom: check whether a memory region is ROM diff --git a/kvm-all.c b/kvm-all.c index 44a34a52d8..d5416bbd74 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -663,7 +663,8 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) KVMSlot *mem, old; int err; MemoryRegion *mr = section->mr; - bool log_dirty = memory_region_is_logging(mr); + bool log_dirty = + memory_region_get_dirty_log_mask(mr) & ~(1 << DIRTY_MEMORY_MIGRATION); bool writeable = !mr->readonly && !mr->rom_device; bool readonly_flag = mr->readonly || memory_region_is_romd(mr); hwaddr start_addr = section->offset_within_address_space; diff --git a/memory.c b/memory.c index a8f8599079..72e33e853b 100644 --- a/memory.c +++ b/memory.c @@ -1389,11 +1389,16 @@ bool memory_region_is_skip_dump(MemoryRegion *mr) return mr->skip_dump; } -bool memory_region_is_logging(MemoryRegion *mr) +uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) { return mr->dirty_log_mask; } +bool memory_region_is_logging(MemoryRegion *mr, uint8_t client) +{ + return memory_region_get_dirty_log_mask(mr) & (1 << client); +} + bool memory_region_is_rom(MemoryRegion *mr) { return mr->ram && mr->readonly; diff --git a/xen-hvm.c b/xen-hvm.c index 315864ca70..338ab291f8 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -488,7 +488,7 @@ static void xen_set_memory(struct MemoryListener *listener, XenIOState *state = container_of(listener, XenIOState, memory_listener); hwaddr start_addr = section->offset_within_address_space; ram_addr_t size = int128_get64(section->size); - bool log_dirty = memory_region_is_logging(section->mr); + bool log_dirty = memory_region_is_logging(section->mr, DIRTY_MEMORY_VGA); hvmmem_type_t mem_type; if (section->mr == &ram_memory) { From b2dfd71c4843a762f2befe702adb249cf55baf66 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 25 Apr 2015 14:38:30 +0200 Subject: [PATCH 11/62] memory: prepare for multiple bits in the dirty log mask When the dirty log mask will also cover other bits than DIRTY_MEMORY_VGA, some listeners may be interested in the overall zero/non-zero value of the dirty log mask; others may be interested in the value of single bits. For this reason, always call log_start/log_stop if bits have respectively appeared or disappeared, and pass the old and new values of the dirty log mask so that listeners can distinguish the kinds of change. For example, KVM checks if dirty logging used to be completely disabled (in log_start) or is now completely disabled (in log_stop). On the other hand, Xen has to check manually if DIRTY_MEMORY_VGA changed, since that is the only bit it cares about. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/virtio/vhost.c | 6 ++++-- include/exec/memory.h | 6 ++++-- kvm-all.c | 14 ++++++++++++-- memory.c | 17 +++++++++++------ xen-hvm.c | 20 +++++++++++++------- 5 files changed, 44 insertions(+), 19 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 53d23d281d..a7fe3c5104 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -676,13 +676,15 @@ static void vhost_log_global_stop(MemoryListener *listener) } static void vhost_log_start(MemoryListener *listener, - MemoryRegionSection *section) + MemoryRegionSection *section, + int old, int new) { /* FIXME: implement */ } static void vhost_log_stop(MemoryListener *listener, - MemoryRegionSection *section) + MemoryRegionSection *section, + int old, int new) { /* FIXME: implement */ } diff --git a/include/exec/memory.h b/include/exec/memory.h index 156d506063..55dc11d55c 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -206,8 +206,10 @@ struct MemoryListener { void (*region_add)(MemoryListener *listener, MemoryRegionSection *section); void (*region_del)(MemoryListener *listener, MemoryRegionSection *section); void (*region_nop)(MemoryListener *listener, MemoryRegionSection *section); - void (*log_start)(MemoryListener *listener, MemoryRegionSection *section); - void (*log_stop)(MemoryListener *listener, MemoryRegionSection *section); + void (*log_start)(MemoryListener *listener, MemoryRegionSection *section, + int old, int new); + void (*log_stop)(MemoryListener *listener, MemoryRegionSection *section, + int old, int new); void (*log_sync)(MemoryListener *listener, MemoryRegionSection *section); void (*log_global_start)(MemoryListener *listener); void (*log_global_stop)(MemoryListener *listener); diff --git a/kvm-all.c b/kvm-all.c index d5416bbd74..c713b22f8c 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -344,10 +344,15 @@ static int kvm_dirty_pages_log_change(hwaddr phys_addr, } static void kvm_log_start(MemoryListener *listener, - MemoryRegionSection *section) + MemoryRegionSection *section, + int old, int new) { int r; + if (old != 0) { + return; + } + r = kvm_dirty_pages_log_change(section->offset_within_address_space, int128_get64(section->size), true); if (r < 0) { @@ -356,10 +361,15 @@ static void kvm_log_start(MemoryListener *listener, } static void kvm_log_stop(MemoryListener *listener, - MemoryRegionSection *section) + MemoryRegionSection *section, + int old, int new) { int r; + if (new != 0) { + return; + } + r = kvm_dirty_pages_log_change(section->offset_within_address_space, int128_get64(section->size), false); if (r < 0) { diff --git a/memory.c b/memory.c index 72e33e853b..3e3d5b8ad6 100644 --- a/memory.c +++ b/memory.c @@ -152,7 +152,7 @@ static bool memory_listener_match(MemoryListener *listener, } while (0) /* No need to ref/unref .mr, the FlatRange keeps it alive. */ -#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback) \ +#define MEMORY_LISTENER_UPDATE_REGION(fr, as, dir, callback, _args...) \ MEMORY_LISTENER_CALL(callback, dir, (&(MemoryRegionSection) { \ .mr = (fr)->mr, \ .address_space = (as), \ @@ -160,7 +160,7 @@ static bool memory_listener_match(MemoryListener *listener, .size = (fr)->addr.size, \ .offset_within_address_space = int128_get64((fr)->addr.start), \ .readonly = (fr)->readonly, \ - })) + }), ##_args) struct CoalescedMemoryRange { AddrRange addr; @@ -774,10 +774,15 @@ static void address_space_update_topology_pass(AddressSpace *as, if (adding) { MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, region_nop); - if (frold->dirty_log_mask && !frnew->dirty_log_mask) { - MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop); - } else if (frnew->dirty_log_mask && !frold->dirty_log_mask) { - MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start); + if (frnew->dirty_log_mask & ~frold->dirty_log_mask) { + MEMORY_LISTENER_UPDATE_REGION(frnew, as, Forward, log_start, + frold->dirty_log_mask, + frnew->dirty_log_mask); + } + if (frold->dirty_log_mask & ~frnew->dirty_log_mask) { + MEMORY_LISTENER_UPDATE_REGION(frnew, as, Reverse, log_stop, + frold->dirty_log_mask, + frnew->dirty_log_mask); } } diff --git a/xen-hvm.c b/xen-hvm.c index 338ab291f8..42356b836a 100644 --- a/xen-hvm.c +++ b/xen-hvm.c @@ -646,21 +646,27 @@ static void xen_sync_dirty_bitmap(XenIOState *state, } static void xen_log_start(MemoryListener *listener, - MemoryRegionSection *section) + MemoryRegionSection *section, + int old, int new) { XenIOState *state = container_of(listener, XenIOState, memory_listener); - xen_sync_dirty_bitmap(state, section->offset_within_address_space, - int128_get64(section->size)); + if (new & ~old & (1 << DIRTY_MEMORY_VGA)) { + xen_sync_dirty_bitmap(state, section->offset_within_address_space, + int128_get64(section->size)); + } } -static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section) +static void xen_log_stop(MemoryListener *listener, MemoryRegionSection *section, + int old, int new) { XenIOState *state = container_of(listener, XenIOState, memory_listener); - state->log_for_dirtybit = NULL; - /* Disable dirty bit tracking */ - xc_hvm_track_dirty_vram(xen_xc, xen_domid, 0, 0, NULL); + if (old & ~new & (1 << DIRTY_MEMORY_VGA)) { + state->log_for_dirtybit = NULL; + /* Disable dirty bit tracking */ + xc_hvm_track_dirty_vram(xen_xc, xen_domid, 0, 0, NULL); + } } static void xen_log_sync(MemoryListener *listener, MemoryRegionSection *section) From d55d42078bfb507743747b761673507b95a76620 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 10:46:52 +0100 Subject: [PATCH 12/62] framebuffer: check memory_region_is_logging framebuffer.c expects DIRTY_MEMORY_VGA logging to be always on, but that will not be the case soon. Because framebuffer.c computes the memory region on the fly for every update (with memory_region_find), it cannot enable/disable logging by itself. Instead, always treat updates as invalidations if dirty logging is not enabled, assuming that the board will enable logging on the RAM region that includes the framebuffer. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- hw/display/framebuffer.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/display/framebuffer.c b/hw/display/framebuffer.c index 4546e42654..2cabced208 100644 --- a/hw/display/framebuffer.c +++ b/hw/display/framebuffer.c @@ -63,6 +63,10 @@ void framebuffer_update_display( assert(mem_section.offset_within_address_space == base); memory_region_sync_dirty_bitmap(mem); + if (!memory_region_is_logging(mem, DIRTY_MEMORY_VGA)) { + invalidate = true; + } + src_base = cpu_physical_memory_map(base, &src_len, 0); /* If we can't map the framebuffer then bail. We could try harder, but it's not really worth it as dirty flag tracking will probably From 42af3e3a02f6d0c38c46465b7f0311eabf532f77 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Apr 2015 13:30:19 +0200 Subject: [PATCH 13/62] ui/console: remove dpy_gfx_update_dirty dpy_gfx_update_dirty expects DIRTY_MEMORY_VGA logging to be always on, but that will not be the case soon. Because it computes the memory region on the fly for every update (with memory_region_find), it cannot enable/disable logging by itself. We could always treat updates as invalidations if dirty logging is not enabled, assuming that the board will enable logging on the RAM region that includes the framebuffer. However, the function is unused, so just drop it. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/ui/console.h | 4 --- ui/console.c | 61 -------------------------------------------- 2 files changed, 65 deletions(-) diff --git a/include/ui/console.h b/include/ui/console.h index 6f7550ef9c..06e47399f1 100644 --- a/include/ui/console.h +++ b/include/ui/console.h @@ -241,10 +241,6 @@ void dpy_text_resize(QemuConsole *con, int w, int h); void dpy_mouse_set(QemuConsole *con, int x, int y, int on); void dpy_cursor_define(QemuConsole *con, QEMUCursor *cursor); bool dpy_cursor_define_supported(QemuConsole *con); -void dpy_gfx_update_dirty(QemuConsole *con, - MemoryRegion *address_space, - uint64_t base, - bool invalidate); bool dpy_gfx_check_format(QemuConsole *con, pixman_format_code_t format); diff --git a/ui/console.c b/ui/console.c index 406c36b864..75fc492f73 100644 --- a/ui/console.c +++ b/ui/console.c @@ -1619,67 +1619,6 @@ bool dpy_cursor_define_supported(QemuConsole *con) return false; } -/* - * Call dpy_gfx_update for all dirity scanlines. Works for - * DisplaySurfaces backed by guest memory (i.e. the ones created - * using qemu_create_displaysurface_guestmem). - */ -void dpy_gfx_update_dirty(QemuConsole *con, - MemoryRegion *address_space, - hwaddr base, - bool invalidate) -{ - DisplaySurface *ds = qemu_console_surface(con); - int width = surface_stride(ds); - int height = surface_height(ds); - hwaddr size = width * height; - MemoryRegionSection mem_section; - MemoryRegion *mem; - ram_addr_t addr; - int first, last, i; - bool dirty; - - mem_section = memory_region_find(address_space, base, size); - mem = mem_section.mr; - if (int128_get64(mem_section.size) != size || - !memory_region_is_ram(mem_section.mr)) { - goto out; - } - assert(mem); - - memory_region_sync_dirty_bitmap(mem); - addr = mem_section.offset_within_region; - - first = -1; - last = -1; - for (i = 0; i < height; i++, addr += width) { - dirty = invalidate || - memory_region_get_dirty(mem, addr, width, DIRTY_MEMORY_VGA); - if (dirty) { - if (first == -1) { - first = i; - } - last = i; - } - if (first != -1 && !dirty) { - assert(last != -1 && last >= first); - dpy_gfx_update(con, 0, first, surface_width(ds), - last - first + 1); - first = -1; - } - } - if (first != -1) { - assert(last != -1 && last >= first); - dpy_gfx_update(con, 0, first, surface_width(ds), - last - first + 1); - } - - memory_region_reset_dirty(mem, mem_section.offset_within_region, size, - DIRTY_MEMORY_VGA); -out: - memory_region_unref(mem); -} - /***********************************************************/ /* register display */ From 677e7805cf95f3b2bca8baf0888d1ebed7f0c606 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 10:53:21 +0100 Subject: [PATCH 14/62] memory: track DIRTY_MEMORY_CODE in mr->dirty_log_mask DIRTY_MEMORY_CODE is only needed for TCG. By adding it directly to mr->dirty_log_mask, we avoid testing for TCG everywhere a region is checked for the enabled/disabled state of dirty logging. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/exec/memory.h | 4 ++-- memory.c | 4 ++++ 2 files changed, 6 insertions(+), 2 deletions(-) diff --git a/include/exec/memory.h b/include/exec/memory.h index 55dc11d55c..8ae004eb06 100644 --- a/include/exec/memory.h +++ b/include/exec/memory.h @@ -604,8 +604,8 @@ bool memory_region_is_logging(MemoryRegion *mr, uint8_t client); * memory_region_get_dirty_log_mask: return the clients for which a * memory region is logging writes. * - * Returns a bitmap of clients, which right now will be either 0 or - * (1 << DIRTY_MEMORY_VGA). + * Returns a bitmap of clients, in which the DIRTY_MEMORY_* constants + * are the bit indices. * * @mr: the memory region being queried */ diff --git a/memory.c b/memory.c index 3e3d5b8ad6..47cc181f66 100644 --- a/memory.c +++ b/memory.c @@ -1212,6 +1212,7 @@ void memory_region_init_ram(MemoryRegion *mr, mr->terminates = true; mr->destructor = memory_region_destructor_ram; mr->ram_addr = qemu_ram_alloc(size, mr, errp); + mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; } void memory_region_init_resizeable_ram(MemoryRegion *mr, @@ -1229,6 +1230,7 @@ void memory_region_init_resizeable_ram(MemoryRegion *mr, mr->terminates = true; mr->destructor = memory_region_destructor_ram; mr->ram_addr = qemu_ram_alloc_resizeable(size, max_size, resized, mr, errp); + mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; } #ifdef __linux__ @@ -1245,6 +1247,7 @@ void memory_region_init_ram_from_file(MemoryRegion *mr, mr->terminates = true; mr->destructor = memory_region_destructor_ram; mr->ram_addr = qemu_ram_alloc_from_file(size, mr, share, path, errp); + mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; } #endif @@ -1258,6 +1261,7 @@ void memory_region_init_ram_ptr(MemoryRegion *mr, mr->ram = true; mr->terminates = true; mr->destructor = memory_region_destructor_ram_from_ptr; + mr->dirty_log_mask = tcg_enabled() ? (1 << DIRTY_MEMORY_CODE) : 0; /* qemu_ram_alloc_from_ptr cannot fail with ptr != NULL. */ assert(ptr != NULL); From ea8cb1a8d98f5e3822a23a7cecdb4add0f29178b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 27 Apr 2015 14:51:31 +0200 Subject: [PATCH 15/62] kvm: accept non-mapped memory in kvm_dirty_pages_log_change It is okay if memory is not mapped into the guest but has dirty logging enabled. When this happens, KVM will not do anything and only accesses from the host will be logged. This can be triggered by iofuzz. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- kvm-all.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index c713b22f8c..36e81099fb 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -335,12 +335,10 @@ static int kvm_dirty_pages_log_change(hwaddr phys_addr, KVMSlot *mem = kvm_lookup_matching_slot(s, phys_addr, phys_addr + size); if (mem == NULL) { - fprintf(stderr, "BUG: %s: invalid parameters " TARGET_FMT_plx "-" - TARGET_FMT_plx "\n", __func__, phys_addr, - (hwaddr)(phys_addr + size - 1)); - return -EINVAL; + return 0; + } else { + return kvm_slot_dirty_pages_log_change(mem, log_dirty); } - return kvm_slot_dirty_pages_log_change(mem, log_dirty); } static void kvm_log_start(MemoryListener *listener, From 6f6a5ef3e429f92f987678ea8c396aab4dc6aa19 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 10:57:21 +0100 Subject: [PATCH 16/62] memory: include DIRTY_MEMORY_MIGRATION in the dirty log mask The separate handling of DIRTY_MEMORY_MIGRATION, which does not call log_start/log_stop callbacks when it changes in a region's dirty logging mask, has caused several bugs. One recent example is commit 4cc856f (kvm-all: Sync dirty-bitmap from kvm before kvm destroy the corresponding dirty_bitmap, 2015-04-02). Another performance problem is that KVM keeps tracking dirty pages after a failed live migration, which causes bad performance due to disallowing huge page mapping. This patch removes the root cause of the problem by reporting DIRTY_MEMORY_MIGRATION changes via log_start and log_stop. Note that we now have to rebuild the FlatView when global dirty logging is enabled or disabled; this ensures that log_start and log_stop callbacks are invoked. This will also be used to make the setting of bitmaps conditional. In general, this patch lets users of the memory API ignore the global state of dirty logging if they handle dirty logging generically per region. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- memory.c | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) diff --git a/memory.c b/memory.c index 47cc181f66..b7ca987c07 100644 --- a/memory.c +++ b/memory.c @@ -588,7 +588,7 @@ static void render_memory_region(FlatView *view, remain = clip.size; fr.mr = mr; - fr.dirty_log_mask = mr->dirty_log_mask; + fr.dirty_log_mask = memory_region_get_dirty_log_mask(mr); fr.romd_mode = mr->romd_mode; fr.readonly = readonly; @@ -1400,7 +1400,11 @@ bool memory_region_is_skip_dump(MemoryRegion *mr) uint8_t memory_region_get_dirty_log_mask(MemoryRegion *mr) { - return mr->dirty_log_mask; + uint8_t mask = mr->dirty_log_mask; + if (global_dirty_log) { + mask |= (1 << DIRTY_MEMORY_MIGRATION); + } + return mask; } bool memory_region_is_logging(MemoryRegion *mr, uint8_t client) @@ -1962,12 +1966,24 @@ void address_space_sync_dirty_bitmap(AddressSpace *as) void memory_global_dirty_log_start(void) { global_dirty_log = true; + MEMORY_LISTENER_CALL_GLOBAL(log_global_start, Forward); + + /* Refresh DIRTY_LOG_MIGRATION bit. */ + memory_region_transaction_begin(); + memory_region_update_pending = true; + memory_region_transaction_commit(); } void memory_global_dirty_log_stop(void) { global_dirty_log = false; + + /* Refresh DIRTY_LOG_MIGRATION bit. */ + memory_region_transaction_begin(); + memory_region_update_pending = true; + memory_region_transaction_commit(); + MEMORY_LISTENER_CALL_GLOBAL(log_global_stop, Reverse); } From 1bfbac4ee16e2ea95d087e0926727d9a113b483e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 10:57:21 +0100 Subject: [PATCH 17/62] kvm: remove special handling of DIRTY_MEMORY_MIGRATION in the dirty log mask One recent example is commit 4cc856f (kvm-all: Sync dirty-bitmap from kvm before kvm destroy the corresponding dirty_bitmap, 2015-04-02). Another performance problem is that KVM keeps tracking dirty pages after a failed live migration, which causes bad performance due to disallowing huge page mapping. Thanks to the previous patch, KVM can now stop hooking into log_global_start/stop. This simplifies the KVM code noticeably. Reported-by: Wanpeng Li Reported-by: Xiao Guangrong Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- kvm-all.c | 56 ++----------------------------------------------------- 1 file changed, 2 insertions(+), 54 deletions(-) diff --git a/kvm-all.c b/kvm-all.c index 36e81099fb..53e01d468e 100644 --- a/kvm-all.c +++ b/kvm-all.c @@ -83,7 +83,6 @@ struct KVMState struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; bool coalesced_flush_in_progress; int broken_set_mem_region; - int migration_log; int vcpu_events; int robust_singlestep; int debugregs; @@ -234,9 +233,6 @@ static int kvm_set_user_memory_region(KVMState *s, KVMSlot *slot) mem.guest_phys_addr = slot->start_addr; mem.userspace_addr = (unsigned long)slot->ram; mem.flags = slot->flags; - if (s->migration_log) { - mem.flags |= KVM_MEM_LOG_DIRTY_PAGES; - } if (slot->memory_size && mem.flags & KVM_MEM_READONLY) { /* Set the slot size to 0 before setting the slot to the desired @@ -317,10 +313,6 @@ static int kvm_slot_dirty_pages_log_change(KVMSlot *mem, bool log_dirty) mem->flags = flags; /* If nothing changed effectively, no need to issue ioctl */ - if (s->migration_log) { - flags |= KVM_MEM_LOG_DIRTY_PAGES; - } - if (flags == old_flags) { return 0; } @@ -375,31 +367,6 @@ static void kvm_log_stop(MemoryListener *listener, } } -static int kvm_set_migration_log(bool enable) -{ - KVMState *s = kvm_state; - KVMSlot *mem; - int i, err; - - s->migration_log = enable; - - for (i = 0; i < s->nr_slots; i++) { - mem = &s->slots[i]; - - if (!mem->memory_size) { - continue; - } - if (!!(mem->flags & KVM_MEM_LOG_DIRTY_PAGES) == enable) { - continue; - } - err = kvm_set_user_memory_region(s, mem); - if (err) { - return err; - } - } - return 0; -} - /* get kvm's dirty pages bitmap and update qemu's */ static int kvm_get_dirty_pages_log_range(MemoryRegionSection *section, unsigned long *bitmap) @@ -671,8 +638,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) KVMSlot *mem, old; int err; MemoryRegion *mr = section->mr; - bool log_dirty = - memory_region_get_dirty_log_mask(mr) & ~(1 << DIRTY_MEMORY_MIGRATION); + bool log_dirty = memory_region_get_dirty_log_mask(mr) != 0; bool writeable = !mr->readonly && !mr->rom_device; bool readonly_flag = mr->readonly || memory_region_is_romd(mr); hwaddr start_addr = section->offset_within_address_space; @@ -724,7 +690,7 @@ static void kvm_set_phys_mem(MemoryRegionSection *section, bool add) old = *mem; - if ((mem->flags & KVM_MEM_LOG_DIRTY_PAGES) || s->migration_log) { + if (mem->flags & KVM_MEM_LOG_DIRTY_PAGES) { kvm_physical_sync_dirty_bitmap(section); } @@ -853,22 +819,6 @@ static void kvm_log_sync(MemoryListener *listener, } } -static void kvm_log_global_start(struct MemoryListener *listener) -{ - int r; - - r = kvm_set_migration_log(1); - assert(r >= 0); -} - -static void kvm_log_global_stop(struct MemoryListener *listener) -{ - int r; - - r = kvm_set_migration_log(0); - assert(r >= 0); -} - static void kvm_mem_ioeventfd_add(MemoryListener *listener, MemoryRegionSection *section, bool match_data, uint64_t data, @@ -944,8 +894,6 @@ static MemoryListener kvm_memory_listener = { .log_start = kvm_log_start, .log_stop = kvm_log_stop, .log_sync = kvm_log_sync, - .log_global_start = kvm_log_global_start, - .log_global_stop = kvm_log_global_stop, .eventfd_add = kvm_mem_ioeventfd_add, .eventfd_del = kvm_mem_ioeventfd_del, .coalesced_mmio_add = kvm_coalesce_mmio_region, From 49dfcec40349245ad365964468b67e132c3cedc7 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 11:35:19 +0100 Subject: [PATCH 18/62] ram_addr: tweaks to xen_modified_memory Invoke xen_modified_memory from cpu_physical_memory_set_dirty_range_nocode; it is akin to DIRTY_MEMORY_MIGRATION, so set it together with that bitmap. The remaining call from invalidate_and_set_dirty's "else" branch will go away soon. Second, fix the second argument to the function in the cpu_physical_memory_set_dirty_lebitmap call site. That function is only used by KVM, but it is better to be clean anyway. Acked-by: Stefano Stabellini Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- exec.c | 3 ++- include/exec/ram_addr.h | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/exec.c b/exec.c index fc8d05d50a..6f6dbc8e60 100644 --- a/exec.c +++ b/exec.c @@ -2281,8 +2281,9 @@ static void invalidate_and_set_dirty(hwaddr addr, if (cpu_physical_memory_range_includes_clean(addr, length)) { tb_invalidate_phys_range(addr, addr + length, 0); cpu_physical_memory_set_dirty_range_nocode(addr, length); + } else { + xen_modified_memory(addr, length); } - xen_modified_memory(addr, length); } static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index ff558a4734..7f6e928a7f 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -112,6 +112,7 @@ static inline void cpu_physical_memory_set_dirty_range_nocode(ram_addr_t start, page = start >> TARGET_PAGE_BITS; bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION], page, end - page); bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_VGA], page, end - page); + xen_modified_memory(start, length); } static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, @@ -155,7 +156,7 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, ram_list.dirty_memory[DIRTY_MEMORY_CODE][page + k] |= temp; } } - xen_modified_memory(start, pages); + xen_modified_memory(start, pages << TARGET_PAGE_BITS); } else { /* * bitmap-traveling is faster than memory-traveling (for addr...) From 845b6214a309fa58a4405050bf8313e19fde5c91 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 11:45:53 +0100 Subject: [PATCH 19/62] exec: use memory_region_get_dirty_log_mask to optimize dirty tracking The memory API can now return the exact set of bitmaps that have to be tracked. Use it instead of the in_migration variable. In the next patches, we will also use it to set only DIRTY_MEMORY_VGA or DIRTY_MEMORY_MIGRATION if necessary. This can make a difference for dataplane, especially after the dirty bitmap is changed to use more expensive atomic operations. Of some interest is the change to stl_phys_notdirty. When migration was introduced, stl_phys_notdirty was changed to effectively behave as stl_phys during migration. In fact, if one looks at the function as it was in the beginning (commit 8df1cd0, physical memory access functions, 2005-01-28), at the time the dirty bitmap was the equivalent of DIRTY_MEMORY_CODE nowadays; hence, the function simply should not touch the dirty code bits. This patch changes it to do the intended thing. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- exec.c | 59 +++++++++++++++++++--------------------------------------- 1 file changed, 19 insertions(+), 40 deletions(-) diff --git a/exec.c b/exec.c index 6f6dbc8e60..950fea410e 100644 --- a/exec.c +++ b/exec.c @@ -59,8 +59,6 @@ //#define DEBUG_SUBPAGE #if !defined(CONFIG_USER_ONLY) -static bool in_migration; - /* ram_list is read under rcu_read_lock()/rcu_read_unlock(). Writes * are protected by the ramlist lock. */ @@ -866,11 +864,6 @@ void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length, } } -static void cpu_physical_memory_set_dirty_tracking(bool enable) -{ - in_migration = enable; -} - /* Called from RCU critical section */ hwaddr memory_region_section_get_iotlb(CPUState *cpu, MemoryRegionSection *section, @@ -2161,22 +2154,6 @@ static void tcg_commit(MemoryListener *listener) } } -static void core_log_global_start(MemoryListener *listener) -{ - cpu_physical_memory_set_dirty_tracking(true); -} - -static void core_log_global_stop(MemoryListener *listener) -{ - cpu_physical_memory_set_dirty_tracking(false); -} - -static MemoryListener core_memory_listener = { - .log_global_start = core_log_global_start, - .log_global_stop = core_log_global_stop, - .priority = 1, -}; - void address_space_init_dispatch(AddressSpace *as) { as->dispatch = NULL; @@ -2216,8 +2193,6 @@ static void memory_map_init(void) memory_region_init_io(system_io, NULL, &unassigned_io_ops, NULL, "io", 65536); address_space_init(&address_space_io, system_io, "I/O"); - - memory_listener_register(&core_memory_listener, &address_space_memory); } MemoryRegion *get_system_memory(void) @@ -2275,12 +2250,18 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, #else -static void invalidate_and_set_dirty(hwaddr addr, +static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, hwaddr length) { if (cpu_physical_memory_range_includes_clean(addr, length)) { - tb_invalidate_phys_range(addr, addr + length, 0); - cpu_physical_memory_set_dirty_range_nocode(addr, length); + uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr); + if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) { + tb_invalidate_phys_range(addr, addr + length, 0); + dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); + } + if (dirty_log_mask) { + cpu_physical_memory_set_dirty_range_nocode(addr, length); + } } else { xen_modified_memory(addr, length); } @@ -2368,7 +2349,7 @@ MemTxResult address_space_rw(AddressSpace *as, hwaddr addr, MemTxAttrs attrs, /* RAM case */ ptr = qemu_get_ram_ptr(addr1); memcpy(ptr, buf, l); - invalidate_and_set_dirty(addr1, l); + invalidate_and_set_dirty(mr, addr1, l); } } else { if (!memory_access_is_direct(mr, is_write)) { @@ -2465,7 +2446,7 @@ static inline void cpu_physical_memory_write_rom_internal(AddressSpace *as, switch (type) { case WRITE_DATA: memcpy(ptr, buf, l); - invalidate_and_set_dirty(addr1, l); + invalidate_and_set_dirty(mr, addr1, l); break; case FLUSH_CACHE: flush_icache_range((uintptr_t)ptr, (uintptr_t)ptr + l); @@ -2690,7 +2671,7 @@ void address_space_unmap(AddressSpace *as, void *buffer, hwaddr len, mr = qemu_ram_addr_from_host(buffer, &addr1); assert(mr != NULL); if (is_write) { - invalidate_and_set_dirty(addr1, access_len); + invalidate_and_set_dirty(mr, addr1, access_len); } if (xen_enabled()) { xen_invalidate_map_cache_entry(buffer); @@ -3019,6 +3000,7 @@ void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val, hwaddr l = 4; hwaddr addr1; MemTxResult r; + uint8_t dirty_log_mask; rcu_read_lock(); mr = address_space_translate(as, addr, &addr1, &l, @@ -3030,13 +3012,10 @@ void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val, ptr = qemu_get_ram_ptr(addr1); stl_p(ptr, val); - if (unlikely(in_migration)) { - if (cpu_physical_memory_is_clean(addr1)) { - /* invalidate code */ - tb_invalidate_phys_page_range(addr1, addr1 + 4, 0); - /* set dirty bit */ - cpu_physical_memory_set_dirty_range_nocode(addr1, 4); - } + dirty_log_mask = memory_region_get_dirty_log_mask(mr); + dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); + if (dirty_log_mask) { + cpu_physical_memory_set_dirty_range_nocode(addr1, 4); } r = MEMTX_OK; } @@ -3093,7 +3072,7 @@ static inline void address_space_stl_internal(AddressSpace *as, stl_p(ptr, val); break; } - invalidate_and_set_dirty(addr1, 4); + invalidate_and_set_dirty(mr, addr1, 4); r = MEMTX_OK; } if (result) { @@ -3197,7 +3176,7 @@ static inline void address_space_stw_internal(AddressSpace *as, stw_p(ptr, val); break; } - invalidate_and_set_dirty(addr1, 2); + invalidate_and_set_dirty(mr, addr1, 2); r = MEMTX_OK; } if (result) { From 1652b974766401743879d78f796f44b8929b0787 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Apr 2015 14:15:48 +0200 Subject: [PATCH 20/62] exec: move functions to translate-all.h Remove them from the sundry exec-all.h header, since they are only used by the TCG runtime in exec.c and user-exec.c. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/exec/exec-all.h | 6 +----- linux-user/mmap.c | 1 + translate-all.h | 8 ++++++++ user-exec.c | 1 + 4 files changed, 11 insertions(+), 5 deletions(-) diff --git a/include/exec/exec-all.h b/include/exec/exec-all.h index b58cd47ced..2f7a4f1700 100644 --- a/include/exec/exec-all.h +++ b/include/exec/exec-all.h @@ -90,11 +90,7 @@ TranslationBlock *tb_gen_code(CPUState *cpu, int cflags); void cpu_exec_init(CPUArchState *env); void QEMU_NORETURN cpu_loop_exit(CPUState *cpu); -int page_unprotect(target_ulong address, uintptr_t pc, void *puc); -void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access); -void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access); + #if !defined(CONFIG_USER_ONLY) bool qemu_in_vcpu_thread(void); void cpu_reload_memory_map(CPUState *cpu); diff --git a/linux-user/mmap.c b/linux-user/mmap.c index a249f0ceb6..959ff4d003 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -30,6 +30,7 @@ #include "qemu.h" #include "qemu-common.h" +#include "translate-all.h" //#define DEBUG_MMAP diff --git a/translate-all.h b/translate-all.h index b6a07bd5d3..f1a40a53c2 100644 --- a/translate-all.h +++ b/translate-all.h @@ -21,6 +21,14 @@ /* translate-all.c */ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len); +void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, + int is_cpu_write_access); +void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, + int is_cpu_write_access); void tb_check_watchpoint(CPUState *cpu); +#ifdef CONFIG_USER_ONLY +int page_unprotect(target_ulong address, uintptr_t pc, void *puc); +#endif + #endif /* TRANSLATE_ALL_H */ diff --git a/user-exec.c b/user-exec.c index 8f57e8acb8..ed9a07f159 100644 --- a/user-exec.c +++ b/user-exec.c @@ -22,6 +22,7 @@ #include "tcg.h" #include "qemu/bitops.h" #include "exec/cpu_ldst.h" +#include "translate-all.h" #undef EAX #undef ECX From 358653391b0c0beaa0e3f9e28304e1918cd223b3 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Apr 2015 14:20:35 +0200 Subject: [PATCH 21/62] translate-all: remove unnecessary argument to tb_invalidate_phys_range The is_cpu_write_access argument is always 0, remove it. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- exec.c | 2 +- linux-user/mmap.c | 6 +++--- translate-all.c | 5 ++--- translate-all.h | 3 +-- 4 files changed, 7 insertions(+), 9 deletions(-) diff --git a/exec.c b/exec.c index 950fea410e..650cfa8cb8 100644 --- a/exec.c +++ b/exec.c @@ -2256,7 +2256,7 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, if (cpu_physical_memory_range_includes_clean(addr, length)) { uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr); if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) { - tb_invalidate_phys_range(addr, addr + length, 0); + tb_invalidate_phys_range(addr, addr + length); dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); } if (dirty_log_mask) { diff --git a/linux-user/mmap.c b/linux-user/mmap.c index 959ff4d003..78e1b2df43 100644 --- a/linux-user/mmap.c +++ b/linux-user/mmap.c @@ -575,7 +575,7 @@ abi_long target_mmap(abi_ulong start, abi_ulong len, int prot, page_dump(stdout); printf("\n"); #endif - tb_invalidate_phys_range(start, start + len, 0); + tb_invalidate_phys_range(start, start + len); mmap_unlock(); return start; fail: @@ -680,7 +680,7 @@ int target_munmap(abi_ulong start, abi_ulong len) if (ret == 0) { page_set_flags(start, start + len, 0); - tb_invalidate_phys_range(start, start + len, 0); + tb_invalidate_phys_range(start, start + len); } mmap_unlock(); return ret; @@ -759,7 +759,7 @@ abi_long target_mremap(abi_ulong old_addr, abi_ulong old_size, page_set_flags(old_addr, old_addr + old_size, 0); page_set_flags(new_addr, new_addr + new_size, prot | PAGE_VALID); } - tb_invalidate_phys_range(new_addr, new_addr + new_size, 0); + tb_invalidate_phys_range(new_addr, new_addr + new_size); mmap_unlock(); return new_addr; } diff --git a/translate-all.c b/translate-all.c index 536008f52d..d118c6c2af 100644 --- a/translate-all.c +++ b/translate-all.c @@ -1042,11 +1042,10 @@ TranslationBlock *tb_gen_code(CPUState *cpu, * access: the virtual CPU will exit the current TB if code is modified inside * this TB. */ -void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access) +void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end) { while (start < end) { - tb_invalidate_phys_page_range(start, end, is_cpu_write_access); + tb_invalidate_phys_page_range(start, end, 0); start &= TARGET_PAGE_MASK; start += TARGET_PAGE_SIZE; } diff --git a/translate-all.h b/translate-all.h index f1a40a53c2..038464005f 100644 --- a/translate-all.h +++ b/translate-all.h @@ -23,8 +23,7 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len); void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, int is_cpu_write_access); -void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end, - int is_cpu_write_access); +void tb_invalidate_phys_range(tb_page_addr_t start, tb_page_addr_t end); void tb_check_watchpoint(CPUState *cpu); #ifdef CONFIG_USER_ONLY From 9564f52da7eb061326956ed9a468935e3352512d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Apr 2015 14:24:54 +0200 Subject: [PATCH 22/62] cputlb: remove useless arguments to tlb_unprotect_code_phys, rename These days modification of the TLB is done in notdirty_mem_write, so the virtual address and env pointer as unnecessary. The new name of the function, tlb_unprotect_code, is consistent with tlb_protect_code. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- cputlb.c | 3 +-- include/exec/cputlb.h | 3 +-- translate-all.c | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/cputlb.c b/cputlb.c index 7606548200..dd1203b938 100644 --- a/cputlb.c +++ b/cputlb.c @@ -131,8 +131,7 @@ void tlb_protect_code(ram_addr_t ram_addr) /* update the TLB so that writes in physical page 'phys_addr' are no longer tested for self modifying code */ -void tlb_unprotect_code_phys(CPUState *cpu, ram_addr_t ram_addr, - target_ulong vaddr) +void tlb_unprotect_code(ram_addr_t ram_addr) { cpu_physical_memory_set_dirty_flag(ram_addr, DIRTY_MEMORY_CODE); } diff --git a/include/exec/cputlb.h b/include/exec/cputlb.h index e0da9d7ad3..360815e1b4 100644 --- a/include/exec/cputlb.h +++ b/include/exec/cputlb.h @@ -22,8 +22,7 @@ #if !defined(CONFIG_USER_ONLY) /* cputlb.c */ void tlb_protect_code(ram_addr_t ram_addr); -void tlb_unprotect_code_phys(CPUState *cpu, ram_addr_t ram_addr, - target_ulong vaddr); +void tlb_unprotect_code(ram_addr_t ram_addr); void tlb_reset_dirty_range(CPUTLBEntry *tlb_entry, uintptr_t start, uintptr_t length); void cpu_tlb_reset_dirty_all(ram_addr_t start1, ram_addr_t length); diff --git a/translate-all.c b/translate-all.c index d118c6c2af..0e2ad8ac64 100644 --- a/translate-all.c +++ b/translate-all.c @@ -1158,7 +1158,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, if (!p->first_tb) { invalidate_page_bitmap(p); if (is_cpu_write_access) { - tlb_unprotect_code_phys(cpu, start, cpu->mem_io_vaddr); + tlb_unprotect_code(start); } } #endif From fc377bcf617a48233a99a9fe0a26247c38b5cb76 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Apr 2015 14:20:35 +0200 Subject: [PATCH 23/62] translate-all: make less of tb_invalidate_phys_page_range depend on is_cpu_write_access is_cpu_write_access is only set if tb_invalidate_phys_page_range is called from tb_invalidate_phys_page_fast, and hence from notdirty_mem_write. However: - the code bitmap can be built directly in tb_invalidate_phys_page_fast (unconditionally, since is_cpu_write_access would always be passed as 1); - the virtual address is not needed to mark the page as "not containing code" (dirty code bitmap = 1), so we can also remove that use of is_cpu_write_access. For calls of tb_invalidate_phys_page_range that do not come from notdirty_mem_write, the next call to notdirty_mem_write will notice that the page does not contain code anymore, and will fix up the TLB entry. The parameter needs to remain in order to guard accesses to cpu->mem_io_pc. Signed-off-by: Paolo Bonzini --- translate-all.c | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/translate-all.c b/translate-all.c index 0e2ad8ac64..e2e7422609 100644 --- a/translate-all.c +++ b/translate-all.c @@ -1082,12 +1082,6 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, if (!p) { return; } - if (!p->code_bitmap && - ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD && - is_cpu_write_access) { - /* build code bitmap */ - build_page_bitmap(p); - } #if defined(TARGET_HAS_PRECISE_SMC) if (cpu != NULL) { env = cpu->env_ptr; @@ -1157,9 +1151,7 @@ void tb_invalidate_phys_page_range(tb_page_addr_t start, tb_page_addr_t end, /* if no code remaining, no need to continue to use slow writes */ if (!p->first_tb) { invalidate_page_bitmap(p); - if (is_cpu_write_access) { - tlb_unprotect_code(start); - } + tlb_unprotect_code(start); } #endif #ifdef TARGET_HAS_PRECISE_SMC @@ -1192,6 +1184,11 @@ void tb_invalidate_phys_page_fast(tb_page_addr_t start, int len) if (!p) { return; } + if (!p->code_bitmap && + ++p->code_write_count >= SMC_BITMAP_USE_THRESHOLD) { + /* build code bitmap */ + build_page_bitmap(p); + } if (p->code_bitmap) { unsigned int nr; unsigned long b; From 58d2707e8713ef17b89b8b4c9ce586c76655a385 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 11:56:01 +0100 Subject: [PATCH 24/62] exec: pass client mask to cpu_physical_memory_set_dirty_range This cuts in half the cost of bitmap operations (which will become more expensive when made atomic) during migration on non-VRAM regions. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- exec.c | 20 +++++++++++--------- include/exec/ram_addr.h | 33 ++++++++++++++++----------------- memory.c | 3 ++- 3 files changed, 29 insertions(+), 27 deletions(-) diff --git a/exec.c b/exec.c index 650cfa8cb8..fe137bd725 100644 --- a/exec.c +++ b/exec.c @@ -1351,7 +1351,8 @@ int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp) cpu_physical_memory_clear_dirty_range(block->offset, block->used_length); block->used_length = newsize; - cpu_physical_memory_set_dirty_range(block->offset, block->used_length); + cpu_physical_memory_set_dirty_range(block->offset, block->used_length, + DIRTY_CLIENTS_ALL); memory_region_set_size(block->mr, newsize); if (block->resized) { block->resized(block->idstr, newsize, block->host); @@ -1425,7 +1426,8 @@ static ram_addr_t ram_block_add(RAMBlock *new_block, Error **errp) } } cpu_physical_memory_set_dirty_range(new_block->offset, - new_block->used_length); + new_block->used_length, + DIRTY_CLIENTS_ALL); if (new_block->host) { qemu_ram_setup_dump(new_block->host, new_block->max_length); @@ -1813,7 +1815,11 @@ static void notdirty_mem_write(void *opaque, hwaddr ram_addr, default: abort(); } - cpu_physical_memory_set_dirty_range_nocode(ram_addr, size); + /* Set both VGA and migration bits for simplicity and to remove + * the notdirty callback faster. + */ + cpu_physical_memory_set_dirty_range(ram_addr, size, + DIRTY_CLIENTS_NOCODE); /* we remove the notdirty callback only if the code has been flushed */ if (!cpu_physical_memory_is_clean(ram_addr)) { @@ -2259,9 +2265,7 @@ static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, tb_invalidate_phys_range(addr, addr + length); dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); } - if (dirty_log_mask) { - cpu_physical_memory_set_dirty_range_nocode(addr, length); - } + cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); } else { xen_modified_memory(addr, length); } @@ -3014,9 +3018,7 @@ void address_space_stl_notdirty(AddressSpace *as, hwaddr addr, uint32_t val, dirty_log_mask = memory_region_get_dirty_log_mask(mr); dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); - if (dirty_log_mask) { - cpu_physical_memory_set_dirty_range_nocode(addr1, 4); - } + cpu_physical_memory_set_dirty_range(addr1, 4, dirty_log_mask); r = MEMTX_OK; } if (result) { diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 7f6e928a7f..5bbc7bb41d 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -41,6 +41,9 @@ void qemu_ram_free_from_ptr(ram_addr_t addr); int qemu_ram_resize(ram_addr_t base, ram_addr_t newsize, Error **errp); +#define DIRTY_CLIENTS_ALL ((1 << DIRTY_MEMORY_NUM) - 1) +#define DIRTY_CLIENTS_NOCODE (DIRTY_CLIENTS_ALL & ~(1 << DIRTY_MEMORY_CODE)) + static inline bool cpu_physical_memory_get_dirty(ram_addr_t start, ram_addr_t length, unsigned client) @@ -103,28 +106,23 @@ static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr, set_bit(addr >> TARGET_PAGE_BITS, ram_list.dirty_memory[client]); } -static inline void cpu_physical_memory_set_dirty_range_nocode(ram_addr_t start, - ram_addr_t length) -{ - unsigned long end, page; - - end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; - page = start >> TARGET_PAGE_BITS; - bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION], page, end - page); - bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_VGA], page, end - page); - xen_modified_memory(start, length); -} - static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, - ram_addr_t length) + ram_addr_t length, + uint8_t mask) { unsigned long end, page; end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; page = start >> TARGET_PAGE_BITS; - bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION], page, end - page); - bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_VGA], page, end - page); - bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_CODE], page, end - page); + if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) { + bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION], page, end - page); + } + if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) { + bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_VGA], page, end - page); + } + if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) { + bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_CODE], page, end - page); + } xen_modified_memory(start, length); } @@ -172,7 +170,8 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, addr = page_number * TARGET_PAGE_SIZE; ram_addr = start + addr; cpu_physical_memory_set_dirty_range(ram_addr, - TARGET_PAGE_SIZE * hpratio); + TARGET_PAGE_SIZE * hpratio, + DIRTY_CLIENTS_ALL); } while (c != 0); } } diff --git a/memory.c b/memory.c index b7ca987c07..418cac7ce1 100644 --- a/memory.c +++ b/memory.c @@ -1461,7 +1461,8 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size) { assert(mr->terminates); - cpu_physical_memory_set_dirty_range(mr->ram_addr + addr, size); + cpu_physical_memory_set_dirty_range(mr->ram_addr + addr, size, + memory_region_get_dirty_log_mask(mr)); } bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr, From 72b47e79cef36ed6ffc718f10e21001d7ec2a66f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Apr 2015 13:48:25 +0200 Subject: [PATCH 25/62] exec: invert return value of cpu_physical_memory_get_clean, rename While it is obvious that cpu_physical_memory_get_dirty returns true even if a single page is dirty, the same is not true for cpu_physical_memory_get_clean; one would expect that it returns true only if all the pages are clean, but it actually looks for even one clean page. (By contrast, the caller of that function, cpu_physical_memory_range_includes_clean, has a good name). To clarify, rename the function to cpu_physical_memory_all_dirty and return true if _all_ the pages are dirty. This is the opposite of the previous meaning, because "all are 1" is the same as "not (any is 0)", so we have to modify cpu_physical_memory_range_includes_clean as well. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/exec/ram_addr.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 5bbc7bb41d..c221bd7dd0 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -59,7 +59,7 @@ static inline bool cpu_physical_memory_get_dirty(ram_addr_t start, return next < end; } -static inline bool cpu_physical_memory_get_clean(ram_addr_t start, +static inline bool cpu_physical_memory_all_dirty(ram_addr_t start, ram_addr_t length, unsigned client) { @@ -71,7 +71,7 @@ static inline bool cpu_physical_memory_get_clean(ram_addr_t start, page = start >> TARGET_PAGE_BITS; next = find_next_zero_bit(ram_list.dirty_memory[client], end, page); - return next < end; + return next >= end; } static inline bool cpu_physical_memory_get_dirty_flag(ram_addr_t addr, @@ -92,10 +92,10 @@ static inline bool cpu_physical_memory_is_clean(ram_addr_t addr) static inline bool cpu_physical_memory_range_includes_clean(ram_addr_t start, ram_addr_t length) { - bool vga = cpu_physical_memory_get_clean(start, length, DIRTY_MEMORY_VGA); - bool code = cpu_physical_memory_get_clean(start, length, DIRTY_MEMORY_CODE); + bool vga = !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA); + bool code = !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE); bool migration = - cpu_physical_memory_get_clean(start, length, DIRTY_MEMORY_MIGRATION); + !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION); return vga || code || migration; } From e87f7778b64d4a6a78e16c288c7fdc6c15317d5f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 25 Mar 2015 15:21:39 +0100 Subject: [PATCH 26/62] exec: only check relevant bitmaps for cleanliness Most of the time, not all bitmaps have to be marked as dirty; do not do anything if the interesting ones are already dirty. Previously, any clean bitmap would have cause all the bitmaps to be marked dirty. In fact, unless running TCG most of the time bitmap operations need not be done at all, because memory_region_is_logging returns zero. In this case, skip the call to cpu_physical_memory_range_includes_clean altogether as well. With this patch, cpu_physical_memory_set_dirty_range is called unconditionally, so there need not be anymore a separate call to xen_modified_memory. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- exec.c | 22 +++++++++++++--------- include/exec/ram_addr.h | 25 ++++++++++++++++++------- 2 files changed, 31 insertions(+), 16 deletions(-) diff --git a/exec.c b/exec.c index fe137bd725..162c57908e 100644 --- a/exec.c +++ b/exec.c @@ -2259,16 +2259,20 @@ int cpu_memory_rw_debug(CPUState *cpu, target_ulong addr, static void invalidate_and_set_dirty(MemoryRegion *mr, hwaddr addr, hwaddr length) { - if (cpu_physical_memory_range_includes_clean(addr, length)) { - uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr); - if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) { - tb_invalidate_phys_range(addr, addr + length); - dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); - } - cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); - } else { - xen_modified_memory(addr, length); + uint8_t dirty_log_mask = memory_region_get_dirty_log_mask(mr); + /* No early return if dirty_log_mask is or becomes 0, because + * cpu_physical_memory_set_dirty_range will still call + * xen_modified_memory. + */ + if (dirty_log_mask) { + dirty_log_mask = + cpu_physical_memory_range_includes_clean(addr, length, dirty_log_mask); } + if (dirty_log_mask & (1 << DIRTY_MEMORY_CODE)) { + tb_invalidate_phys_range(addr, addr + length); + dirty_log_mask &= ~(1 << DIRTY_MEMORY_CODE); + } + cpu_physical_memory_set_dirty_range(addr, length, dirty_log_mask); } static int memory_access_size(MemoryRegion *mr, unsigned l, hwaddr addr) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index c221bd7dd0..3e42b4f602 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -89,14 +89,25 @@ static inline bool cpu_physical_memory_is_clean(ram_addr_t addr) return !(vga && code && migration); } -static inline bool cpu_physical_memory_range_includes_clean(ram_addr_t start, - ram_addr_t length) +static inline uint8_t cpu_physical_memory_range_includes_clean(ram_addr_t start, + ram_addr_t length, + uint8_t mask) { - bool vga = !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA); - bool code = !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE); - bool migration = - !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION); - return vga || code || migration; + uint8_t ret = 0; + + if (mask & (1 << DIRTY_MEMORY_VGA) && + !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_VGA)) { + ret |= (1 << DIRTY_MEMORY_VGA); + } + if (mask & (1 << DIRTY_MEMORY_CODE) && + !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_CODE)) { + ret |= (1 << DIRTY_MEMORY_CODE); + } + if (mask & (1 << DIRTY_MEMORY_MIGRATION) && + !cpu_physical_memory_all_dirty(start, length, DIRTY_MEMORY_MIGRATION)) { + ret |= (1 << DIRTY_MEMORY_MIGRATION); + } + return ret; } static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr, From 9460dee4b2258e3990906fb34099481c8334c267 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 23 Mar 2015 11:41:32 +0100 Subject: [PATCH 27/62] memory: do not touch code dirty bitmap unless TCG is enabled cpu_physical_memory_set_dirty_lebitmap unconditionally syncs the DIRTY_MEMORY_CODE bitmap. This however is unused unless TCG is enabled. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/exec/ram_addr.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 3e42b4f602..98110108b7 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -162,11 +162,14 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION][page + k] |= temp; ram_list.dirty_memory[DIRTY_MEMORY_VGA][page + k] |= temp; - ram_list.dirty_memory[DIRTY_MEMORY_CODE][page + k] |= temp; + if (tcg_enabled()) { + ram_list.dirty_memory[DIRTY_MEMORY_CODE][page + k] |= temp; + } } } xen_modified_memory(start, pages << TARGET_PAGE_BITS); } else { + uint8_t clients = tcg_enabled() ? DIRTY_CLIENTS_ALL : DIRTY_CLIENTS_NOCODE; /* * bitmap-traveling is faster than memory-traveling (for addr...) * especially when most of the memory is not dirty. @@ -181,8 +184,7 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, addr = page_number * TARGET_PAGE_SIZE; ram_addr = start + addr; cpu_physical_memory_set_dirty_range(ram_addr, - TARGET_PAGE_SIZE * hpratio, - DIRTY_CLIENTS_ALL); + TARGET_PAGE_SIZE * hpratio, clients); } while (c != 0); } } From 9f02cfc84b85929947b32fe1674fbc6a429f332a Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 Dec 2014 11:23:14 +0000 Subject: [PATCH 28/62] bitmap: add atomic set functions Use atomic_or() for atomic bitmaps where several threads may set bits at the same time. This avoids the race condition between threads loading an element, bitwise ORing, and then storing the element. When setting all bits in a word we can avoid atomic ops and instead just use an smp_mb() at the end. Most bitmap users don't need atomicity so introduce new functions. Signed-off-by: Stefan Hajnoczi Message-Id: <1417519399-3166-2-git-send-email-stefanha@redhat.com> [Avoid barrier in the single word case, use full barrier instead of write. - Paolo] Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/qemu/bitmap.h | 2 ++ include/qemu/bitops.h | 14 ++++++++++++++ util/bitmap.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 54 insertions(+) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index f0273c965f..3e0a4f3e0c 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -39,6 +39,7 @@ * bitmap_empty(src, nbits) Are all bits zero in *src? * bitmap_full(src, nbits) Are all bits set in *src? * bitmap_set(dst, pos, nbits) Set specified bit area + * bitmap_set_atomic(dst, pos, nbits) Set specified bit area with atomic ops * bitmap_clear(dst, pos, nbits) Clear specified bit area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area */ @@ -226,6 +227,7 @@ static inline int bitmap_intersects(const unsigned long *src1, } void bitmap_set(unsigned long *map, long i, long len); +void bitmap_set_atomic(unsigned long *map, long i, long len); void bitmap_clear(unsigned long *map, long start, long nr); unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, diff --git a/include/qemu/bitops.h b/include/qemu/bitops.h index 8abdcf9077..8164225152 100644 --- a/include/qemu/bitops.h +++ b/include/qemu/bitops.h @@ -16,6 +16,7 @@ #include #include "host-utils.h" +#include "atomic.h" #define BITS_PER_BYTE CHAR_BIT #define BITS_PER_LONG (sizeof (unsigned long) * BITS_PER_BYTE) @@ -38,6 +39,19 @@ static inline void set_bit(long nr, unsigned long *addr) *p |= mask; } +/** + * set_bit_atomic - Set a bit in memory atomically + * @nr: the bit to set + * @addr: the address to start counting from + */ +static inline void set_bit_atomic(long nr, unsigned long *addr) +{ + unsigned long mask = BIT_MASK(nr); + unsigned long *p = addr + BIT_WORD(nr); + + atomic_or(p, mask); +} + /** * clear_bit - Clears a bit in memory * @nr: Bit to clear diff --git a/util/bitmap.c b/util/bitmap.c index 9c6bb526f6..39994af675 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -11,6 +11,7 @@ #include "qemu/bitops.h" #include "qemu/bitmap.h" +#include "qemu/atomic.h" /* * bitmaps provide an array of bits, implemented using an an @@ -177,6 +178,43 @@ void bitmap_set(unsigned long *map, long start, long nr) } } +void bitmap_set_atomic(unsigned long *map, long start, long nr) +{ + unsigned long *p = map + BIT_WORD(start); + const long size = start + nr; + int bits_to_set = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_set = BITMAP_FIRST_WORD_MASK(start); + + /* First word */ + if (nr - bits_to_set > 0) { + atomic_or(p, mask_to_set); + nr -= bits_to_set; + bits_to_set = BITS_PER_LONG; + mask_to_set = ~0UL; + p++; + } + + /* Full words */ + if (bits_to_set == BITS_PER_LONG) { + while (nr >= BITS_PER_LONG) { + *p = ~0UL; + nr -= BITS_PER_LONG; + p++; + } + } + + /* Last word */ + if (nr) { + mask_to_set &= BITMAP_LAST_WORD_MASK(size); + atomic_or(p, mask_to_set); + } else { + /* If we avoided the full barrier in atomic_or(), issue a + * barrier to account for the assignments in the while loop. + */ + smp_mb(); + } +} + void bitmap_clear(unsigned long *map, long start, long nr) { unsigned long *p = map + BIT_WORD(start); From 36546e5b803f6e363906607307f27c489441fd15 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 Dec 2014 11:23:15 +0000 Subject: [PATCH 29/62] bitmap: add atomic test and clear The new bitmap_test_and_clear_atomic() function clears a range and returns whether or not the bits were set. Signed-off-by: Stefan Hajnoczi Message-Id: <1417519399-3166-3-git-send-email-stefanha@redhat.com> [Test before xchg; then a full barrier is needed at the end just like in the previous patch. The barrier can be avoided if we did at least one xchg. - Paolo] Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/qemu/bitmap.h | 2 ++ util/bitmap.c | 45 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 47 insertions(+) diff --git a/include/qemu/bitmap.h b/include/qemu/bitmap.h index 3e0a4f3e0c..86dd9cd5fc 100644 --- a/include/qemu/bitmap.h +++ b/include/qemu/bitmap.h @@ -41,6 +41,7 @@ * bitmap_set(dst, pos, nbits) Set specified bit area * bitmap_set_atomic(dst, pos, nbits) Set specified bit area with atomic ops * bitmap_clear(dst, pos, nbits) Clear specified bit area + * bitmap_test_and_clear_atomic(dst, pos, nbits) Test and clear area * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area */ @@ -229,6 +230,7 @@ static inline int bitmap_intersects(const unsigned long *src1, void bitmap_set(unsigned long *map, long i, long len); void bitmap_set_atomic(unsigned long *map, long i, long len); void bitmap_clear(unsigned long *map, long start, long nr); +bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr); unsigned long bitmap_find_next_zero_area(unsigned long *map, unsigned long size, unsigned long start, diff --git a/util/bitmap.c b/util/bitmap.c index 39994af675..300a68e38c 100644 --- a/util/bitmap.c +++ b/util/bitmap.c @@ -235,6 +235,51 @@ void bitmap_clear(unsigned long *map, long start, long nr) } } +bool bitmap_test_and_clear_atomic(unsigned long *map, long start, long nr) +{ + unsigned long *p = map + BIT_WORD(start); + const long size = start + nr; + int bits_to_clear = BITS_PER_LONG - (start % BITS_PER_LONG); + unsigned long mask_to_clear = BITMAP_FIRST_WORD_MASK(start); + unsigned long dirty = 0; + unsigned long old_bits; + + /* First word */ + if (nr - bits_to_clear > 0) { + old_bits = atomic_fetch_and(p, ~mask_to_clear); + dirty |= old_bits & mask_to_clear; + nr -= bits_to_clear; + bits_to_clear = BITS_PER_LONG; + mask_to_clear = ~0UL; + p++; + } + + /* Full words */ + if (bits_to_clear == BITS_PER_LONG) { + while (nr >= BITS_PER_LONG) { + if (*p) { + old_bits = atomic_xchg(p, 0); + dirty |= old_bits; + } + nr -= BITS_PER_LONG; + p++; + } + } + + /* Last word */ + if (nr) { + mask_to_clear &= BITMAP_LAST_WORD_MASK(size); + old_bits = atomic_fetch_and(p, ~mask_to_clear); + dirty |= old_bits & mask_to_clear; + } else { + if (!dirty) { + smp_mb(); + } + } + + return dirty != 0; +} + #define ALIGN_MASK(x,mask) (((x)+(mask))&~(mask)) /** From d114875b9a1c21162f69a12d72f69a22e7bab376 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 Dec 2014 11:23:16 +0000 Subject: [PATCH 30/62] memory: use atomic ops for setting dirty memory bits Use set_bit_atomic() and bitmap_set_atomic() so that multiple threads can dirty memory without race conditions. Signed-off-by: Stefan Hajnoczi Message-Id: <1417519399-3166-4-git-send-email-stefanha@redhat.com> Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/exec/ram_addr.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 98110108b7..9f73076044 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -114,7 +114,7 @@ static inline void cpu_physical_memory_set_dirty_flag(ram_addr_t addr, unsigned client) { assert(client < DIRTY_MEMORY_NUM); - set_bit(addr >> TARGET_PAGE_BITS, ram_list.dirty_memory[client]); + set_bit_atomic(addr >> TARGET_PAGE_BITS, ram_list.dirty_memory[client]); } static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, @@ -122,17 +122,18 @@ static inline void cpu_physical_memory_set_dirty_range(ram_addr_t start, uint8_t mask) { unsigned long end, page; + unsigned long **d = ram_list.dirty_memory; end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; page = start >> TARGET_PAGE_BITS; if (likely(mask & (1 << DIRTY_MEMORY_MIGRATION))) { - bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION], page, end - page); + bitmap_set_atomic(d[DIRTY_MEMORY_MIGRATION], page, end - page); } if (unlikely(mask & (1 << DIRTY_MEMORY_VGA))) { - bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_VGA], page, end - page); + bitmap_set_atomic(d[DIRTY_MEMORY_VGA], page, end - page); } if (unlikely(mask & (1 << DIRTY_MEMORY_CODE))) { - bitmap_set(ram_list.dirty_memory[DIRTY_MEMORY_CODE], page, end - page); + bitmap_set_atomic(d[DIRTY_MEMORY_CODE], page, end - page); } xen_modified_memory(start, length); } @@ -159,11 +160,12 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, for (k = 0; k < nr; k++) { if (bitmap[k]) { unsigned long temp = leul_to_cpu(bitmap[k]); + unsigned long **d = ram_list.dirty_memory; - ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION][page + k] |= temp; - ram_list.dirty_memory[DIRTY_MEMORY_VGA][page + k] |= temp; + atomic_or(&d[DIRTY_MEMORY_MIGRATION][page + k], temp); + atomic_or(&d[DIRTY_MEMORY_VGA][page + k], temp); if (tcg_enabled()) { - ram_list.dirty_memory[DIRTY_MEMORY_CODE][page + k] |= temp; + atomic_or(&d[DIRTY_MEMORY_CODE][page + k], temp); } } } From 20015f72bda7d2f356c43580a5542a659afedf83 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 Dec 2014 11:23:17 +0000 Subject: [PATCH 31/62] migration: move dirty bitmap sync to ram_addr.h The dirty memory bitmap is managed by ram_addr.h and copied to migration_bitmap[] periodically during live migration. Move the code to sync the bitmap to ram_addr.h where related code lives. Signed-off-by: Stefan Hajnoczi Message-Id: <1417519399-3166-5-git-send-email-stefanha@redhat.com> Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- arch_init.c | 46 ++--------------------------------------- include/exec/ram_addr.h | 44 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 46 insertions(+), 44 deletions(-) diff --git a/arch_init.c b/arch_init.c index b5d90a41fa..d29447497b 100644 --- a/arch_init.c +++ b/arch_init.c @@ -609,52 +609,10 @@ ram_addr_t migration_bitmap_find_and_reset_dirty(MemoryRegion *mr, return (next - base) << TARGET_PAGE_BITS; } -static inline bool migration_bitmap_set_dirty(ram_addr_t addr) -{ - bool ret; - int nr = addr >> TARGET_PAGE_BITS; - - ret = test_and_set_bit(nr, migration_bitmap); - - if (!ret) { - migration_dirty_pages++; - } - return ret; -} - static void migration_bitmap_sync_range(ram_addr_t start, ram_addr_t length) { - ram_addr_t addr; - unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); - - /* start address is aligned at the start of a word? */ - if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) { - int k; - int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); - unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]; - - for (k = page; k < page + nr; k++) { - if (src[k]) { - unsigned long new_dirty; - new_dirty = ~migration_bitmap[k]; - migration_bitmap[k] |= src[k]; - new_dirty &= src[k]; - migration_dirty_pages += ctpopl(new_dirty); - src[k] = 0; - } - } - } else { - for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { - if (cpu_physical_memory_get_dirty(start + addr, - TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION)) { - cpu_physical_memory_reset_dirty(start + addr, - TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION); - migration_bitmap_set_dirty(start + addr); - } - } - } + migration_dirty_pages += + cpu_physical_memory_sync_dirty_bitmap(migration_bitmap, start, length); } diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 9f73076044..63db371850 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -218,5 +218,49 @@ static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length, unsigned client); +static inline +uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest, + ram_addr_t start, + ram_addr_t length) +{ + ram_addr_t addr; + unsigned long page = BIT_WORD(start >> TARGET_PAGE_BITS); + uint64_t num_dirty = 0; + + /* start address is aligned at the start of a word? */ + if (((page * BITS_PER_LONG) << TARGET_PAGE_BITS) == start) { + int k; + int nr = BITS_TO_LONGS(length >> TARGET_PAGE_BITS); + unsigned long *src = ram_list.dirty_memory[DIRTY_MEMORY_MIGRATION]; + + for (k = page; k < page + nr; k++) { + if (src[k]) { + unsigned long new_dirty; + new_dirty = ~dest[k]; + dest[k] |= src[k]; + new_dirty &= src[k]; + num_dirty += ctpopl(new_dirty); + src[k] = 0; + } + } + } else { + for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { + if (cpu_physical_memory_get_dirty(start + addr, + TARGET_PAGE_SIZE, + DIRTY_MEMORY_MIGRATION)) { + long k = (start + addr) >> TARGET_PAGE_BITS; + if (!test_and_set_bit(k, dest)) { + num_dirty++; + } + cpu_physical_memory_reset_dirty(start + addr, + TARGET_PAGE_SIZE, + DIRTY_MEMORY_MIGRATION); + } + } + } + + return num_dirty; +} + #endif #endif From 03eebc9e3246b9b3f5925aa41f7dfd7c1e467875 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 Dec 2014 11:23:18 +0000 Subject: [PATCH 32/62] memory: replace cpu_physical_memory_reset_dirty() with test-and-clear The cpu_physical_memory_reset_dirty() function is sometimes used together with cpu_physical_memory_get_dirty(). This is not atomic since two separate accesses to the dirty memory bitmap are made. Turn cpu_physical_memory_reset_dirty() and cpu_physical_memory_clear_dirty_range_type() into the atomic cpu_physical_memory_test_and_clear_dirty(). Signed-off-by: Stefan Hajnoczi Message-Id: <1417519399-3166-6-git-send-email-stefanha@redhat.com> Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- cputlb.c | 4 ++-- exec.c | 23 +++++++++++++++++------ include/exec/ram_addr.h | 33 ++++++++++----------------------- memory.c | 11 ++++------- 4 files changed, 33 insertions(+), 38 deletions(-) diff --git a/cputlb.c b/cputlb.c index dd1203b938..a50608676c 100644 --- a/cputlb.c +++ b/cputlb.c @@ -125,8 +125,8 @@ void tlb_flush_page(CPUState *cpu, target_ulong addr) can be detected */ void tlb_protect_code(ram_addr_t ram_addr) { - cpu_physical_memory_reset_dirty(ram_addr, TARGET_PAGE_SIZE, - DIRTY_MEMORY_CODE); + cpu_physical_memory_test_and_clear_dirty(ram_addr, TARGET_PAGE_SIZE, + DIRTY_MEMORY_CODE); } /* update the TLB so that writes in physical page 'phys_addr' are no longer diff --git a/exec.c b/exec.c index 162c57908e..487583b1bd 100644 --- a/exec.c +++ b/exec.c @@ -852,16 +852,27 @@ static void tlb_reset_dirty_range_all(ram_addr_t start, ram_addr_t length) } /* Note: start and end must be within the same ram block. */ -void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length, - unsigned client) +bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, + ram_addr_t length, + unsigned client) { - if (length == 0) - return; - cpu_physical_memory_clear_dirty_range_type(start, length, client); + unsigned long end, page; + bool dirty; - if (tcg_enabled()) { + if (length == 0) { + return false; + } + + end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; + page = start >> TARGET_PAGE_BITS; + dirty = bitmap_test_and_clear_atomic(ram_list.dirty_memory[client], + page, end - page); + + if (dirty && tcg_enabled()) { tlb_reset_dirty_range_all(start, length); } + + return dirty; } /* Called from RCU critical section */ diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index 63db371850..a2beea7925 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -194,30 +194,19 @@ static inline void cpu_physical_memory_set_dirty_lebitmap(unsigned long *bitmap, } #endif /* not _WIN32 */ -static inline void cpu_physical_memory_clear_dirty_range_type(ram_addr_t start, - ram_addr_t length, - unsigned client) -{ - unsigned long end, page; - - assert(client < DIRTY_MEMORY_NUM); - end = TARGET_PAGE_ALIGN(start + length) >> TARGET_PAGE_BITS; - page = start >> TARGET_PAGE_BITS; - bitmap_clear(ram_list.dirty_memory[client], page, end - page); -} +bool cpu_physical_memory_test_and_clear_dirty(ram_addr_t start, + ram_addr_t length, + unsigned client); static inline void cpu_physical_memory_clear_dirty_range(ram_addr_t start, ram_addr_t length) { - cpu_physical_memory_clear_dirty_range_type(start, length, DIRTY_MEMORY_MIGRATION); - cpu_physical_memory_clear_dirty_range_type(start, length, DIRTY_MEMORY_VGA); - cpu_physical_memory_clear_dirty_range_type(start, length, DIRTY_MEMORY_CODE); + cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_MIGRATION); + cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_VGA); + cpu_physical_memory_test_and_clear_dirty(start, length, DIRTY_MEMORY_CODE); } -void cpu_physical_memory_reset_dirty(ram_addr_t start, ram_addr_t length, - unsigned client); - static inline uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest, ram_addr_t start, @@ -245,16 +234,14 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest, } } else { for (addr = 0; addr < length; addr += TARGET_PAGE_SIZE) { - if (cpu_physical_memory_get_dirty(start + addr, - TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION)) { + if (cpu_physical_memory_test_and_clear_dirty( + start + addr, + TARGET_PAGE_SIZE, + DIRTY_MEMORY_MIGRATION)) { long k = (start + addr) >> TARGET_PAGE_BITS; if (!test_and_set_bit(k, dest)) { num_dirty++; } - cpu_physical_memory_reset_dirty(start + addr, - TARGET_PAGE_SIZE, - DIRTY_MEMORY_MIGRATION); } } } diff --git a/memory.c b/memory.c index 418cac7ce1..0c5d32807e 100644 --- a/memory.c +++ b/memory.c @@ -1468,13 +1468,9 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size, unsigned client) { - bool ret; assert(mr->terminates); - ret = cpu_physical_memory_get_dirty(mr->ram_addr + addr, size, client); - if (ret) { - cpu_physical_memory_reset_dirty(mr->ram_addr + addr, size, client); - } - return ret; + return cpu_physical_memory_test_and_clear_dirty(mr->ram_addr + addr, + size, client); } @@ -1518,7 +1514,8 @@ void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size, unsigned client) { assert(mr->terminates); - cpu_physical_memory_reset_dirty(mr->ram_addr + addr, size, client); + cpu_physical_memory_test_and_clear_dirty(mr->ram_addr + addr, size, + client); } int memory_region_get_fd(MemoryRegion *mr) From 5f2cb94688bd0b2c88e0fc1ac3c4582965b7b106 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 2 Dec 2014 11:23:19 +0000 Subject: [PATCH 33/62] memory: make cpu_physical_memory_sync_dirty_bitmap() fully atomic The fast path of cpu_physical_memory_sync_dirty_bitmap() directly manipulates the dirty bitmap. Use atomic_xchg() to make the test-and-clear atomic. Signed-off-by: Stefan Hajnoczi Message-Id: <1417519399-3166-7-git-send-email-stefanha@redhat.com> [Only do xchg on nonzero words. - Paolo] Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- include/exec/ram_addr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/include/exec/ram_addr.h b/include/exec/ram_addr.h index a2beea7925..c113f21140 100644 --- a/include/exec/ram_addr.h +++ b/include/exec/ram_addr.h @@ -224,12 +224,12 @@ uint64_t cpu_physical_memory_sync_dirty_bitmap(unsigned long *dest, for (k = page; k < page + nr; k++) { if (src[k]) { + unsigned long bits = atomic_xchg(&src[k], 0); unsigned long new_dirty; new_dirty = ~dest[k]; - dest[k] |= src[k]; - new_dirty &= src[k]; + dest[k] |= bits; + new_dirty &= bits; num_dirty += ctpopl(new_dirty); - src[k] = 0; } } } else { From ec05ec26f940564b1e07bf88857035ec27e21dd8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 29 Mar 2015 09:31:43 +0200 Subject: [PATCH 34/62] memory: use mr->ram_addr in "is this RAM?" assertions mr->terminates alone doesn't guarantee that we are looking at a RAM region. mr->ram_addr also has to be checked, in order to distinguish RAM and I/O regions. So, do the following: 1) add a new define RAM_ADDR_INVALID, and test it in the assertions instead of mr->terminates 2) IOMMU regions were not setting mr->ram_addr to a bogus value, initialize it in the instance_init function so that the new assertions would fire for IOMMU regions as well. Reviewed-by: Fam Zheng Signed-off-by: Paolo Bonzini --- memory.c | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/memory.c b/memory.c index 0c5d32807e..3ac0bd20d2 100644 --- a/memory.c +++ b/memory.c @@ -28,6 +28,8 @@ //#define DEBUG_UNASSIGNED +#define RAM_ADDR_INVALID (~(ram_addr_t)0) + static unsigned memory_region_transaction_depth; static bool memory_region_update_pending; static bool ioeventfd_update_pending; @@ -1007,6 +1009,7 @@ static void memory_region_initfn(Object *obj) ObjectProperty *op; mr->ops = &unassigned_mem_ops; + mr->ram_addr = RAM_ADDR_INVALID; mr->enabled = true; mr->romd_mode = true; mr->destructor = memory_region_destructor_none; @@ -1198,7 +1201,6 @@ void memory_region_init_io(MemoryRegion *mr, mr->ops = ops; mr->opaque = opaque; mr->terminates = true; - mr->ram_addr = ~(ram_addr_t)0; } void memory_region_init_ram(MemoryRegion *mr, @@ -1453,14 +1455,14 @@ void memory_region_set_log(MemoryRegion *mr, bool log, unsigned client) bool memory_region_get_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size, unsigned client) { - assert(mr->terminates); + assert(mr->ram_addr != RAM_ADDR_INVALID); return cpu_physical_memory_get_dirty(mr->ram_addr + addr, size, client); } void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size) { - assert(mr->terminates); + assert(mr->ram_addr != RAM_ADDR_INVALID); cpu_physical_memory_set_dirty_range(mr->ram_addr + addr, size, memory_region_get_dirty_log_mask(mr)); } @@ -1468,7 +1470,7 @@ void memory_region_set_dirty(MemoryRegion *mr, hwaddr addr, bool memory_region_test_and_clear_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size, unsigned client) { - assert(mr->terminates); + assert(mr->ram_addr != RAM_ADDR_INVALID); return cpu_physical_memory_test_and_clear_dirty(mr->ram_addr + addr, size, client); } @@ -1513,7 +1515,7 @@ void memory_region_rom_device_set_romd(MemoryRegion *mr, bool romd_mode) void memory_region_reset_dirty(MemoryRegion *mr, hwaddr addr, hwaddr size, unsigned client) { - assert(mr->terminates); + assert(mr->ram_addr != RAM_ADDR_INVALID); cpu_physical_memory_test_and_clear_dirty(mr->ram_addr + addr, size, client); } @@ -1524,7 +1526,7 @@ int memory_region_get_fd(MemoryRegion *mr) return memory_region_get_fd(mr->alias); } - assert(mr->terminates); + assert(mr->ram_addr != RAM_ADDR_INVALID); return qemu_get_ram_fd(mr->ram_addr & TARGET_PAGE_MASK); } @@ -1535,14 +1537,14 @@ void *memory_region_get_ram_ptr(MemoryRegion *mr) return memory_region_get_ram_ptr(mr->alias) + mr->alias_offset; } - assert(mr->terminates); + assert(mr->ram_addr != RAM_ADDR_INVALID); return qemu_get_ram_ptr(mr->ram_addr & TARGET_PAGE_MASK); } void memory_region_ram_resize(MemoryRegion *mr, ram_addr_t newsize, Error **errp) { - assert(mr->terminates); + assert(mr->ram_addr != RAM_ADDR_INVALID); qemu_ram_resize(mr->ram_addr, newsize, errp); } From 5045e9d912588a7421ab899ba510025722666fd1 Mon Sep 17 00:00:00 2001 From: Victor CLEMENT Date: Fri, 29 May 2015 17:14:04 +0200 Subject: [PATCH 35/62] icount: implement a new icount_sleep mode toggleing real-time cpu sleep When the icount_sleep mode is disabled, the QEMU_VIRTUAL_CLOCK runs at the maximum possible speed by warping the sleep times of the virtual cpu to the soonest clock deadline. The virtual clock will be updated only according the instruction counter. Signed-off-by: Victor CLEMENT Message-Id: <1432912446-9811-2-git-send-email-victor.clement@openwide.fr> Signed-off-by: Paolo Bonzini --- cpus.c | 70 ++++++++++++++++++++++++++++++++++++---------------------- 1 file changed, 44 insertions(+), 26 deletions(-) diff --git a/cpus.c b/cpus.c index de6469fa8d..688eb73ec2 100644 --- a/cpus.c +++ b/cpus.c @@ -105,6 +105,7 @@ static bool all_cpu_threads_idle(void) /* Protected by TimersState seqlock */ +static bool icount_sleep = true; static int64_t vm_clock_warp_start = -1; /* Conversion factor from emulated instructions to virtual clock ticks. */ static int icount_time_shift; @@ -393,15 +394,18 @@ void qemu_clock_warp(QEMUClockType type) return; } - /* - * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now. - * This ensures that the deadline for the timer is computed correctly below. - * This also makes sure that the insn counter is synchronized before the - * CPU starts running, in case the CPU is woken by an event other than - * the earliest QEMU_CLOCK_VIRTUAL timer. - */ - icount_warp_rt(NULL); - timer_del(icount_warp_timer); + if (icount_sleep) { + /* + * If the CPUs have been sleeping, advance QEMU_CLOCK_VIRTUAL timer now. + * This ensures that the deadline for the timer is computed correctly + * below. + * This also makes sure that the insn counter is synchronized before + * the CPU starts running, in case the CPU is woken by an event other + * than the earliest QEMU_CLOCK_VIRTUAL timer. + */ + icount_warp_rt(NULL); + timer_del(icount_warp_timer); + } if (!all_cpu_threads_idle()) { return; } @@ -425,23 +429,35 @@ void qemu_clock_warp(QEMUClockType type) * interrupt to wake it up, but the interrupt never comes because * the vCPU isn't running any insns and thus doesn't advance the * QEMU_CLOCK_VIRTUAL. - * - * An extreme solution for this problem would be to never let VCPUs - * sleep in icount mode if there is a pending QEMU_CLOCK_VIRTUAL - * timer; rather time could just advance to the next QEMU_CLOCK_VIRTUAL - * event. Instead, we do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL - * after some "real" time, (related to the time left until the next - * event) has passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. - * This avoids that the warps are visible externally; for example, - * you will not be sending network packets continuously instead of - * every 100ms. */ - seqlock_write_lock(&timers_state.vm_clock_seqlock); - if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) { - vm_clock_warp_start = clock; + if (!icount_sleep) { + /* + * We never let VCPUs sleep in no sleep icount mode. + * If there is a pending QEMU_CLOCK_VIRTUAL timer we just advance + * to the next QEMU_CLOCK_VIRTUAL event and notify it. + * It is useful when we want a deterministic execution time, + * isolated from host latencies. + */ + seqlock_write_lock(&timers_state.vm_clock_seqlock); + timers_state.qemu_icount_bias += deadline; + seqlock_write_unlock(&timers_state.vm_clock_seqlock); + qemu_clock_notify(QEMU_CLOCK_VIRTUAL); + } else { + /* + * We do stop VCPUs and only advance QEMU_CLOCK_VIRTUAL after some + * "real" time, (related to the time left until the next event) has + * passed. The QEMU_CLOCK_VIRTUAL_RT clock will do this. + * This avoids that the warps are visible externally; for example, + * you will not be sending network packets continuously instead of + * every 100ms. + */ + seqlock_write_lock(&timers_state.vm_clock_seqlock); + if (vm_clock_warp_start == -1 || vm_clock_warp_start > clock) { + vm_clock_warp_start = clock; + } + seqlock_write_unlock(&timers_state.vm_clock_seqlock); + timer_mod_anticipate(icount_warp_timer, clock + deadline); } - seqlock_write_unlock(&timers_state.vm_clock_seqlock); - timer_mod_anticipate(icount_warp_timer, clock + deadline); } else if (deadline == 0) { qemu_clock_notify(QEMU_CLOCK_VIRTUAL); } @@ -504,9 +520,11 @@ void configure_icount(QemuOpts *opts, Error **errp) } return; } + if (icount_sleep) { + icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, + icount_warp_rt, NULL); + } icount_align_option = qemu_opt_get_bool(opts, "align", false); - icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, - icount_warp_rt, NULL); if (strcmp(option, "auto") != 0) { errno = 0; icount_time_shift = strtol(option, &rem_str, 0); From f1f4b57e88ff7c9cb20b074ff6106fd8f4397baa Mon Sep 17 00:00:00 2001 From: Victor CLEMENT Date: Fri, 29 May 2015 17:14:05 +0200 Subject: [PATCH 36/62] icount: add sleep parameter to the icount option to set icount_sleep mode The 'sleep' parameter sets the icount_sleep mode, which is enabled by default. To disable it, add the 'sleep=no' parameter (or 'nosleep') to the qemu -icount option. Signed-off-by: Victor CLEMENT Message-Id: <1432912446-9811-3-git-send-email-victor.clement@openwide.fr> Signed-off-by: Paolo Bonzini --- cpus.c | 9 +++++++++ qemu-options.hx | 12 ++++++++++-- vl.c | 3 +++ 3 files changed, 22 insertions(+), 2 deletions(-) diff --git a/cpus.c b/cpus.c index 688eb73ec2..4e90e63b43 100644 --- a/cpus.c +++ b/cpus.c @@ -520,11 +520,18 @@ void configure_icount(QemuOpts *opts, Error **errp) } return; } + + icount_sleep = qemu_opt_get_bool(opts, "sleep", true); if (icount_sleep) { icount_warp_timer = timer_new_ns(QEMU_CLOCK_VIRTUAL_RT, icount_warp_rt, NULL); } + icount_align_option = qemu_opt_get_bool(opts, "align", false); + + if (icount_align_option && !icount_sleep) { + error_setg(errp, "align=on and sleep=no are incompatible"); + } if (strcmp(option, "auto") != 0) { errno = 0; icount_time_shift = strtol(option, &rem_str, 0); @@ -535,6 +542,8 @@ void configure_icount(QemuOpts *opts, Error **errp) return; } else if (icount_align_option) { error_setg(errp, "shift=auto and align=on are incompatible"); + } else if (!icount_sleep) { + error_setg(errp, "shift=auto and sleep=no are incompatible"); } use_icount = 2; diff --git a/qemu-options.hx b/qemu-options.hx index b3db6cbe86..85883f165a 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -3100,9 +3100,10 @@ re-inject them. ETEXI DEF("icount", HAS_ARG, QEMU_OPTION_icount, \ - "-icount [shift=N|auto][,align=on|off]\n" \ + "-icount [shift=N|auto][,align=on|off][,sleep=no]\n" \ " enable virtual instruction counter with 2^N clock ticks per\n" \ - " instruction and enable aligning the host and virtual clocks\n", QEMU_ARCH_ALL) + " instruction, enable aligning the host and virtual clocks\n" \ + " or disable real time cpu sleeping\n", QEMU_ARCH_ALL) STEXI @item -icount [shift=@var{N}|auto] @findex -icount @@ -3111,6 +3112,13 @@ instruction every 2^@var{N} ns of virtual time. If @code{auto} is specified then the virtual cpu speed will be automatically adjusted to keep virtual time within a few seconds of real time. +When the virtual cpu is sleeping, the virtual time will advance at default +speed unless @option{sleep=no} is specified. +With @option{sleep=no}, the virtual time will jump to the next timer deadline +instantly whenever the virtual cpu goes to sleep mode and will not advance +if no timer is enabled. This behavior give deterministic execution times from +the guest point of view. + Note that while this option can give deterministic behavior, it does not provide cycle accurate emulation. Modern CPUs contain superscalar out of order cores with complex cache hierarchies. The number of instructions diff --git a/vl.c b/vl.c index cdd81b4486..df5a7273c9 100644 --- a/vl.c +++ b/vl.c @@ -468,6 +468,9 @@ static QemuOptsList qemu_icount_opts = { }, { .name = "align", .type = QEMU_OPT_BOOL, + }, { + .name = "sleep", + .type = QEMU_OPT_BOOL, }, { /* end of list */ } }, From d7a0f71d9aac33e58d39fdbe4861d440af44fa8b Mon Sep 17 00:00:00 2001 From: Victor CLEMENT Date: Fri, 29 May 2015 17:14:06 +0200 Subject: [PATCH 37/62] icount: print a warning if there is no more deadline in sleep=no mode While qemu is running in sleep=no mode, a warning will be printed when no timer deadline is set. As this mode is intended for getting deterministic virtual time, if no timer is set on the virtual clock this determinism is broken. Signed-off-by: Victor CLEMENT Message-Id: <1432912446-9811-4-git-send-email-victor.clement@openwide.fr> Signed-off-by: Paolo Bonzini --- cpus.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/cpus.c b/cpus.c index 4e90e63b43..f38b858f9b 100644 --- a/cpus.c +++ b/cpus.c @@ -419,6 +419,11 @@ void qemu_clock_warp(QEMUClockType type) clock = qemu_clock_get_ns(QEMU_CLOCK_VIRTUAL_RT); deadline = qemu_clock_deadline_ns_all(QEMU_CLOCK_VIRTUAL); if (deadline < 0) { + static bool notified; + if (!icount_sleep && !notified) { + error_report("WARNING: icount sleep disabled and no active timers"); + notified = true; + } return; } From f794aa4a2fd772a3ec413c4e478cc23857cfee98 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Apr 2015 14:52:04 +0200 Subject: [PATCH 38/62] target-i386: introduce cpu_get_mem_attrs Signed-off-by: Paolo Bonzini --- include/exec/memattrs.h | 4 +++- target-i386/cpu.h | 5 +++++ target-i386/helper.c | 3 ++- target-i386/kvm.c | 2 +- 4 files changed, 11 insertions(+), 3 deletions(-) diff --git a/include/exec/memattrs.h b/include/exec/memattrs.h index 96dc440423..f8537a8d91 100644 --- a/include/exec/memattrs.h +++ b/include/exec/memattrs.h @@ -29,7 +29,9 @@ typedef struct MemTxAttrs { * "didn't specify" if necessary. */ unsigned int unspecified:1; - /* ARM/AMBA TrustZone Secure access */ + /* ARM/AMBA: TrustZone Secure access + * x86: System Management Mode access + */ unsigned int secure:1; /* Memory access is usermode (unprivileged) */ unsigned int user:1; diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 26182bdc7e..74e8819dba 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -1292,6 +1292,11 @@ static inline void cpu_load_efer(CPUX86State *env, uint64_t val) } } +static inline MemTxAttrs cpu_get_mem_attrs(CPUX86State *env) +{ + return ((MemTxAttrs) { .secure = (env->hflags & HF_SMM_MASK) != 0 }); +} + /* fpu_helper.c */ void cpu_set_mxcsr(CPUX86State *env, uint32_t val); void cpu_set_fpuc(CPUX86State *env, uint16_t val); diff --git a/target-i386/helper.c b/target-i386/helper.c index 4f1ddf701e..62e801b28e 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -771,7 +771,8 @@ do_check_protect_pse36: page_offset = vaddr & (page_size - 1); paddr = pte + page_offset; - tlb_set_page(cs, vaddr, paddr, prot, mmu_idx, page_size); + tlb_set_page_with_attrs(cs, vaddr, paddr, cpu_get_mem_attrs(env), + prot, mmu_idx, page_size); return 0; do_fault_rsvd: error_code |= PG_ERROR_RSVD_MASK; diff --git a/target-i386/kvm.c b/target-i386/kvm.c index ca2da84501..5a236e3103 100644 --- a/target-i386/kvm.c +++ b/target-i386/kvm.c @@ -2259,7 +2259,7 @@ MemTxAttrs kvm_arch_post_run(CPUState *cpu, struct kvm_run *run) } cpu_set_apic_tpr(x86_cpu->apic_state, run->cr8); cpu_set_apic_base(x86_cpu->apic_state, run->apic_base); - return MEMTXATTRS_UNSPECIFIED; + return cpu_get_mem_attrs(env); } int kvm_arch_process_async_events(CPUState *cs) From b216aa6c0fcbaa8ff4128969c14594896a5485a4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Apr 2015 13:39:37 +0200 Subject: [PATCH 39/62] target-i386: Use correct memory attributes for memory accesses These include page table walks, SVM accesses and SMM state save accesses. The bulk of the patch is obtained with sed -i 's/\(\<[a-z_]*_phys\(_notdirty\)\?\>(cs\)->as,/x86_\1,/' Signed-off-by: Paolo Bonzini --- target-i386/cpu.h | 12 ++ target-i386/helper.c | 132 ++++++++++++++++--- target-i386/seg_helper.c | 12 +- target-i386/smm_helper.c | 272 +++++++++++++++++++-------------------- target-i386/svm_helper.c | 230 ++++++++++++++++----------------- 5 files changed, 381 insertions(+), 277 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 74e8819dba..7a06495834 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -1105,6 +1105,18 @@ int x86_cpu_handle_mmu_fault(CPUState *cpu, vaddr addr, int is_write, int mmu_idx); void x86_cpu_set_a20(X86CPU *cpu, int a20_state); +#ifndef CONFIG_USER_ONLY +uint8_t x86_ldub_phys(CPUState *cs, hwaddr addr); +uint32_t x86_lduw_phys(CPUState *cs, hwaddr addr); +uint32_t x86_ldl_phys(CPUState *cs, hwaddr addr); +uint64_t x86_ldq_phys(CPUState *cs, hwaddr addr); +void x86_stb_phys(CPUState *cs, hwaddr addr, uint8_t val); +void x86_stl_phys_notdirty(CPUState *cs, hwaddr addr, uint32_t val); +void x86_stw_phys(CPUState *cs, hwaddr addr, uint32_t val); +void x86_stl_phys(CPUState *cs, hwaddr addr, uint32_t val); +void x86_stq_phys(CPUState *cs, hwaddr addr, uint64_t val); +#endif + static inline bool hw_local_breakpoint_enabled(unsigned long dr7, int index) { return (dr7 >> (index * 2)) & 1; diff --git a/target-i386/helper.c b/target-i386/helper.c index 62e801b28e..5480a96a0f 100644 --- a/target-i386/helper.c +++ b/target-i386/helper.c @@ -565,7 +565,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask; - pml4e = ldq_phys(cs->as, pml4e_addr); + pml4e = x86_ldq_phys(cs, pml4e_addr); if (!(pml4e & PG_PRESENT_MASK)) { goto do_fault; } @@ -574,12 +574,12 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, } if (!(pml4e & PG_ACCESSED_MASK)) { pml4e |= PG_ACCESSED_MASK; - stl_phys_notdirty(cs->as, pml4e_addr, pml4e); + x86_stl_phys_notdirty(cs, pml4e_addr, pml4e); } ptep = pml4e ^ PG_NX_MASK; pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) & env->a20_mask; - pdpe = ldq_phys(cs->as, pdpe_addr); + pdpe = x86_ldq_phys(cs, pdpe_addr); if (!(pdpe & PG_PRESENT_MASK)) { goto do_fault; } @@ -589,7 +589,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, ptep &= pdpe ^ PG_NX_MASK; if (!(pdpe & PG_ACCESSED_MASK)) { pdpe |= PG_ACCESSED_MASK; - stl_phys_notdirty(cs->as, pdpe_addr, pdpe); + x86_stl_phys_notdirty(cs, pdpe_addr, pdpe); } if (pdpe & PG_PSE_MASK) { /* 1 GB page */ @@ -604,7 +604,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, /* XXX: load them when cr3 is loaded ? */ pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & env->a20_mask; - pdpe = ldq_phys(cs->as, pdpe_addr); + pdpe = x86_ldq_phys(cs, pdpe_addr); if (!(pdpe & PG_PRESENT_MASK)) { goto do_fault; } @@ -617,7 +617,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) & env->a20_mask; - pde = ldq_phys(cs->as, pde_addr); + pde = x86_ldq_phys(cs, pde_addr); if (!(pde & PG_PRESENT_MASK)) { goto do_fault; } @@ -635,11 +635,11 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, /* 4 KB page */ if (!(pde & PG_ACCESSED_MASK)) { pde |= PG_ACCESSED_MASK; - stl_phys_notdirty(cs->as, pde_addr, pde); + x86_stl_phys_notdirty(cs, pde_addr, pde); } pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) & env->a20_mask; - pte = ldq_phys(cs->as, pte_addr); + pte = x86_ldq_phys(cs, pte_addr); if (!(pte & PG_PRESENT_MASK)) { goto do_fault; } @@ -655,7 +655,7 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, /* page directory entry */ pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask; - pde = ldl_phys(cs->as, pde_addr); + pde = x86_ldl_phys(cs, pde_addr); if (!(pde & PG_PRESENT_MASK)) { goto do_fault; } @@ -676,13 +676,13 @@ int x86_cpu_handle_mmu_fault(CPUState *cs, vaddr addr, if (!(pde & PG_ACCESSED_MASK)) { pde |= PG_ACCESSED_MASK; - stl_phys_notdirty(cs->as, pde_addr, pde); + x86_stl_phys_notdirty(cs, pde_addr, pde); } /* page directory entry */ pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask; - pte = ldl_phys(cs->as, pte_addr); + pte = x86_ldl_phys(cs, pte_addr); if (!(pte & PG_PRESENT_MASK)) { goto do_fault; } @@ -737,7 +737,7 @@ do_check_protect_pse36: if (is_dirty) { pte |= PG_DIRTY_MASK; } - stl_phys_notdirty(cs->as, pte_addr, pte); + x86_stl_phys_notdirty(cs, pte_addr, pte); } /* the page can be put in the TLB */ @@ -789,7 +789,7 @@ do_check_protect_pse36: error_code |= PG_ERROR_I_D_MASK; if (env->intercept_exceptions & (1 << EXCP0E_PAGE)) { /* cr2 is not modified in case of exceptions */ - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), addr); } else { @@ -828,13 +828,13 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) } pml4e_addr = ((env->cr[3] & ~0xfff) + (((addr >> 39) & 0x1ff) << 3)) & env->a20_mask; - pml4e = ldq_phys(cs->as, pml4e_addr); + pml4e = x86_ldq_phys(cs, pml4e_addr); if (!(pml4e & PG_PRESENT_MASK)) { return -1; } pdpe_addr = ((pml4e & PG_ADDRESS_MASK) + (((addr >> 30) & 0x1ff) << 3)) & env->a20_mask; - pdpe = ldq_phys(cs->as, pdpe_addr); + pdpe = x86_ldq_phys(cs, pdpe_addr); if (!(pdpe & PG_PRESENT_MASK)) { return -1; } @@ -849,14 +849,14 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) { pdpe_addr = ((env->cr[3] & ~0x1f) + ((addr >> 27) & 0x18)) & env->a20_mask; - pdpe = ldq_phys(cs->as, pdpe_addr); + pdpe = x86_ldq_phys(cs, pdpe_addr); if (!(pdpe & PG_PRESENT_MASK)) return -1; } pde_addr = ((pdpe & PG_ADDRESS_MASK) + (((addr >> 21) & 0x1ff) << 3)) & env->a20_mask; - pde = ldq_phys(cs->as, pde_addr); + pde = x86_ldq_phys(cs, pde_addr); if (!(pde & PG_PRESENT_MASK)) { return -1; } @@ -869,7 +869,7 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) pte_addr = ((pde & PG_ADDRESS_MASK) + (((addr >> 12) & 0x1ff) << 3)) & env->a20_mask; page_size = 4096; - pte = ldq_phys(cs->as, pte_addr); + pte = x86_ldq_phys(cs, pte_addr); } if (!(pte & PG_PRESENT_MASK)) { return -1; @@ -879,7 +879,7 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) /* page directory entry */ pde_addr = ((env->cr[3] & ~0xfff) + ((addr >> 20) & 0xffc)) & env->a20_mask; - pde = ldl_phys(cs->as, pde_addr); + pde = x86_ldl_phys(cs, pde_addr); if (!(pde & PG_PRESENT_MASK)) return -1; if ((pde & PG_PSE_MASK) && (env->cr[4] & CR4_PSE_MASK)) { @@ -888,7 +888,7 @@ hwaddr x86_cpu_get_phys_page_debug(CPUState *cs, vaddr addr) } else { /* page directory entry */ pte_addr = ((pde & ~0xfff) + ((addr >> 10) & 0xffc)) & env->a20_mask; - pte = ldl_phys(cs->as, pte_addr); + pte = x86_ldl_phys(cs, pte_addr); if (!(pte & PG_PRESENT_MASK)) { return -1; } @@ -1277,3 +1277,95 @@ void x86_cpu_exec_exit(CPUState *cs) env->eflags = cpu_compute_eflags(env); } + +#ifndef CONFIG_USER_ONLY +uint8_t x86_ldub_phys(CPUState *cs, hwaddr addr) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + return address_space_ldub(cs->as, addr, + cpu_get_mem_attrs(env), + NULL); +} + +uint32_t x86_lduw_phys(CPUState *cs, hwaddr addr) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + return address_space_lduw(cs->as, addr, + cpu_get_mem_attrs(env), + NULL); +} + +uint32_t x86_ldl_phys(CPUState *cs, hwaddr addr) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + return address_space_ldl(cs->as, addr, + cpu_get_mem_attrs(env), + NULL); +} + +uint64_t x86_ldq_phys(CPUState *cs, hwaddr addr) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + return address_space_ldq(cs->as, addr, + cpu_get_mem_attrs(env), + NULL); +} + +void x86_stb_phys(CPUState *cs, hwaddr addr, uint8_t val) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + address_space_stb(cs->as, addr, val, + cpu_get_mem_attrs(env), + NULL); +} + +void x86_stl_phys_notdirty(CPUState *cs, hwaddr addr, uint32_t val) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + address_space_stl_notdirty(cs->as, addr, val, + cpu_get_mem_attrs(env), + NULL); +} + +void x86_stw_phys(CPUState *cs, hwaddr addr, uint32_t val) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + address_space_stw(cs->as, addr, val, + cpu_get_mem_attrs(env), + NULL); +} + +void x86_stl_phys(CPUState *cs, hwaddr addr, uint32_t val) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + address_space_stl(cs->as, addr, val, + cpu_get_mem_attrs(env), + NULL); +} + +void x86_stq_phys(CPUState *cs, hwaddr addr, uint64_t val) +{ + X86CPU *cpu = X86_CPU(cs); + CPUX86State *env = &cpu->env; + + address_space_stq(cs->as, addr, val, + cpu_get_mem_attrs(env), + NULL); +} +#endif diff --git a/target-i386/seg_helper.c b/target-i386/seg_helper.c index 2bc757af31..8a4271ebe2 100644 --- a/target-i386/seg_helper.c +++ b/target-i386/seg_helper.c @@ -1144,7 +1144,7 @@ static void handle_even_inj(CPUX86State *env, int intno, int is_int, int error_code, int is_hw, int rm) { CPUState *cs = CPU(x86_env_get_cpu(env)); - uint32_t event_inj = ldl_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + uint32_t event_inj = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj)); if (!(event_inj & SVM_EVTINJ_VALID)) { @@ -1158,11 +1158,11 @@ static void handle_even_inj(CPUX86State *env, int intno, int is_int, event_inj = intno | type | SVM_EVTINJ_VALID; if (!rm && exception_has_error_code(intno)) { event_inj |= SVM_EVTINJ_VALID_ERR; - stl_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err), error_code); } - stl_phys(cs->as, + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj); } @@ -1240,11 +1240,11 @@ static void do_interrupt_all(X86CPU *cpu, int intno, int is_int, #if !defined(CONFIG_USER_ONLY) if (env->hflags & HF_SVMI_MASK) { CPUState *cs = CPU(cpu); - uint32_t event_inj = ldl_phys(cs->as, env->vm_vmcb + + uint32_t event_inj = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj)); - stl_phys(cs->as, + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj), event_inj & ~SVM_EVTINJ_VALID); } @@ -1339,7 +1339,7 @@ bool x86_cpu_exec_interrupt(CPUState *cs, int interrupt_request) int intno; /* FIXME: this should respect TPR */ cpu_svm_check_intercept_param(env, SVM_EXIT_VINTR, 0); - intno = ldl_phys(cs->as, env->vm_vmcb + intno = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.int_vector)); qemu_log_mask(CPU_LOG_TB_IN_ASM, "Servicing virtual hardware INT=0x%02x\n", intno); diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c index c62f46847c..b9971b6e19 100644 --- a/target-i386/smm_helper.c +++ b/target-i386/smm_helper.c @@ -60,83 +60,83 @@ void do_smm_enter(X86CPU *cpu) for (i = 0; i < 6; i++) { dt = &env->segs[i]; offset = 0x7e00 + i * 16; - stw_phys(cs->as, sm_state + offset, dt->selector); - stw_phys(cs->as, sm_state + offset + 2, (dt->flags >> 8) & 0xf0ff); - stl_phys(cs->as, sm_state + offset + 4, dt->limit); - stq_phys(cs->as, sm_state + offset + 8, dt->base); + x86_stw_phys(cs, sm_state + offset, dt->selector); + x86_stw_phys(cs, sm_state + offset + 2, (dt->flags >> 8) & 0xf0ff); + x86_stl_phys(cs, sm_state + offset + 4, dt->limit); + x86_stq_phys(cs, sm_state + offset + 8, dt->base); } - stq_phys(cs->as, sm_state + 0x7e68, env->gdt.base); - stl_phys(cs->as, sm_state + 0x7e64, env->gdt.limit); + x86_stq_phys(cs, sm_state + 0x7e68, env->gdt.base); + x86_stl_phys(cs, sm_state + 0x7e64, env->gdt.limit); - stw_phys(cs->as, sm_state + 0x7e70, env->ldt.selector); - stq_phys(cs->as, sm_state + 0x7e78, env->ldt.base); - stl_phys(cs->as, sm_state + 0x7e74, env->ldt.limit); - stw_phys(cs->as, sm_state + 0x7e72, (env->ldt.flags >> 8) & 0xf0ff); + x86_stw_phys(cs, sm_state + 0x7e70, env->ldt.selector); + x86_stq_phys(cs, sm_state + 0x7e78, env->ldt.base); + x86_stl_phys(cs, sm_state + 0x7e74, env->ldt.limit); + x86_stw_phys(cs, sm_state + 0x7e72, (env->ldt.flags >> 8) & 0xf0ff); - stq_phys(cs->as, sm_state + 0x7e88, env->idt.base); - stl_phys(cs->as, sm_state + 0x7e84, env->idt.limit); + x86_stq_phys(cs, sm_state + 0x7e88, env->idt.base); + x86_stl_phys(cs, sm_state + 0x7e84, env->idt.limit); - stw_phys(cs->as, sm_state + 0x7e90, env->tr.selector); - stq_phys(cs->as, sm_state + 0x7e98, env->tr.base); - stl_phys(cs->as, sm_state + 0x7e94, env->tr.limit); - stw_phys(cs->as, sm_state + 0x7e92, (env->tr.flags >> 8) & 0xf0ff); + x86_stw_phys(cs, sm_state + 0x7e90, env->tr.selector); + x86_stq_phys(cs, sm_state + 0x7e98, env->tr.base); + x86_stl_phys(cs, sm_state + 0x7e94, env->tr.limit); + x86_stw_phys(cs, sm_state + 0x7e92, (env->tr.flags >> 8) & 0xf0ff); - stq_phys(cs->as, sm_state + 0x7ed0, env->efer); + x86_stq_phys(cs, sm_state + 0x7ed0, env->efer); - stq_phys(cs->as, sm_state + 0x7ff8, env->regs[R_EAX]); - stq_phys(cs->as, sm_state + 0x7ff0, env->regs[R_ECX]); - stq_phys(cs->as, sm_state + 0x7fe8, env->regs[R_EDX]); - stq_phys(cs->as, sm_state + 0x7fe0, env->regs[R_EBX]); - stq_phys(cs->as, sm_state + 0x7fd8, env->regs[R_ESP]); - stq_phys(cs->as, sm_state + 0x7fd0, env->regs[R_EBP]); - stq_phys(cs->as, sm_state + 0x7fc8, env->regs[R_ESI]); - stq_phys(cs->as, sm_state + 0x7fc0, env->regs[R_EDI]); + x86_stq_phys(cs, sm_state + 0x7ff8, env->regs[R_EAX]); + x86_stq_phys(cs, sm_state + 0x7ff0, env->regs[R_ECX]); + x86_stq_phys(cs, sm_state + 0x7fe8, env->regs[R_EDX]); + x86_stq_phys(cs, sm_state + 0x7fe0, env->regs[R_EBX]); + x86_stq_phys(cs, sm_state + 0x7fd8, env->regs[R_ESP]); + x86_stq_phys(cs, sm_state + 0x7fd0, env->regs[R_EBP]); + x86_stq_phys(cs, sm_state + 0x7fc8, env->regs[R_ESI]); + x86_stq_phys(cs, sm_state + 0x7fc0, env->regs[R_EDI]); for (i = 8; i < 16; i++) { - stq_phys(cs->as, sm_state + 0x7ff8 - i * 8, env->regs[i]); + x86_stq_phys(cs, sm_state + 0x7ff8 - i * 8, env->regs[i]); } - stq_phys(cs->as, sm_state + 0x7f78, env->eip); - stl_phys(cs->as, sm_state + 0x7f70, cpu_compute_eflags(env)); - stl_phys(cs->as, sm_state + 0x7f68, env->dr[6]); - stl_phys(cs->as, sm_state + 0x7f60, env->dr[7]); + x86_stq_phys(cs, sm_state + 0x7f78, env->eip); + x86_stl_phys(cs, sm_state + 0x7f70, cpu_compute_eflags(env)); + x86_stl_phys(cs, sm_state + 0x7f68, env->dr[6]); + x86_stl_phys(cs, sm_state + 0x7f60, env->dr[7]); - stl_phys(cs->as, sm_state + 0x7f48, env->cr[4]); - stq_phys(cs->as, sm_state + 0x7f50, env->cr[3]); - stl_phys(cs->as, sm_state + 0x7f58, env->cr[0]); + x86_stl_phys(cs, sm_state + 0x7f48, env->cr[4]); + x86_stq_phys(cs, sm_state + 0x7f50, env->cr[3]); + x86_stl_phys(cs, sm_state + 0x7f58, env->cr[0]); - stl_phys(cs->as, sm_state + 0x7efc, SMM_REVISION_ID); - stl_phys(cs->as, sm_state + 0x7f00, env->smbase); + x86_stl_phys(cs, sm_state + 0x7efc, SMM_REVISION_ID); + x86_stl_phys(cs, sm_state + 0x7f00, env->smbase); #else - stl_phys(cs->as, sm_state + 0x7ffc, env->cr[0]); - stl_phys(cs->as, sm_state + 0x7ff8, env->cr[3]); - stl_phys(cs->as, sm_state + 0x7ff4, cpu_compute_eflags(env)); - stl_phys(cs->as, sm_state + 0x7ff0, env->eip); - stl_phys(cs->as, sm_state + 0x7fec, env->regs[R_EDI]); - stl_phys(cs->as, sm_state + 0x7fe8, env->regs[R_ESI]); - stl_phys(cs->as, sm_state + 0x7fe4, env->regs[R_EBP]); - stl_phys(cs->as, sm_state + 0x7fe0, env->regs[R_ESP]); - stl_phys(cs->as, sm_state + 0x7fdc, env->regs[R_EBX]); - stl_phys(cs->as, sm_state + 0x7fd8, env->regs[R_EDX]); - stl_phys(cs->as, sm_state + 0x7fd4, env->regs[R_ECX]); - stl_phys(cs->as, sm_state + 0x7fd0, env->regs[R_EAX]); - stl_phys(cs->as, sm_state + 0x7fcc, env->dr[6]); - stl_phys(cs->as, sm_state + 0x7fc8, env->dr[7]); + x86_stl_phys(cs, sm_state + 0x7ffc, env->cr[0]); + x86_stl_phys(cs, sm_state + 0x7ff8, env->cr[3]); + x86_stl_phys(cs, sm_state + 0x7ff4, cpu_compute_eflags(env)); + x86_stl_phys(cs, sm_state + 0x7ff0, env->eip); + x86_stl_phys(cs, sm_state + 0x7fec, env->regs[R_EDI]); + x86_stl_phys(cs, sm_state + 0x7fe8, env->regs[R_ESI]); + x86_stl_phys(cs, sm_state + 0x7fe4, env->regs[R_EBP]); + x86_stl_phys(cs, sm_state + 0x7fe0, env->regs[R_ESP]); + x86_stl_phys(cs, sm_state + 0x7fdc, env->regs[R_EBX]); + x86_stl_phys(cs, sm_state + 0x7fd8, env->regs[R_EDX]); + x86_stl_phys(cs, sm_state + 0x7fd4, env->regs[R_ECX]); + x86_stl_phys(cs, sm_state + 0x7fd0, env->regs[R_EAX]); + x86_stl_phys(cs, sm_state + 0x7fcc, env->dr[6]); + x86_stl_phys(cs, sm_state + 0x7fc8, env->dr[7]); - stl_phys(cs->as, sm_state + 0x7fc4, env->tr.selector); - stl_phys(cs->as, sm_state + 0x7f64, env->tr.base); - stl_phys(cs->as, sm_state + 0x7f60, env->tr.limit); - stl_phys(cs->as, sm_state + 0x7f5c, (env->tr.flags >> 8) & 0xf0ff); + x86_stl_phys(cs, sm_state + 0x7fc4, env->tr.selector); + x86_stl_phys(cs, sm_state + 0x7f64, env->tr.base); + x86_stl_phys(cs, sm_state + 0x7f60, env->tr.limit); + x86_stl_phys(cs, sm_state + 0x7f5c, (env->tr.flags >> 8) & 0xf0ff); - stl_phys(cs->as, sm_state + 0x7fc0, env->ldt.selector); - stl_phys(cs->as, sm_state + 0x7f80, env->ldt.base); - stl_phys(cs->as, sm_state + 0x7f7c, env->ldt.limit); - stl_phys(cs->as, sm_state + 0x7f78, (env->ldt.flags >> 8) & 0xf0ff); + x86_stl_phys(cs, sm_state + 0x7fc0, env->ldt.selector); + x86_stl_phys(cs, sm_state + 0x7f80, env->ldt.base); + x86_stl_phys(cs, sm_state + 0x7f7c, env->ldt.limit); + x86_stl_phys(cs, sm_state + 0x7f78, (env->ldt.flags >> 8) & 0xf0ff); - stl_phys(cs->as, sm_state + 0x7f74, env->gdt.base); - stl_phys(cs->as, sm_state + 0x7f70, env->gdt.limit); + x86_stl_phys(cs, sm_state + 0x7f74, env->gdt.base); + x86_stl_phys(cs, sm_state + 0x7f70, env->gdt.limit); - stl_phys(cs->as, sm_state + 0x7f58, env->idt.base); - stl_phys(cs->as, sm_state + 0x7f54, env->idt.limit); + x86_stl_phys(cs, sm_state + 0x7f58, env->idt.base); + x86_stl_phys(cs, sm_state + 0x7f54, env->idt.limit); for (i = 0; i < 6; i++) { dt = &env->segs[i]; @@ -145,15 +145,15 @@ void do_smm_enter(X86CPU *cpu) } else { offset = 0x7f2c + (i - 3) * 12; } - stl_phys(cs->as, sm_state + 0x7fa8 + i * 4, dt->selector); - stl_phys(cs->as, sm_state + offset + 8, dt->base); - stl_phys(cs->as, sm_state + offset + 4, dt->limit); - stl_phys(cs->as, sm_state + offset, (dt->flags >> 8) & 0xf0ff); + x86_stl_phys(cs, sm_state + 0x7fa8 + i * 4, dt->selector); + x86_stl_phys(cs, sm_state + offset + 8, dt->base); + x86_stl_phys(cs, sm_state + offset + 4, dt->limit); + x86_stl_phys(cs, sm_state + offset, (dt->flags >> 8) & 0xf0ff); } - stl_phys(cs->as, sm_state + 0x7f14, env->cr[4]); + x86_stl_phys(cs, sm_state + 0x7f14, env->cr[4]); - stl_phys(cs->as, sm_state + 0x7efc, SMM_REVISION_ID); - stl_phys(cs->as, sm_state + 0x7ef8, env->smbase); + x86_stl_phys(cs, sm_state + 0x7efc, SMM_REVISION_ID); + x86_stl_phys(cs, sm_state + 0x7ef8, env->smbase); #endif /* init SMM cpu state */ @@ -200,91 +200,91 @@ void helper_rsm(CPUX86State *env) sm_state = env->smbase + 0x8000; #ifdef TARGET_X86_64 - cpu_load_efer(env, ldq_phys(cs->as, sm_state + 0x7ed0)); + cpu_load_efer(env, x86_ldq_phys(cs, sm_state + 0x7ed0)); - env->gdt.base = ldq_phys(cs->as, sm_state + 0x7e68); - env->gdt.limit = ldl_phys(cs->as, sm_state + 0x7e64); + env->gdt.base = x86_ldq_phys(cs, sm_state + 0x7e68); + env->gdt.limit = x86_ldl_phys(cs, sm_state + 0x7e64); - env->ldt.selector = lduw_phys(cs->as, sm_state + 0x7e70); - env->ldt.base = ldq_phys(cs->as, sm_state + 0x7e78); - env->ldt.limit = ldl_phys(cs->as, sm_state + 0x7e74); - env->ldt.flags = (lduw_phys(cs->as, sm_state + 0x7e72) & 0xf0ff) << 8; + env->ldt.selector = x86_lduw_phys(cs, sm_state + 0x7e70); + env->ldt.base = x86_ldq_phys(cs, sm_state + 0x7e78); + env->ldt.limit = x86_ldl_phys(cs, sm_state + 0x7e74); + env->ldt.flags = (x86_lduw_phys(cs, sm_state + 0x7e72) & 0xf0ff) << 8; - env->idt.base = ldq_phys(cs->as, sm_state + 0x7e88); - env->idt.limit = ldl_phys(cs->as, sm_state + 0x7e84); + env->idt.base = x86_ldq_phys(cs, sm_state + 0x7e88); + env->idt.limit = x86_ldl_phys(cs, sm_state + 0x7e84); - env->tr.selector = lduw_phys(cs->as, sm_state + 0x7e90); - env->tr.base = ldq_phys(cs->as, sm_state + 0x7e98); - env->tr.limit = ldl_phys(cs->as, sm_state + 0x7e94); - env->tr.flags = (lduw_phys(cs->as, sm_state + 0x7e92) & 0xf0ff) << 8; + env->tr.selector = x86_lduw_phys(cs, sm_state + 0x7e90); + env->tr.base = x86_ldq_phys(cs, sm_state + 0x7e98); + env->tr.limit = x86_ldl_phys(cs, sm_state + 0x7e94); + env->tr.flags = (x86_lduw_phys(cs, sm_state + 0x7e92) & 0xf0ff) << 8; - env->regs[R_EAX] = ldq_phys(cs->as, sm_state + 0x7ff8); - env->regs[R_ECX] = ldq_phys(cs->as, sm_state + 0x7ff0); - env->regs[R_EDX] = ldq_phys(cs->as, sm_state + 0x7fe8); - env->regs[R_EBX] = ldq_phys(cs->as, sm_state + 0x7fe0); - env->regs[R_ESP] = ldq_phys(cs->as, sm_state + 0x7fd8); - env->regs[R_EBP] = ldq_phys(cs->as, sm_state + 0x7fd0); - env->regs[R_ESI] = ldq_phys(cs->as, sm_state + 0x7fc8); - env->regs[R_EDI] = ldq_phys(cs->as, sm_state + 0x7fc0); + env->regs[R_EAX] = x86_ldq_phys(cs, sm_state + 0x7ff8); + env->regs[R_ECX] = x86_ldq_phys(cs, sm_state + 0x7ff0); + env->regs[R_EDX] = x86_ldq_phys(cs, sm_state + 0x7fe8); + env->regs[R_EBX] = x86_ldq_phys(cs, sm_state + 0x7fe0); + env->regs[R_ESP] = x86_ldq_phys(cs, sm_state + 0x7fd8); + env->regs[R_EBP] = x86_ldq_phys(cs, sm_state + 0x7fd0); + env->regs[R_ESI] = x86_ldq_phys(cs, sm_state + 0x7fc8); + env->regs[R_EDI] = x86_ldq_phys(cs, sm_state + 0x7fc0); for (i = 8; i < 16; i++) { - env->regs[i] = ldq_phys(cs->as, sm_state + 0x7ff8 - i * 8); + env->regs[i] = x86_ldq_phys(cs, sm_state + 0x7ff8 - i * 8); } - env->eip = ldq_phys(cs->as, sm_state + 0x7f78); - cpu_load_eflags(env, ldl_phys(cs->as, sm_state + 0x7f70), + env->eip = x86_ldq_phys(cs, sm_state + 0x7f78); + cpu_load_eflags(env, x86_ldl_phys(cs, sm_state + 0x7f70), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); - env->dr[6] = ldl_phys(cs->as, sm_state + 0x7f68); - env->dr[7] = ldl_phys(cs->as, sm_state + 0x7f60); + env->dr[6] = x86_ldl_phys(cs, sm_state + 0x7f68); + env->dr[7] = x86_ldl_phys(cs, sm_state + 0x7f60); - cpu_x86_update_cr4(env, ldl_phys(cs->as, sm_state + 0x7f48)); - cpu_x86_update_cr3(env, ldq_phys(cs->as, sm_state + 0x7f50)); - cpu_x86_update_cr0(env, ldl_phys(cs->as, sm_state + 0x7f58)); + cpu_x86_update_cr4(env, x86_ldl_phys(cs, sm_state + 0x7f48)); + cpu_x86_update_cr3(env, x86_ldq_phys(cs, sm_state + 0x7f50)); + cpu_x86_update_cr0(env, x86_ldl_phys(cs, sm_state + 0x7f58)); for (i = 0; i < 6; i++) { offset = 0x7e00 + i * 16; cpu_x86_load_seg_cache(env, i, - lduw_phys(cs->as, sm_state + offset), - ldq_phys(cs->as, sm_state + offset + 8), - ldl_phys(cs->as, sm_state + offset + 4), - (lduw_phys(cs->as, sm_state + offset + 2) & + x86_lduw_phys(cs, sm_state + offset), + x86_ldq_phys(cs, sm_state + offset + 8), + x86_ldl_phys(cs, sm_state + offset + 4), + (x86_lduw_phys(cs, sm_state + offset + 2) & 0xf0ff) << 8); } - val = ldl_phys(cs->as, sm_state + 0x7efc); /* revision ID */ + val = x86_ldl_phys(cs, sm_state + 0x7efc); /* revision ID */ if (val & 0x20000) { - env->smbase = ldl_phys(cs->as, sm_state + 0x7f00) & ~0x7fff; + env->smbase = x86_ldl_phys(cs, sm_state + 0x7f00) & ~0x7fff; } #else - cpu_x86_update_cr0(env, ldl_phys(cs->as, sm_state + 0x7ffc)); - cpu_x86_update_cr3(env, ldl_phys(cs->as, sm_state + 0x7ff8)); - cpu_load_eflags(env, ldl_phys(cs->as, sm_state + 0x7ff4), + cpu_x86_update_cr0(env, x86_ldl_phys(cs, sm_state + 0x7ffc)); + cpu_x86_update_cr3(env, x86_ldl_phys(cs, sm_state + 0x7ff8)); + cpu_load_eflags(env, x86_ldl_phys(cs, sm_state + 0x7ff4), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); - env->eip = ldl_phys(cs->as, sm_state + 0x7ff0); - env->regs[R_EDI] = ldl_phys(cs->as, sm_state + 0x7fec); - env->regs[R_ESI] = ldl_phys(cs->as, sm_state + 0x7fe8); - env->regs[R_EBP] = ldl_phys(cs->as, sm_state + 0x7fe4); - env->regs[R_ESP] = ldl_phys(cs->as, sm_state + 0x7fe0); - env->regs[R_EBX] = ldl_phys(cs->as, sm_state + 0x7fdc); - env->regs[R_EDX] = ldl_phys(cs->as, sm_state + 0x7fd8); - env->regs[R_ECX] = ldl_phys(cs->as, sm_state + 0x7fd4); - env->regs[R_EAX] = ldl_phys(cs->as, sm_state + 0x7fd0); - env->dr[6] = ldl_phys(cs->as, sm_state + 0x7fcc); - env->dr[7] = ldl_phys(cs->as, sm_state + 0x7fc8); + env->eip = x86_ldl_phys(cs, sm_state + 0x7ff0); + env->regs[R_EDI] = x86_ldl_phys(cs, sm_state + 0x7fec); + env->regs[R_ESI] = x86_ldl_phys(cs, sm_state + 0x7fe8); + env->regs[R_EBP] = x86_ldl_phys(cs, sm_state + 0x7fe4); + env->regs[R_ESP] = x86_ldl_phys(cs, sm_state + 0x7fe0); + env->regs[R_EBX] = x86_ldl_phys(cs, sm_state + 0x7fdc); + env->regs[R_EDX] = x86_ldl_phys(cs, sm_state + 0x7fd8); + env->regs[R_ECX] = x86_ldl_phys(cs, sm_state + 0x7fd4); + env->regs[R_EAX] = x86_ldl_phys(cs, sm_state + 0x7fd0); + env->dr[6] = x86_ldl_phys(cs, sm_state + 0x7fcc); + env->dr[7] = x86_ldl_phys(cs, sm_state + 0x7fc8); - env->tr.selector = ldl_phys(cs->as, sm_state + 0x7fc4) & 0xffff; - env->tr.base = ldl_phys(cs->as, sm_state + 0x7f64); - env->tr.limit = ldl_phys(cs->as, sm_state + 0x7f60); - env->tr.flags = (ldl_phys(cs->as, sm_state + 0x7f5c) & 0xf0ff) << 8; + env->tr.selector = x86_ldl_phys(cs, sm_state + 0x7fc4) & 0xffff; + env->tr.base = x86_ldl_phys(cs, sm_state + 0x7f64); + env->tr.limit = x86_ldl_phys(cs, sm_state + 0x7f60); + env->tr.flags = (x86_ldl_phys(cs, sm_state + 0x7f5c) & 0xf0ff) << 8; - env->ldt.selector = ldl_phys(cs->as, sm_state + 0x7fc0) & 0xffff; - env->ldt.base = ldl_phys(cs->as, sm_state + 0x7f80); - env->ldt.limit = ldl_phys(cs->as, sm_state + 0x7f7c); - env->ldt.flags = (ldl_phys(cs->as, sm_state + 0x7f78) & 0xf0ff) << 8; + env->ldt.selector = x86_ldl_phys(cs, sm_state + 0x7fc0) & 0xffff; + env->ldt.base = x86_ldl_phys(cs, sm_state + 0x7f80); + env->ldt.limit = x86_ldl_phys(cs, sm_state + 0x7f7c); + env->ldt.flags = (x86_ldl_phys(cs, sm_state + 0x7f78) & 0xf0ff) << 8; - env->gdt.base = ldl_phys(cs->as, sm_state + 0x7f74); - env->gdt.limit = ldl_phys(cs->as, sm_state + 0x7f70); + env->gdt.base = x86_ldl_phys(cs, sm_state + 0x7f74); + env->gdt.limit = x86_ldl_phys(cs, sm_state + 0x7f70); - env->idt.base = ldl_phys(cs->as, sm_state + 0x7f58); - env->idt.limit = ldl_phys(cs->as, sm_state + 0x7f54); + env->idt.base = x86_ldl_phys(cs, sm_state + 0x7f58); + env->idt.limit = x86_ldl_phys(cs, sm_state + 0x7f54); for (i = 0; i < 6; i++) { if (i < 3) { @@ -293,18 +293,18 @@ void helper_rsm(CPUX86State *env) offset = 0x7f2c + (i - 3) * 12; } cpu_x86_load_seg_cache(env, i, - ldl_phys(cs->as, + x86_ldl_phys(cs, sm_state + 0x7fa8 + i * 4) & 0xffff, - ldl_phys(cs->as, sm_state + offset + 8), - ldl_phys(cs->as, sm_state + offset + 4), - (ldl_phys(cs->as, + x86_ldl_phys(cs, sm_state + offset + 8), + x86_ldl_phys(cs, sm_state + offset + 4), + (x86_ldl_phys(cs, sm_state + offset) & 0xf0ff) << 8); } - cpu_x86_update_cr4(env, ldl_phys(cs->as, sm_state + 0x7f14)); + cpu_x86_update_cr4(env, x86_ldl_phys(cs, sm_state + 0x7f14)); - val = ldl_phys(cs->as, sm_state + 0x7efc); /* revision ID */ + val = x86_ldl_phys(cs, sm_state + 0x7efc); /* revision ID */ if (val & 0x20000) { - env->smbase = ldl_phys(cs->as, sm_state + 0x7ef8) & ~0x7fff; + env->smbase = x86_ldl_phys(cs, sm_state + 0x7ef8) & ~0x7fff; } #endif env->hflags &= ~HF_SMM_MASK; diff --git a/target-i386/svm_helper.c b/target-i386/svm_helper.c index 429d029a3d..f1fabf54e7 100644 --- a/target-i386/svm_helper.c +++ b/target-i386/svm_helper.c @@ -87,13 +87,13 @@ static inline void svm_save_seg(CPUX86State *env, hwaddr addr, { CPUState *cs = CPU(x86_env_get_cpu(env)); - stw_phys(cs->as, addr + offsetof(struct vmcb_seg, selector), + x86_stw_phys(cs, addr + offsetof(struct vmcb_seg, selector), sc->selector); - stq_phys(cs->as, addr + offsetof(struct vmcb_seg, base), + x86_stq_phys(cs, addr + offsetof(struct vmcb_seg, base), sc->base); - stl_phys(cs->as, addr + offsetof(struct vmcb_seg, limit), + x86_stl_phys(cs, addr + offsetof(struct vmcb_seg, limit), sc->limit); - stw_phys(cs->as, addr + offsetof(struct vmcb_seg, attrib), + x86_stw_phys(cs, addr + offsetof(struct vmcb_seg, attrib), ((sc->flags >> 8) & 0xff) | ((sc->flags >> 12) & 0x0f00)); } @@ -103,11 +103,11 @@ static inline void svm_load_seg(CPUX86State *env, hwaddr addr, CPUState *cs = CPU(x86_env_get_cpu(env)); unsigned int flags; - sc->selector = lduw_phys(cs->as, + sc->selector = x86_lduw_phys(cs, addr + offsetof(struct vmcb_seg, selector)); - sc->base = ldq_phys(cs->as, addr + offsetof(struct vmcb_seg, base)); - sc->limit = ldl_phys(cs->as, addr + offsetof(struct vmcb_seg, limit)); - flags = lduw_phys(cs->as, addr + offsetof(struct vmcb_seg, attrib)); + sc->base = x86_ldq_phys(cs, addr + offsetof(struct vmcb_seg, base)); + sc->limit = x86_ldl_phys(cs, addr + offsetof(struct vmcb_seg, limit)); + flags = x86_lduw_phys(cs, addr + offsetof(struct vmcb_seg, attrib)); sc->flags = ((flags & 0xff) << 8) | ((flags & 0x0f00) << 12); } @@ -141,32 +141,32 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) env->vm_vmcb = addr; /* save the current CPU state in the hsave page */ - stq_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, save.gdtr.base), + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.gdtr.base), env->gdt.base); - stl_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit), + x86_stl_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit); - stq_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, save.idtr.base), + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.idtr.base), env->idt.base); - stl_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, save.idtr.limit), + x86_stl_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.idtr.limit), env->idt.limit); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.cr0), env->cr[0]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.cr2), env->cr[2]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.cr3), env->cr[3]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.cr4), env->cr[4]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.dr6), env->dr[6]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.dr7), env->dr[7]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.efer), env->efer); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rflags), cpu_compute_eflags(env)); @@ -179,30 +179,30 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) svm_save_seg(env, env->vm_hsave + offsetof(struct vmcb, save.ds), &env->segs[R_DS]); - stq_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, save.rip), + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rip), env->eip + next_eip_addend); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rsp), env->regs[R_ESP]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rax), env->regs[R_EAX]); /* load the interception bitmaps so we do not need to access the vmcb in svm mode */ - env->intercept = ldq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + env->intercept = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.intercept)); - env->intercept_cr_read = lduw_phys(cs->as, env->vm_vmcb + + env->intercept_cr_read = x86_lduw_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_read)); - env->intercept_cr_write = lduw_phys(cs->as, env->vm_vmcb + + env->intercept_cr_write = x86_lduw_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.intercept_cr_write)); - env->intercept_dr_read = lduw_phys(cs->as, env->vm_vmcb + + env->intercept_dr_read = x86_lduw_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_read)); - env->intercept_dr_write = lduw_phys(cs->as, env->vm_vmcb + + env->intercept_dr_write = x86_lduw_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.intercept_dr_write)); - env->intercept_exceptions = ldl_phys(cs->as, env->vm_vmcb + + env->intercept_exceptions = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.intercept_exceptions )); @@ -210,35 +210,35 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) /* enable intercepts */ env->hflags |= HF_SVMI_MASK; - env->tsc_offset = ldq_phys(cs->as, env->vm_vmcb + + env->tsc_offset = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.tsc_offset)); - env->gdt.base = ldq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + env->gdt.base = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base)); - env->gdt.limit = ldl_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + env->gdt.limit = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit)); - env->idt.base = ldq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + env->idt.base = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.idtr.base)); - env->idt.limit = ldl_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + env->idt.limit = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit)); /* clear exit_info_2 so we behave like the real hardware */ - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), 0); - cpu_x86_update_cr0(env, ldq_phys(cs->as, + cpu_x86_update_cr0(env, x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr0))); - cpu_x86_update_cr4(env, ldq_phys(cs->as, + cpu_x86_update_cr4(env, x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr4))); - cpu_x86_update_cr3(env, ldq_phys(cs->as, + cpu_x86_update_cr3(env, x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr3))); - env->cr[2] = ldq_phys(cs->as, + env->cr[2] = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr2)); - int_ctl = ldl_phys(cs->as, + int_ctl = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)); env->hflags2 &= ~(HF2_HIF_MASK | HF2_VINTR_MASK); if (int_ctl & V_INTR_MASKING_MASK) { @@ -250,10 +250,10 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) } cpu_load_efer(env, - ldq_phys(cs->as, + x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.efer))); env->eflags = 0; - cpu_load_eflags(env, ldq_phys(cs->as, + cpu_load_eflags(env, x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rflags)), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK)); @@ -267,21 +267,21 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) svm_load_seg_cache(env, env->vm_vmcb + offsetof(struct vmcb, save.ds), R_DS); - env->eip = ldq_phys(cs->as, + env->eip = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rip)); - env->regs[R_ESP] = ldq_phys(cs->as, + env->regs[R_ESP] = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rsp)); - env->regs[R_EAX] = ldq_phys(cs->as, + env->regs[R_EAX] = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rax)); - env->dr[7] = ldq_phys(cs->as, + env->dr[7] = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr7)); - env->dr[6] = ldq_phys(cs->as, + env->dr[6] = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr6)); /* FIXME: guest state consistency checks */ - switch (ldub_phys(cs->as, + switch (x86_ldub_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.tlb_ctl))) { case TLB_CONTROL_DO_NOTHING: break; @@ -300,12 +300,12 @@ void helper_vmrun(CPUX86State *env, int aflag, int next_eip_addend) } /* maybe we need to inject an event */ - event_inj = ldl_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + event_inj = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj)); if (event_inj & SVM_EVTINJ_VALID) { uint8_t vector = event_inj & SVM_EVTINJ_VEC_MASK; uint16_t valid_err = event_inj & SVM_EVTINJ_VALID_ERR; - uint32_t event_inj_err = ldl_phys(cs->as, env->vm_vmcb + + uint32_t event_inj_err = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err)); @@ -372,7 +372,7 @@ void helper_vmload(CPUX86State *env, int aflag) qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmload! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n", - addr, ldq_phys(cs->as, addr + offsetof(struct vmcb, + addr, x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.fs.base)), env->segs[R_FS].base); @@ -382,18 +382,18 @@ void helper_vmload(CPUX86State *env, int aflag) svm_load_seg(env, addr + offsetof(struct vmcb, save.ldtr), &env->ldt); #ifdef TARGET_X86_64 - env->kernelgsbase = ldq_phys(cs->as, addr + offsetof(struct vmcb, + env->kernelgsbase = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.kernel_gs_base)); - env->lstar = ldq_phys(cs->as, addr + offsetof(struct vmcb, save.lstar)); - env->cstar = ldq_phys(cs->as, addr + offsetof(struct vmcb, save.cstar)); - env->fmask = ldq_phys(cs->as, addr + offsetof(struct vmcb, save.sfmask)); + env->lstar = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.lstar)); + env->cstar = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.cstar)); + env->fmask = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.sfmask)); #endif - env->star = ldq_phys(cs->as, addr + offsetof(struct vmcb, save.star)); - env->sysenter_cs = ldq_phys(cs->as, + env->star = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.star)); + env->sysenter_cs = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.sysenter_cs)); - env->sysenter_esp = ldq_phys(cs->as, addr + offsetof(struct vmcb, + env->sysenter_esp = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.sysenter_esp)); - env->sysenter_eip = ldq_phys(cs->as, addr + offsetof(struct vmcb, + env->sysenter_eip = x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.sysenter_eip)); } @@ -412,7 +412,7 @@ void helper_vmsave(CPUX86State *env, int aflag) qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmsave! " TARGET_FMT_lx "\nFS: %016" PRIx64 " | " TARGET_FMT_lx "\n", - addr, ldq_phys(cs->as, + addr, x86_ldq_phys(cs, addr + offsetof(struct vmcb, save.fs.base)), env->segs[R_FS].base); @@ -426,18 +426,18 @@ void helper_vmsave(CPUX86State *env, int aflag) &env->ldt); #ifdef TARGET_X86_64 - stq_phys(cs->as, addr + offsetof(struct vmcb, save.kernel_gs_base), + x86_stq_phys(cs, addr + offsetof(struct vmcb, save.kernel_gs_base), env->kernelgsbase); - stq_phys(cs->as, addr + offsetof(struct vmcb, save.lstar), env->lstar); - stq_phys(cs->as, addr + offsetof(struct vmcb, save.cstar), env->cstar); - stq_phys(cs->as, addr + offsetof(struct vmcb, save.sfmask), env->fmask); + x86_stq_phys(cs, addr + offsetof(struct vmcb, save.lstar), env->lstar); + x86_stq_phys(cs, addr + offsetof(struct vmcb, save.cstar), env->cstar); + x86_stq_phys(cs, addr + offsetof(struct vmcb, save.sfmask), env->fmask); #endif - stq_phys(cs->as, addr + offsetof(struct vmcb, save.star), env->star); - stq_phys(cs->as, + x86_stq_phys(cs, addr + offsetof(struct vmcb, save.star), env->star); + x86_stq_phys(cs, addr + offsetof(struct vmcb, save.sysenter_cs), env->sysenter_cs); - stq_phys(cs->as, addr + offsetof(struct vmcb, save.sysenter_esp), + x86_stq_phys(cs, addr + offsetof(struct vmcb, save.sysenter_esp), env->sysenter_esp); - stq_phys(cs->as, addr + offsetof(struct vmcb, save.sysenter_eip), + x86_stq_phys(cs, addr + offsetof(struct vmcb, save.sysenter_eip), env->sysenter_eip); } @@ -515,7 +515,7 @@ void helper_svm_check_intercept_param(CPUX86State *env, uint32_t type, case SVM_EXIT_MSR: if (env->intercept & (1ULL << (SVM_EXIT_MSR - SVM_EXIT_INTR))) { /* FIXME: this should be read in at vmrun (faster this way?) */ - uint64_t addr = ldq_phys(cs->as, env->vm_vmcb + + uint64_t addr = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.msrpm_base_pa)); uint32_t t0, t1; @@ -541,7 +541,7 @@ void helper_svm_check_intercept_param(CPUX86State *env, uint32_t type, t1 = 0; break; } - if (ldub_phys(cs->as, addr + t1) & ((1 << param) << t0)) { + if (x86_ldub_phys(cs, addr + t1) & ((1 << param) << t0)) { helper_vmexit(env, type, param); } } @@ -567,13 +567,13 @@ void helper_svm_check_io(CPUX86State *env, uint32_t port, uint32_t param, if (env->intercept & (1ULL << (SVM_EXIT_IOIO - SVM_EXIT_INTR))) { /* FIXME: this should be read in at vmrun (faster this way?) */ - uint64_t addr = ldq_phys(cs->as, env->vm_vmcb + + uint64_t addr = x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.iopm_base_pa)); uint16_t mask = (1 << ((param >> 4) & 7)) - 1; - if (lduw_phys(cs->as, addr + port / 8) & (mask << (port & 7))) { + if (x86_lduw_phys(cs, addr + port / 8) & (mask << (port & 7))) { /* next env->eip */ - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2), env->eip + next_eip_addend); helper_vmexit(env, SVM_EXIT_IOIO, param | (port << 16)); @@ -590,17 +590,17 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) qemu_log_mask(CPU_LOG_TB_IN_ASM, "vmexit(%08x, %016" PRIx64 ", %016" PRIx64 ", " TARGET_FMT_lx ")!\n", exit_code, exit_info_1, - ldq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + x86_ldq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_2)), env->eip); if (env->hflags & HF_INHIBIT_IRQ_MASK) { - stl_phys(cs->as, + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.int_state), SVM_INTERRUPT_SHADOW_MASK); env->hflags &= ~HF_INHIBIT_IRQ_MASK; } else { - stl_phys(cs->as, + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.int_state), 0); } @@ -614,50 +614,50 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) svm_save_seg(env, env->vm_vmcb + offsetof(struct vmcb, save.ds), &env->segs[R_DS]); - stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.gdtr.base), env->gdt.base); - stl_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit), + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.gdtr.limit), env->gdt.limit); - stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, save.idtr.base), + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.idtr.base), env->idt.base); - stl_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit), + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.idtr.limit), env->idt.limit); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.efer), env->efer); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr0), env->cr[0]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr2), env->cr[2]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr3), env->cr[3]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cr4), env->cr[4]); - int_ctl = ldl_phys(cs->as, + int_ctl = x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.int_ctl)); int_ctl &= ~(V_TPR_MASK | V_IRQ_MASK); int_ctl |= env->v_tpr & V_TPR_MASK; if (cs->interrupt_request & CPU_INTERRUPT_VIRQ) { int_ctl |= V_IRQ_MASK; } - stl_phys(cs->as, + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.int_ctl), int_ctl); - stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, save.rflags), + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rflags), cpu_compute_eflags(env)); - stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, save.rip), + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rip), env->eip); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rsp), env->regs[R_ESP]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.rax), env->regs[R_EAX]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr7), env->dr[7]); - stq_phys(cs->as, + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.dr6), env->dr[6]); - stb_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, save.cpl), + x86_stb_phys(cs, env->vm_vmcb + offsetof(struct vmcb, save.cpl), env->hflags & HF_CPL_MASK); /* Reload the host state from vm_hsave */ @@ -668,32 +668,32 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) cs->interrupt_request &= ~CPU_INTERRUPT_VIRQ; env->tsc_offset = 0; - env->gdt.base = ldq_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, + env->gdt.base = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.gdtr.base)); - env->gdt.limit = ldl_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, + env->gdt.limit = x86_ldl_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.gdtr.limit)); - env->idt.base = ldq_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, + env->idt.base = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.idtr.base)); - env->idt.limit = ldl_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, + env->idt.limit = x86_ldl_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.idtr.limit)); - cpu_x86_update_cr0(env, ldq_phys(cs->as, + cpu_x86_update_cr0(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.cr0)) | CR0_PE_MASK); - cpu_x86_update_cr4(env, ldq_phys(cs->as, + cpu_x86_update_cr4(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.cr4))); - cpu_x86_update_cr3(env, ldq_phys(cs->as, + cpu_x86_update_cr3(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.cr3))); /* we need to set the efer after the crs so the hidden flags get set properly */ - cpu_load_efer(env, ldq_phys(cs->as, env->vm_hsave + offsetof(struct vmcb, + cpu_load_efer(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.efer))); env->eflags = 0; - cpu_load_eflags(env, ldq_phys(cs->as, + cpu_load_eflags(env, x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rflags)), ~(CC_O | CC_S | CC_Z | CC_A | CC_P | CC_C | DF_MASK | @@ -708,33 +708,33 @@ void helper_vmexit(CPUX86State *env, uint32_t exit_code, uint64_t exit_info_1) svm_load_seg_cache(env, env->vm_hsave + offsetof(struct vmcb, save.ds), R_DS); - env->eip = ldq_phys(cs->as, + env->eip = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rip)); - env->regs[R_ESP] = ldq_phys(cs->as, env->vm_hsave + + env->regs[R_ESP] = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rsp)); - env->regs[R_EAX] = ldq_phys(cs->as, env->vm_hsave + + env->regs[R_EAX] = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.rax)); - env->dr[6] = ldq_phys(cs->as, + env->dr[6] = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.dr6)); - env->dr[7] = ldq_phys(cs->as, + env->dr[7] = x86_ldq_phys(cs, env->vm_hsave + offsetof(struct vmcb, save.dr7)); /* other setups */ - stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, control.exit_code), + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_code), exit_code); - stq_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), + x86_stq_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_info_1), exit_info_1); - stl_phys(cs->as, + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info), - ldl_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj))); - stl_phys(cs->as, + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.exit_int_info_err), - ldl_phys(cs->as, env->vm_vmcb + offsetof(struct vmcb, + x86_ldl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj_err))); - stl_phys(cs->as, + x86_stl_phys(cs, env->vm_vmcb + offsetof(struct vmcb, control.event_inj), 0); env->hflags2 &= ~HF2_GIF_MASK; From 3f7d84648607cc0fcb3812bb4b88978e2a7aa24f Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Apr 2015 14:45:53 +0200 Subject: [PATCH 40/62] target-i386: Use correct memory attributes for ioport accesses In order to do this, stop using the cpu_in*/out* helpers, and instead access address_space_io directly. cpu_in* and cpu_out* remain for usage in the monitor, in qtest, and in Xen. Signed-off-by: Paolo Bonzini --- target-i386/Makefile.objs | 2 -- target-i386/helper.h | 12 ++++---- target-i386/ioport-user.c | 60 --------------------------------------- target-i386/misc_helper.c | 59 +++++++++++++++++++++++++++++--------- target-i386/translate.c | 12 ++++---- 5 files changed, 58 insertions(+), 87 deletions(-) delete mode 100644 target-i386/ioport-user.c diff --git a/target-i386/Makefile.objs b/target-i386/Makefile.objs index 027b94e1d3..7a1df2c983 100644 --- a/target-i386/Makefile.objs +++ b/target-i386/Makefile.objs @@ -5,5 +5,3 @@ obj-y += gdbstub.o obj-$(CONFIG_SOFTMMU) += machine.o arch_memory_mapping.o arch_dump.o obj-$(CONFIG_KVM) += kvm.o obj-$(call lnot,$(CONFIG_KVM)) += kvm-stub.o -obj-$(CONFIG_LINUX_USER) += ioport-user.o -obj-$(CONFIG_BSD_USER) += ioport-user.o diff --git a/target-i386/helper.h b/target-i386/helper.h index 8eb0145039..74308f442e 100644 --- a/target-i386/helper.h +++ b/target-i386/helper.h @@ -86,12 +86,12 @@ DEF_HELPER_1(wrmsr, void, env) DEF_HELPER_2(check_iob, void, env, i32) DEF_HELPER_2(check_iow, void, env, i32) DEF_HELPER_2(check_iol, void, env, i32) -DEF_HELPER_2(outb, void, i32, i32) -DEF_HELPER_1(inb, tl, i32) -DEF_HELPER_2(outw, void, i32, i32) -DEF_HELPER_1(inw, tl, i32) -DEF_HELPER_2(outl, void, i32, i32) -DEF_HELPER_1(inl, tl, i32) +DEF_HELPER_3(outb, void, env, i32, i32) +DEF_HELPER_2(inb, tl, env, i32) +DEF_HELPER_3(outw, void, env, i32, i32) +DEF_HELPER_2(inw, tl, env, i32) +DEF_HELPER_3(outl, void, env, i32, i32) +DEF_HELPER_2(inl, tl, env, i32) DEF_HELPER_3(svm_check_intercept_param, void, env, i32, i64) DEF_HELPER_3(vmexit, void, env, i32, i64) diff --git a/target-i386/ioport-user.c b/target-i386/ioport-user.c deleted file mode 100644 index f7636e0a87..0000000000 --- a/target-i386/ioport-user.c +++ /dev/null @@ -1,60 +0,0 @@ -/* - * qemu user ioport functions - * - * Copyright (c) 2003-2008 Fabrice Bellard - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#include - -#include "qemu.h" -#include "qemu-common.h" -#include "exec/ioport.h" - -void cpu_outb(pio_addr_t addr, uint8_t val) -{ - fprintf(stderr, "outb: port=0x%04"FMT_pioaddr", data=%02"PRIx8"\n", - addr, val); -} - -void cpu_outw(pio_addr_t addr, uint16_t val) -{ - fprintf(stderr, "outw: port=0x%04"FMT_pioaddr", data=%04"PRIx16"\n", - addr, val); -} - -void cpu_outl(pio_addr_t addr, uint32_t val) -{ - fprintf(stderr, "outl: port=0x%04"FMT_pioaddr", data=%08"PRIx32"\n", - addr, val); -} - -uint8_t cpu_inb(pio_addr_t addr) -{ - fprintf(stderr, "inb: port=0x%04"FMT_pioaddr"\n", addr); - return 0; -} - -uint16_t cpu_inw(pio_addr_t addr) -{ - fprintf(stderr, "inw: port=0x%04"FMT_pioaddr"\n", addr); - return 0; -} - -uint32_t cpu_inl(pio_addr_t addr) -{ - fprintf(stderr, "inl: port=0x%04"FMT_pioaddr"\n", addr); - return 0; -} diff --git a/target-i386/misc_helper.c b/target-i386/misc_helper.c index 4aaf1e4d95..52c5d65e91 100644 --- a/target-i386/misc_helper.c +++ b/target-i386/misc_helper.c @@ -18,38 +18,71 @@ */ #include "cpu.h" -#include "exec/ioport.h" #include "exec/helper-proto.h" #include "exec/cpu_ldst.h" +#include "exec/address-spaces.h" -void helper_outb(uint32_t port, uint32_t data) +void helper_outb(CPUX86State *env, uint32_t port, uint32_t data) { - cpu_outb(port, data & 0xff); +#ifdef CONFIG_USER_ONLY + fprintf(stderr, "outb: port=0x%04x, data=%02x\n", port, data); +#else + address_space_stb(&address_space_io, port, data, + cpu_get_mem_attrs(env), NULL); +#endif } -target_ulong helper_inb(uint32_t port) +target_ulong helper_inb(CPUX86State *env, uint32_t port) { - return cpu_inb(port); +#ifdef CONFIG_USER_ONLY + fprintf(stderr, "inb: port=0x%04x\n", port); + return 0; +#else + return address_space_ldub(&address_space_io, port, + cpu_get_mem_attrs(env), NULL); +#endif } -void helper_outw(uint32_t port, uint32_t data) +void helper_outw(CPUX86State *env, uint32_t port, uint32_t data) { - cpu_outw(port, data & 0xffff); +#ifdef CONFIG_USER_ONLY + fprintf(stderr, "outw: port=0x%04x, data=%04x\n", port, data); +#else + address_space_stw(&address_space_io, port, data, + cpu_get_mem_attrs(env), NULL); +#endif } -target_ulong helper_inw(uint32_t port) +target_ulong helper_inw(CPUX86State *env, uint32_t port) { - return cpu_inw(port); +#ifdef CONFIG_USER_ONLY + fprintf(stderr, "inw: port=0x%04x\n", port); + return 0; +#else + return address_space_lduw(&address_space_io, port, + cpu_get_mem_attrs(env), NULL); +#endif } -void helper_outl(uint32_t port, uint32_t data) +void helper_outl(CPUX86State *env, uint32_t port, uint32_t data) { - cpu_outl(port, data); +#ifdef CONFIG_USER_ONLY + fprintf(stderr, "outw: port=0x%04x, data=%08x\n", port, data); +#else + address_space_stl(&address_space_io, port, data, + cpu_get_mem_attrs(env), NULL); +#endif } -target_ulong helper_inl(uint32_t port) +target_ulong helper_inl(CPUX86State *env, uint32_t port) { - return cpu_inl(port); +#ifdef CONFIG_USER_ONLY + fprintf(stderr, "inl: port=0x%04x\n", port); + return 0; +#else + return address_space_ldl(&address_space_io, port, + cpu_get_mem_attrs(env), NULL); +#endif } void helper_into(CPUX86State *env, int next_eip_addend) diff --git a/target-i386/translate.c b/target-i386/translate.c index 305ce5077c..58b1959154 100644 --- a/target-i386/translate.c +++ b/target-i386/translate.c @@ -631,13 +631,13 @@ static void gen_helper_in_func(TCGMemOp ot, TCGv v, TCGv_i32 n) { switch (ot) { case MO_8: - gen_helper_inb(v, n); + gen_helper_inb(v, cpu_env, n); break; case MO_16: - gen_helper_inw(v, n); + gen_helper_inw(v, cpu_env, n); break; case MO_32: - gen_helper_inl(v, n); + gen_helper_inl(v, cpu_env, n); break; default: tcg_abort(); @@ -648,13 +648,13 @@ static void gen_helper_out_func(TCGMemOp ot, TCGv_i32 v, TCGv_i32 n) { switch (ot) { case MO_8: - gen_helper_outb(v, n); + gen_helper_outb(cpu_env, v, n); break; case MO_16: - gen_helper_outw(v, n); + gen_helper_outw(cpu_env, v, n); break; case MO_32: - gen_helper_outl(v, n); + gen_helper_outl(cpu_env, v, n); break; default: tcg_abort(); From 9982f74bad70479939491b69522da047a3be5a0d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 22 Apr 2015 11:40:41 +0200 Subject: [PATCH 41/62] target-i386: mask NMIs on entry to SMM QEMU is not blocking NMIs on entry to SMM. Implementing this has to cover a few corner cases, because: - NMIs can then be enabled by an IRET instruction and there is no mechanism to _set_ the "NMIs masked" flag on exit from SMM: "A special case can occur if an SMI handler nests inside an NMI handler and then another NMI occurs. [...] When the processor enters SMM while executing an NMI handler, the processor saves the SMRAM state save map but does not save the attribute to keep NMI interrupts disabled. - However, there is some hidden state, because "If NMIs were blocked before the SMI occurred [and no IRET is executed while in SMM], they are blocked after execution of RSM." This is represented by the new HF2_SMM_INSIDE_NMI_MASK bit. If it is zero, NMIs are _unblocked_ on exit from RSM. Signed-off-by: Paolo Bonzini --- target-i386/cpu.h | 18 ++++++++++-------- target-i386/smm_helper.c | 9 +++++++++ 2 files changed, 19 insertions(+), 8 deletions(-) diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 7a06495834..9dae9ab854 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -180,15 +180,17 @@ /* hflags2 */ -#define HF2_GIF_SHIFT 0 /* if set CPU takes interrupts */ -#define HF2_HIF_SHIFT 1 /* value of IF_MASK when entering SVM */ -#define HF2_NMI_SHIFT 2 /* CPU serving NMI */ -#define HF2_VINTR_SHIFT 3 /* value of V_INTR_MASKING bit */ +#define HF2_GIF_SHIFT 0 /* if set CPU takes interrupts */ +#define HF2_HIF_SHIFT 1 /* value of IF_MASK when entering SVM */ +#define HF2_NMI_SHIFT 2 /* CPU serving NMI */ +#define HF2_VINTR_SHIFT 3 /* value of V_INTR_MASKING bit */ +#define HF2_SMM_INSIDE_NMI_SHIFT 4 /* CPU serving SMI nested inside NMI */ -#define HF2_GIF_MASK (1 << HF2_GIF_SHIFT) -#define HF2_HIF_MASK (1 << HF2_HIF_SHIFT) -#define HF2_NMI_MASK (1 << HF2_NMI_SHIFT) -#define HF2_VINTR_MASK (1 << HF2_VINTR_SHIFT) +#define HF2_GIF_MASK (1 << HF2_GIF_SHIFT) +#define HF2_HIF_MASK (1 << HF2_HIF_SHIFT) +#define HF2_NMI_MASK (1 << HF2_NMI_SHIFT) +#define HF2_VINTR_MASK (1 << HF2_VINTR_SHIFT) +#define HF2_SMM_INSIDE_NMI_MASK (1 << HF2_SMM_INSIDE_NMI_SHIFT) #define CR0_PE_SHIFT 0 #define CR0_MP_SHIFT 1 diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c index b9971b6e19..6207c3a143 100644 --- a/target-i386/smm_helper.c +++ b/target-i386/smm_helper.c @@ -52,6 +52,11 @@ void do_smm_enter(X86CPU *cpu) log_cpu_state_mask(CPU_LOG_INT, CPU(cpu), CPU_DUMP_CCOP); env->hflags |= HF_SMM_MASK; + if (env->hflags2 & HF2_NMI_MASK) { + env->hflags2 |= HF2_SMM_INSIDE_NMI_MASK; + } else { + env->hflags2 |= HF2_NMI_MASK; + } cpu_smm_update(env); sm_state = env->smbase + 0x8000; @@ -307,6 +312,10 @@ void helper_rsm(CPUX86State *env) env->smbase = x86_ldl_phys(cs, sm_state + 0x7ef8) & ~0x7fff; } #endif + if ((env->hflags2 & HF2_SMM_INSIDE_NMI_MASK) == 0) { + env->hflags2 &= ~HF2_NMI_MASK; + } + env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK; env->hflags &= ~HF_SMM_MASK; cpu_smm_update(env); From b4854f1384176d897747de236f426d020668fa3c Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 30 Apr 2015 12:02:46 +0200 Subject: [PATCH 42/62] target-i386: set G=1 in SMM big real mode selectors Because the limit field's bits 31:20 is 1, G should be 1. VMX actually enforces this, let's do it for completeness in QEMU as well. Signed-off-by: Paolo Bonzini --- target-i386/smm_helper.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c index 6207c3a143..5617a14854 100644 --- a/target-i386/smm_helper.c +++ b/target-i386/smm_helper.c @@ -177,22 +177,22 @@ void do_smm_enter(X86CPU *cpu) cpu_x86_load_seg_cache(env, R_CS, (env->smbase >> 4) & 0xffff, env->smbase, 0xffffffff, DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | - DESC_A_MASK); + DESC_G_MASK | DESC_A_MASK); cpu_x86_load_seg_cache(env, R_DS, 0, 0, 0xffffffff, DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | - DESC_A_MASK); + DESC_G_MASK | DESC_A_MASK); cpu_x86_load_seg_cache(env, R_ES, 0, 0, 0xffffffff, DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | - DESC_A_MASK); + DESC_G_MASK | DESC_A_MASK); cpu_x86_load_seg_cache(env, R_SS, 0, 0, 0xffffffff, DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | - DESC_A_MASK); + DESC_G_MASK | DESC_A_MASK); cpu_x86_load_seg_cache(env, R_FS, 0, 0, 0xffffffff, DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | - DESC_A_MASK); + DESC_G_MASK | DESC_A_MASK); cpu_x86_load_seg_cache(env, R_GS, 0, 0, 0xffffffff, DESC_P_MASK | DESC_S_MASK | DESC_W_MASK | - DESC_A_MASK); + DESC_G_MASK | DESC_A_MASK); } void helper_rsm(CPUX86State *env) From a9bad65d2c1f61af74ce2ff43238d4b20bf81c3a Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 19 May 2015 13:46:47 +0200 Subject: [PATCH 43/62] target-i386: wake up processors that receive an SMI An SMI should definitely wake up a processor in halted state! This lets OVMF boot with SMM on multiprocessor systems, although it halts very soon after that with a "CpuIndex != BspIndex" assertion failure. Signed-off-by: Paolo Bonzini --- target-i386/cpu.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 99ad551bee..0faca03595 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -3063,7 +3063,9 @@ static bool x86_cpu_has_work(CPUState *cs) (cs->interrupt_request & (CPU_INTERRUPT_NMI | CPU_INTERRUPT_INIT | CPU_INTERRUPT_SIPI | - CPU_INTERRUPT_MCE)); + CPU_INTERRUPT_MCE)) || + ((cs->interrupt_request & CPU_INTERRUPT_SMI) && + !(env->hflags & HF_SMM_MASK)); } static Property x86_cpu_properties[] = { From e98094221ec336fcfd0c72c66f280f1cabb16c72 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Apr 2015 13:53:29 +0200 Subject: [PATCH 44/62] pflash_cfi01: change big-endian property to BIT type Make this consistent with the secure property, added in the next patch. Signed-off-by: Paolo Bonzini --- hw/arm/vexpress.c | 2 +- hw/arm/virt.c | 2 +- hw/block/pflash_cfi01.c | 11 +++++++---- 3 files changed, 9 insertions(+), 6 deletions(-) diff --git a/hw/arm/vexpress.c b/hw/arm/vexpress.c index 8f1a5ea992..da217884e6 100644 --- a/hw/arm/vexpress.c +++ b/hw/arm/vexpress.c @@ -525,7 +525,7 @@ static pflash_t *ve_pflash_cfi01_register(hwaddr base, const char *name, qdev_prop_set_uint64(dev, "sector-length", VEXPRESS_FLASH_SECT_SIZE); qdev_prop_set_uint8(dev, "width", 4); qdev_prop_set_uint8(dev, "device-width", 2); - qdev_prop_set_uint8(dev, "big-endian", 0); + qdev_prop_set_bit(dev, "big-endian", false); qdev_prop_set_uint16(dev, "id0", 0x89); qdev_prop_set_uint16(dev, "id1", 0x18); qdev_prop_set_uint16(dev, "id2", 0x00); diff --git a/hw/arm/virt.c b/hw/arm/virt.c index 0a75cc83ee..1b1cc716ad 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -555,7 +555,7 @@ static void create_one_flash(const char *name, hwaddr flashbase, qdev_prop_set_uint64(dev, "sector-length", sectorlength); qdev_prop_set_uint8(dev, "width", 4); qdev_prop_set_uint8(dev, "device-width", 2); - qdev_prop_set_uint8(dev, "big-endian", 0); + qdev_prop_set_bit(dev, "big-endian", false); qdev_prop_set_uint16(dev, "id0", 0x89); qdev_prop_set_uint16(dev, "id1", 0x18); qdev_prop_set_uint16(dev, "id2", 0x00); diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index d282695086..bf06b0b1ee 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -64,6 +64,8 @@ do { \ #define TYPE_CFI_PFLASH01 "cfi.pflash01" #define CFI_PFLASH01(obj) OBJECT_CHECK(pflash_t, (obj), TYPE_CFI_PFLASH01) +#define PFLASH_BE 0 + struct pflash_t { /*< private >*/ SysBusDevice parent_obj; @@ -75,7 +77,7 @@ struct pflash_t { uint8_t bank_width; uint8_t device_width; /* If 0, device width not specified. */ uint8_t max_device_width; /* max device width in bytes */ - uint8_t be; + uint32_t features; uint8_t wcycle; /* if 0, the flash is read normally */ int ro; uint8_t cmd; @@ -773,7 +775,8 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) memory_region_init_rom_device( &pfl->mem, OBJECT(dev), - pfl->be ? &pflash_cfi01_ops_be : &pflash_cfi01_ops_le, pfl, + pfl->features & (1 << PFLASH_BE) ? &pflash_cfi01_ops_be : &pflash_cfi01_ops_le, + pfl, pfl->name, total_len, &local_err); if (local_err) { error_propagate(errp, local_err); @@ -925,7 +928,7 @@ static Property pflash_cfi01_properties[] = { DEFINE_PROP_UINT8("width", struct pflash_t, bank_width, 0), DEFINE_PROP_UINT8("device-width", struct pflash_t, device_width, 0), DEFINE_PROP_UINT8("max-device-width", struct pflash_t, max_device_width, 0), - DEFINE_PROP_UINT8("big-endian", struct pflash_t, be, 0), + DEFINE_PROP_BIT("big-endian", struct pflash_t, features, PFLASH_BE, 0), DEFINE_PROP_UINT16("id0", struct pflash_t, ident0, 0), DEFINE_PROP_UINT16("id1", struct pflash_t, ident1, 0), DEFINE_PROP_UINT16("id2", struct pflash_t, ident2, 0), @@ -975,7 +978,7 @@ pflash_t *pflash_cfi01_register(hwaddr base, qdev_prop_set_uint32(dev, "num-blocks", nb_blocs); qdev_prop_set_uint64(dev, "sector-length", sector_len); qdev_prop_set_uint8(dev, "width", bank_width); - qdev_prop_set_uint8(dev, "big-endian", !!be); + qdev_prop_set_bit(dev, "big-endian", !!be); qdev_prop_set_uint16(dev, "id0", id0); qdev_prop_set_uint16(dev, "id1", id1); qdev_prop_set_uint16(dev, "id2", id2); From 5aa113f0a2c245b0a77865e1dd2445bdd24c3ef8 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Apr 2015 14:00:53 +0200 Subject: [PATCH 45/62] pflash_cfi01: change to new-style MMIO accessors This is a required step to implement read_with_attrs and write_with_attrs. Signed-off-by: Paolo Bonzini --- hw/block/pflash_cfi01.c | 96 +++++------------------------------------ 1 file changed, 10 insertions(+), 86 deletions(-) diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index bf06b0b1ee..f99951ad8b 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -650,101 +650,25 @@ static void pflash_write(pflash_t *pfl, hwaddr offset, } -static uint32_t pflash_readb_be(void *opaque, hwaddr addr) -{ - return pflash_read(opaque, addr, 1, 1); -} - -static uint32_t pflash_readb_le(void *opaque, hwaddr addr) -{ - return pflash_read(opaque, addr, 1, 0); -} - -static uint32_t pflash_readw_be(void *opaque, hwaddr addr) +static uint64_t pflash_mem_read(void *opaque, hwaddr addr, unsigned len) { pflash_t *pfl = opaque; + bool be = !!(pfl->features & (1 << PFLASH_BE)); - return pflash_read(pfl, addr, 2, 1); + return pflash_read(pfl, addr, len, be); } -static uint32_t pflash_readw_le(void *opaque, hwaddr addr) +static void pflash_mem_write(void *opaque, hwaddr addr, uint64_t value, unsigned len) { pflash_t *pfl = opaque; + bool be = !!(pfl->features & (1 << PFLASH_BE)); - return pflash_read(pfl, addr, 2, 0); + pflash_write(pfl, addr, value, len, be); } -static uint32_t pflash_readl_be(void *opaque, hwaddr addr) -{ - pflash_t *pfl = opaque; - - return pflash_read(pfl, addr, 4, 1); -} - -static uint32_t pflash_readl_le(void *opaque, hwaddr addr) -{ - pflash_t *pfl = opaque; - - return pflash_read(pfl, addr, 4, 0); -} - -static void pflash_writeb_be(void *opaque, hwaddr addr, - uint32_t value) -{ - pflash_write(opaque, addr, value, 1, 1); -} - -static void pflash_writeb_le(void *opaque, hwaddr addr, - uint32_t value) -{ - pflash_write(opaque, addr, value, 1, 0); -} - -static void pflash_writew_be(void *opaque, hwaddr addr, - uint32_t value) -{ - pflash_t *pfl = opaque; - - pflash_write(pfl, addr, value, 2, 1); -} - -static void pflash_writew_le(void *opaque, hwaddr addr, - uint32_t value) -{ - pflash_t *pfl = opaque; - - pflash_write(pfl, addr, value, 2, 0); -} - -static void pflash_writel_be(void *opaque, hwaddr addr, - uint32_t value) -{ - pflash_t *pfl = opaque; - - pflash_write(pfl, addr, value, 4, 1); -} - -static void pflash_writel_le(void *opaque, hwaddr addr, - uint32_t value) -{ - pflash_t *pfl = opaque; - - pflash_write(pfl, addr, value, 4, 0); -} - -static const MemoryRegionOps pflash_cfi01_ops_be = { - .old_mmio = { - .read = { pflash_readb_be, pflash_readw_be, pflash_readl_be, }, - .write = { pflash_writeb_be, pflash_writew_be, pflash_writel_be, }, - }, - .endianness = DEVICE_NATIVE_ENDIAN, -}; - -static const MemoryRegionOps pflash_cfi01_ops_le = { - .old_mmio = { - .read = { pflash_readb_le, pflash_readw_le, pflash_readl_le, }, - .write = { pflash_writeb_le, pflash_writew_le, pflash_writel_le, }, - }, +static const MemoryRegionOps pflash_cfi01_ops = { + .read = pflash_mem_read, + .write = pflash_mem_write, .endianness = DEVICE_NATIVE_ENDIAN, }; @@ -775,7 +699,7 @@ static void pflash_cfi01_realize(DeviceState *dev, Error **errp) memory_region_init_rom_device( &pfl->mem, OBJECT(dev), - pfl->features & (1 << PFLASH_BE) ? &pflash_cfi01_ops_be : &pflash_cfi01_ops_le, + &pflash_cfi01_ops, pfl, pfl->name, total_len, &local_err); if (local_err) { From f71e42a5c98722d6faa5be84a34fbad90d27dc04 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Apr 2015 14:09:43 +0200 Subject: [PATCH 46/62] pflash_cfi01: add secure property When this property is set, MMIO accesses are only allowed with the MEMTXATTRS_SECURE attribute. This is used for secure access to UEFI variables stored in flash. Signed-off-by: Paolo Bonzini --- hw/block/pflash_cfi01.c | 111 ++++++++++++++++++++++++---------------- 1 file changed, 67 insertions(+), 44 deletions(-) diff --git a/hw/block/pflash_cfi01.c b/hw/block/pflash_cfi01.c index f99951ad8b..2ba6c77293 100644 --- a/hw/block/pflash_cfi01.c +++ b/hw/block/pflash_cfi01.c @@ -65,6 +65,7 @@ do { \ #define CFI_PFLASH01(obj) OBJECT_CHECK(pflash_t, (obj), TYPE_CFI_PFLASH01) #define PFLASH_BE 0 +#define PFLASH_SECURE 1 struct pflash_t { /*< private >*/ @@ -237,12 +238,57 @@ static uint32_t pflash_devid_query(pflash_t *pfl, hwaddr offset) return resp; } +static uint32_t pflash_data_read(pflash_t *pfl, hwaddr offset, + int width, int be) +{ + uint8_t *p; + uint32_t ret; + + p = pfl->storage; + switch (width) { + case 1: + ret = p[offset]; + DPRINTF("%s: data offset " TARGET_FMT_plx " %02x\n", + __func__, offset, ret); + break; + case 2: + if (be) { + ret = p[offset] << 8; + ret |= p[offset + 1]; + } else { + ret = p[offset]; + ret |= p[offset + 1] << 8; + } + DPRINTF("%s: data offset " TARGET_FMT_plx " %04x\n", + __func__, offset, ret); + break; + case 4: + if (be) { + ret = p[offset] << 24; + ret |= p[offset + 1] << 16; + ret |= p[offset + 2] << 8; + ret |= p[offset + 3]; + } else { + ret = p[offset]; + ret |= p[offset + 1] << 8; + ret |= p[offset + 2] << 16; + ret |= p[offset + 3] << 24; + } + DPRINTF("%s: data offset " TARGET_FMT_plx " %08x\n", + __func__, offset, ret); + break; + default: + DPRINTF("BUG in %s\n", __func__); + abort(); + } + return ret; +} + static uint32_t pflash_read (pflash_t *pfl, hwaddr offset, int width, int be) { hwaddr boff; uint32_t ret; - uint8_t *p; ret = -1; @@ -259,43 +305,7 @@ static uint32_t pflash_read (pflash_t *pfl, hwaddr offset, /* fall through to read code */ case 0x00: /* Flash area read */ - p = pfl->storage; - switch (width) { - case 1: - ret = p[offset]; - DPRINTF("%s: data offset " TARGET_FMT_plx " %02x\n", - __func__, offset, ret); - break; - case 2: - if (be) { - ret = p[offset] << 8; - ret |= p[offset + 1]; - } else { - ret = p[offset]; - ret |= p[offset + 1] << 8; - } - DPRINTF("%s: data offset " TARGET_FMT_plx " %04x\n", - __func__, offset, ret); - break; - case 4: - if (be) { - ret = p[offset] << 24; - ret |= p[offset + 1] << 16; - ret |= p[offset + 2] << 8; - ret |= p[offset + 3]; - } else { - ret = p[offset]; - ret |= p[offset + 1] << 8; - ret |= p[offset + 2] << 16; - ret |= p[offset + 3] << 24; - } - DPRINTF("%s: data offset " TARGET_FMT_plx " %08x\n", - __func__, offset, ret); - break; - default: - DPRINTF("BUG in %s\n", __func__); - } - + ret = pflash_data_read(pfl, offset, width, be); break; case 0x10: /* Single byte program */ case 0x20: /* Block erase */ @@ -650,25 +660,37 @@ static void pflash_write(pflash_t *pfl, hwaddr offset, } -static uint64_t pflash_mem_read(void *opaque, hwaddr addr, unsigned len) +static MemTxResult pflash_mem_read_with_attrs(void *opaque, hwaddr addr, uint64_t *value, + unsigned len, MemTxAttrs attrs) { pflash_t *pfl = opaque; bool be = !!(pfl->features & (1 << PFLASH_BE)); - return pflash_read(pfl, addr, len, be); + if ((pfl->features & (1 << PFLASH_SECURE)) && !attrs.secure) { + *value = pflash_data_read(opaque, addr, len, be); + } else { + *value = pflash_read(opaque, addr, len, be); + } + return MEMTX_OK; } -static void pflash_mem_write(void *opaque, hwaddr addr, uint64_t value, unsigned len) +static MemTxResult pflash_mem_write_with_attrs(void *opaque, hwaddr addr, uint64_t value, + unsigned len, MemTxAttrs attrs) { pflash_t *pfl = opaque; bool be = !!(pfl->features & (1 << PFLASH_BE)); - pflash_write(pfl, addr, value, len, be); + if ((pfl->features & (1 << PFLASH_SECURE)) && !attrs.secure) { + return MEMTX_ERROR; + } else { + pflash_write(opaque, addr, value, len, be); + return MEMTX_OK; + } } static const MemoryRegionOps pflash_cfi01_ops = { - .read = pflash_mem_read, - .write = pflash_mem_write, + .read_with_attrs = pflash_mem_read_with_attrs, + .write_with_attrs = pflash_mem_write_with_attrs, .endianness = DEVICE_NATIVE_ENDIAN, }; @@ -853,6 +875,7 @@ static Property pflash_cfi01_properties[] = { DEFINE_PROP_UINT8("device-width", struct pflash_t, device_width, 0), DEFINE_PROP_UINT8("max-device-width", struct pflash_t, max_device_width, 0), DEFINE_PROP_BIT("big-endian", struct pflash_t, features, PFLASH_BE, 0), + DEFINE_PROP_BIT("secure", struct pflash_t, features, PFLASH_SECURE, 0), DEFINE_PROP_UINT16("id0", struct pflash_t, ident0, 0), DEFINE_PROP_UINT16("id1", struct pflash_t, ident1, 0), DEFINE_PROP_UINT16("id2", struct pflash_t, ident2, 0), From 3751d7c43f795b45ffdb9429cfb09c6beea55c68 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 9 Apr 2015 14:16:19 +0200 Subject: [PATCH 47/62] vl: allow full-blown QemuOpts syntax for -global -global does not work for drivers that have a dot in their name, such as cfi.pflash01. This is just a parsing limitation, because such globals can be declared easily inside a -readconfig file. To allow this usage, support the full QemuOpts key/value syntax for -global too, for example "-global driver=cfi.pflash01,property=secure,value=on". The two formats do not conflict, because the key/value syntax does not have a period before the first equal sign. Signed-off-by: Paolo Bonzini --- qdev-monitor.c | 18 +++++++++++------- qemu-options.hx | 7 ++++++- 2 files changed, 17 insertions(+), 8 deletions(-) diff --git a/qdev-monitor.c b/qdev-monitor.c index 1d87f573e8..9f17c81d9f 100644 --- a/qdev-monitor.c +++ b/qdev-monitor.c @@ -822,15 +822,19 @@ int qemu_global_option(const char *str) QemuOpts *opts; int rc, offset; - rc = sscanf(str, "%63[^.].%63[^=]%n", driver, property, &offset); - if (rc < 2 || str[offset] != '=') { - error_report("can't parse: \"%s\"", str); + rc = sscanf(str, "%63[^.=].%63[^=]%n", driver, property, &offset); + if (rc == 2 && str[offset] == '=') { + opts = qemu_opts_create(&qemu_global_opts, NULL, 0, &error_abort); + qemu_opt_set(opts, "driver", driver, &error_abort); + qemu_opt_set(opts, "property", property, &error_abort); + qemu_opt_set(opts, "value", str + offset + 1, &error_abort); + return 0; + } + + opts = qemu_opts_parse(&qemu_global_opts, str, false); + if (!opts) { return -1; } - opts = qemu_opts_create(&qemu_global_opts, NULL, 0, &error_abort); - qemu_opt_set(opts, "driver", driver, &error_abort); - qemu_opt_set(opts, "property", property, &error_abort); - qemu_opt_set(opts, "value", str + offset + 1, &error_abort); return 0; } diff --git a/qemu-options.hx b/qemu-options.hx index 85883f165a..3085c1f83e 100644 --- a/qemu-options.hx +++ b/qemu-options.hx @@ -171,11 +171,13 @@ Set parameter @var{arg} for item @var{id} of type @var{group}\n" ETEXI DEF("global", HAS_ARG, QEMU_OPTION_global, - "-global driver.prop=value\n" + "-global driver.property=value\n" + "-global driver=driver,property=property,value=value\n" " set a global default for a driver property\n", QEMU_ARCH_ALL) STEXI @item -global @var{driver}.@var{prop}=@var{value} +@itemx -global driver=@var{driver},property=@var{property},value=@var{value} @findex -global Set default value of @var{driver}'s property @var{prop} to @var{value}, e.g.: @@ -186,6 +188,9 @@ qemu-system-i386 -global ide-drive.physical_block_size=4096 -drive file=file,if= In particular, you can use this to set driver properties for devices which are created automatically by the machine model. To create a device which is not created automatically and set properties on it, use -@option{device}. + +The two syntaxes are equivalent. The longer one works for drivers whose name +contains a dot. ETEXI DEF("boot", HAS_ARG, QEMU_OPTION_boot, From fb9e7e334b54350e8e3b62bd7892b78f63a9d848 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 5 May 2015 18:29:00 +0200 Subject: [PATCH 48/62] qom: add object_property_add_const_link Suggested-by: Eduardo Habkost Signed-off-by: Paolo Bonzini --- include/qom/object.h | 18 ++++++++++++++++++ qom/object.c | 16 ++++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/include/qom/object.h b/include/qom/object.h index d2d7748f62..0505f20e71 100644 --- a/include/qom/object.h +++ b/include/qom/object.h @@ -1289,6 +1289,24 @@ void object_property_add_alias(Object *obj, const char *name, Object *target_obj, const char *target_name, Error **errp); +/** + * object_property_add_const_link: + * @obj: the object to add a property to + * @name: the name of the property + * @target: the object to be referred by the link + * @errp: if an error occurs, a pointer to an area to store the error + * + * Add an unmodifiable link for a property on an object. This function will + * add a property of type link where TYPE is the type of @target. + * + * The caller must ensure that @target stays alive as long as + * this property exists. In the case @target is a child of @obj, + * this will be the case. Otherwise, the caller is responsible for + * taking a reference. + */ +void object_property_add_const_link(Object *obj, const char *name, + Object *target, Error **errp); + /** * object_property_set_description: * @obj: the object owning the property diff --git a/qom/object.c b/qom/object.c index b8dff43297..96abd347b1 100644 --- a/qom/object.c +++ b/qom/object.c @@ -1266,6 +1266,22 @@ out: g_free(full_type); } +void object_property_add_const_link(Object *obj, const char *name, + Object *target, Error **errp) +{ + char *link_type; + ObjectProperty *op; + + link_type = g_strdup_printf("link<%s>", object_get_typename(target)); + op = object_property_add(obj, name, link_type, + object_get_child_property, NULL, + NULL, target, errp); + if (op != NULL) { + op->resolve = object_resolve_child_property; + } + g_free(link_type); +} + gchar *object_get_canonical_path_component(Object *obj) { ObjectProperty *prop = NULL; From 71cdd1cb914e24000273bbbfa5fb226cdb8ea265 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 31 Mar 2015 14:01:06 +0200 Subject: [PATCH 49/62] vl: run "late" notifiers immediately If a machine_init_done notifier is added late, as part of a hot-plugged device, run it immediately. Signed-off-by: Paolo Bonzini --- vl.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/vl.c b/vl.c index df5a7273c9..66ccd06be8 100644 --- a/vl.c +++ b/vl.c @@ -2500,14 +2500,20 @@ static void qemu_run_exit_notifiers(void) notifier_list_notify(&exit_notifiers, NULL); } +static bool machine_init_done; + void qemu_add_machine_init_done_notifier(Notifier *notify) { notifier_list_add(&machine_init_done_notifiers, notify); + if (machine_init_done) { + notify->notify(notify, NULL); + } } static void qemu_run_machine_init_done_notifiers(void) { notifier_list_notify(&machine_init_done_notifiers, NULL); + machine_init_done = true; } static const QEMUOption *lookup_opt(int argc, char **argv, From 2001d0cd6d55e5efa9956fa8ff8b89034d6a4329 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 31 Mar 2015 14:11:09 +0200 Subject: [PATCH 50/62] target-i386: create a separate AddressSpace for each CPU Different CPUs can be in SMM or not at the same time, thus they will see different things where the chipset places SMRAM. Signed-off-by: Paolo Bonzini --- target-i386/cpu-qom.h | 1 + target-i386/cpu.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h index 31a0c1e776..39cd878fad 100644 --- a/target-i386/cpu-qom.h +++ b/target-i386/cpu-qom.h @@ -111,6 +111,7 @@ typedef struct X86CPU { /* in order to simplify APIC support, we leave this pointer to the user */ struct DeviceState *apic_state; + struct MemoryRegion *cpu_as_root; } X86CPU; static inline X86CPU *x86_env_get_cpu(CPUX86State *env) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 0faca03595..051abc92cb 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -44,6 +44,7 @@ #include "hw/qdev-properties.h" #include "hw/cpu/icc_bus.h" #ifndef CONFIG_USER_ONLY +#include "exec/address-spaces.h" #include "hw/xen/xen.h" #include "hw/i386/apic_internal.h" #endif @@ -2811,6 +2812,18 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) #endif mce_init(cpu); + +#ifndef CONFIG_USER_ONLY + if (tcg_enabled()) { + cpu->cpu_as_root = g_new(MemoryRegion, 1); + cs->as = g_new(AddressSpace, 1); + memory_region_init_alias(cpu->cpu_as_root, OBJECT(cpu), "memory", + get_system_memory(), 0, ~0ull); + memory_region_set_enabled(cpu->cpu_as_root, true); + address_space_init(cs->as, cpu->cpu_as_root, "CPU"); + } +#endif + qemu_init_vcpu(cs); /* Only Intel CPUs support hyperthreading. Even though QEMU fixes this @@ -2834,6 +2847,7 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) cpu_reset(cs); xcc->parent_realize(dev, &local_err); + out: if (local_err != NULL) { error_propagate(errp, local_err); From fe6567d5fddfb7501a352c5e080a9eecf7b89177 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 31 Mar 2015 14:10:22 +0200 Subject: [PATCH 51/62] hw/i386: add a separate region that tracks the SMRAME bit This region is exported at /machine/smram. It is "empty" if SMRAME=0 and points to SMRAM if SMRAME=1. The CPU will enable/disable it as it enters or exits SMRAM. While touching nearby code, the existing memory region setup was slightly inconsistent. The smram_region is *disabled* in order to open SMRAM (because the smram_region shows the low VRAM instead of the RAM at 0xa0000). Because SMRAM is closed at startup, the smram_region must be enabled when creating the i440fx or q35 devices. Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- hw/pci-host/piix.c | 17 ++++++++++++++++- hw/pci-host/q35.c | 17 +++++++++++++++-- include/hw/pci-host/q35.h | 1 + 3 files changed, 32 insertions(+), 3 deletions(-) diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 723836fb0e..0e439c5d52 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -105,6 +105,7 @@ struct PCII440FXState { MemoryRegion *ram_memory; PAMMemoryRegion pam_regions[13]; MemoryRegion smram_region; + MemoryRegion smram, low_smram; uint8_t smm_enabled; }; @@ -139,6 +140,8 @@ static void i440fx_update_memory_mappings(PCII440FXState *d) pd->config[I440FX_PAM + ((i + 1) / 2)]); } smram_update(&d->smram_region, pd->config[I440FX_SMRAM], d->smm_enabled); + memory_region_set_enabled(&d->smram, + pd->config[I440FX_SMRAM] & SMRAM_G_SMRAME); memory_region_transaction_commit(); } @@ -346,11 +349,23 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, pc_pci_as_mapping_init(OBJECT(f), f->system_memory, f->pci_address_space); + /* if *disabled* show SMRAM to all CPUs */ memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region", f->pci_address_space, 0xa0000, 0x20000); memory_region_add_subregion_overlap(f->system_memory, 0xa0000, &f->smram_region, 1); - memory_region_set_enabled(&f->smram_region, false); + memory_region_set_enabled(&f->smram_region, true); + + /* smram, as seen by SMM CPUs */ + memory_region_init(&f->smram, OBJECT(d), "smram", 1ull << 32); + memory_region_set_enabled(&f->smram, true); + memory_region_init_alias(&f->low_smram, OBJECT(d), "smram-low", + f->system_memory, 0xa0000, 0x20000); + memory_region_set_enabled(&f->low_smram, true); + memory_region_add_subregion(&f->smram, 0xa0000, &f->low_smram); + object_property_add_const_link(qdev_get_machine(), "smram", + OBJECT(&f->smram), &error_abort); + init_pam(dev, f->ram_memory, f->system_memory, f->pci_address_space, &f->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE); for (i = 0; i < 12; ++i) { diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index c8827cc000..db4d871908 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -270,6 +270,8 @@ static void mch_update_smram(MCHPCIState *mch) memory_region_transaction_begin(); smram_update(&mch->smram_region, pd->config[MCH_HOST_BRIDGE_SMRAM], mch->smm_enabled); + memory_region_set_enabled(&mch->smram, + pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_G_SMRAME); memory_region_transaction_commit(); } @@ -399,13 +401,24 @@ static void mch_realize(PCIDevice *d, Error **errp) pc_pci_as_mapping_init(OBJECT(mch), mch->system_memory, mch->pci_address_space); - /* smram */ + /* if *disabled* show SMRAM to all CPUs */ cpu_smm_register(&mch_set_smm, mch); memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", mch->pci_address_space, 0xa0000, 0x20000); memory_region_add_subregion_overlap(mch->system_memory, 0xa0000, &mch->smram_region, 1); - memory_region_set_enabled(&mch->smram_region, false); + memory_region_set_enabled(&mch->smram_region, true); + + /* smram, as seen by SMM CPUs */ + memory_region_init(&mch->smram, OBJECT(mch), "smram", 1ull << 32); + memory_region_set_enabled(&mch->smram, true); + memory_region_init_alias(&mch->low_smram, OBJECT(mch), "smram-low", + mch->system_memory, 0xa0000, 0x20000); + memory_region_set_enabled(&mch->low_smram, true); + memory_region_add_subregion(&mch->smram, 0xa0000, &mch->low_smram); + object_property_add_const_link(qdev_get_machine(), "smram", + OBJECT(&mch->smram), &error_abort); + init_pam(DEVICE(mch), mch->ram_memory, mch->system_memory, mch->pci_address_space, &mch->pam_regions[0], PAM_BIOS_BASE, PAM_BIOS_SIZE); diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 96d4cdc713..4c9eacc587 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -53,6 +53,7 @@ typedef struct MCHPCIState { MemoryRegion *address_space_io; PAMMemoryRegion pam_regions[13]; MemoryRegion smram_region; + MemoryRegion smram, low_smram; PcPciInfo pci_info; uint8_t smm_enabled; ram_addr_t below_4g_mem_size; From f809c605122df291bbb9004dc487bde0969134b5 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 31 Mar 2015 14:12:25 +0200 Subject: [PATCH 52/62] target-i386: use memory API to implement SMRAM Remove cpu_smm_register and cpu_smm_update. Instead, each CPU address space gets an extra region which is an alias of /machine/smram. This extra region is enabled or disabled as the CPU enters/exits SMM. Signed-off-by: Paolo Bonzini --- bsd-user/main.c | 4 ---- hw/i386/pc.c | 21 --------------------- hw/pci-host/pam.c | 18 ++---------------- hw/pci-host/piix.c | 28 ++++++++-------------------- hw/pci-host/q35.c | 22 ++++++---------------- include/hw/i386/pc.h | 1 - include/hw/pci-host/pam.h | 5 +---- include/hw/pci-host/q35.h | 1 - linux-user/main.c | 4 ---- target-i386/cpu-qom.h | 4 +++- target-i386/cpu.c | 33 +++++++++++++++++++++++++++++++-- target-i386/cpu.h | 3 ++- target-i386/machine.c | 3 +++ target-i386/smm_helper.c | 14 ++++++++++++-- 14 files changed, 68 insertions(+), 93 deletions(-) diff --git a/bsd-user/main.c b/bsd-user/main.c index 5bfaf5c421..ba0b9981f5 100644 --- a/bsd-user/main.c +++ b/bsd-user/main.c @@ -108,10 +108,6 @@ void cpu_list_unlock(void) /***********************************************************/ /* CPUX86 core interface */ -void cpu_smm_update(CPUX86State *env) -{ -} - uint64_t cpu_get_tsc(CPUX86State *env) { return cpu_get_real_ticks(); diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 2baff4a660..3f0d435da9 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -164,27 +164,6 @@ uint64_t cpu_get_tsc(CPUX86State *env) return cpu_get_ticks(); } -/* SMM support */ - -static cpu_set_smm_t smm_set; -static void *smm_arg; - -void cpu_smm_register(cpu_set_smm_t callback, void *arg) -{ - assert(smm_set == NULL); - assert(smm_arg == NULL); - smm_set = callback; - smm_arg = arg; -} - -void cpu_smm_update(CPUX86State *env) -{ - if (smm_set && smm_arg && CPU(x86_env_get_cpu(env)) == first_cpu) { - smm_set(!!(env->hflags & HF_SMM_MASK), smm_arg); - } -} - - /* IRQ handling */ int cpu_get_pic_interrupt(CPUX86State *env) { diff --git a/hw/pci-host/pam.c b/hw/pci-host/pam.c index 8272de3f28..99d7af97e7 100644 --- a/hw/pci-host/pam.c +++ b/hw/pci-host/pam.c @@ -31,26 +31,12 @@ #include "sysemu/sysemu.h" #include "hw/pci-host/pam.h" -void smram_update(MemoryRegion *smram_region, uint8_t smram, - uint8_t smm_enabled) +void smram_update(MemoryRegion *smram_region, uint8_t smram) { - bool smram_enabled; - - smram_enabled = ((smm_enabled && (smram & SMRAM_G_SMRAME)) || - (smram & SMRAM_D_OPEN)); + bool smram_enabled = (smram & SMRAM_D_OPEN); memory_region_set_enabled(smram_region, !smram_enabled); } -void smram_set_smm(uint8_t *host_smm_enabled, int smm, uint8_t smram, - MemoryRegion *smram_region) -{ - uint8_t smm_enabled = (smm != 0); - if (*host_smm_enabled != smm_enabled) { - *host_smm_enabled = smm_enabled; - smram_update(smram_region, smram, *host_smm_enabled); - } -} - void init_pam(DeviceState *dev, MemoryRegion *ram_memory, MemoryRegion *system_memory, MemoryRegion *pci_address_space, PAMMemoryRegion *mem, uint32_t start, uint32_t size) diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index 0e439c5d52..a91ad73705 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -106,7 +106,6 @@ struct PCII440FXState { PAMMemoryRegion pam_regions[13]; MemoryRegion smram_region; MemoryRegion smram, low_smram; - uint8_t smm_enabled; }; @@ -139,23 +138,12 @@ static void i440fx_update_memory_mappings(PCII440FXState *d) pam_update(&d->pam_regions[i], i, pd->config[I440FX_PAM + ((i + 1) / 2)]); } - smram_update(&d->smram_region, pd->config[I440FX_SMRAM], d->smm_enabled); + smram_update(&d->smram_region, pd->config[I440FX_SMRAM]); memory_region_set_enabled(&d->smram, pd->config[I440FX_SMRAM] & SMRAM_G_SMRAME); memory_region_transaction_commit(); } -static void i440fx_set_smm(int val, void *arg) -{ - PCII440FXState *d = arg; - PCIDevice *pd = PCI_DEVICE(d); - - memory_region_transaction_begin(); - smram_set_smm(&d->smm_enabled, val, pd->config[I440FX_SMRAM], - &d->smram_region); - memory_region_transaction_commit(); -} - static void i440fx_write_config(PCIDevice *dev, uint32_t address, uint32_t val, int len) @@ -175,12 +163,13 @@ static int i440fx_load_old(QEMUFile* f, void *opaque, int version_id) PCII440FXState *d = opaque; PCIDevice *pd = PCI_DEVICE(d); int ret, i; + uint8_t smm_enabled; ret = pci_device_load(pd, f); if (ret < 0) return ret; i440fx_update_memory_mappings(d); - qemu_get_8s(f, &d->smm_enabled); + qemu_get_8s(f, &smm_enabled); if (version_id == 2) { for (i = 0; i < PIIX_NUM_PIRQS; i++) { @@ -208,7 +197,10 @@ static const VMStateDescription vmstate_i440fx = { .post_load = i440fx_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PCII440FXState), - VMSTATE_UINT8(smm_enabled, PCII440FXState), + /* Used to be smm_enabled, which was basically always zero because + * SeaBIOS hardly uses SMM. SMRAM is now handled by CPU code. + */ + VMSTATE_UNUSED(1), VMSTATE_END_OF_LIST() } }; @@ -300,11 +292,7 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) static void i440fx_realize(PCIDevice *dev, Error **errp) { - PCII440FXState *d = I440FX_PCI_DEVICE(dev); - dev->config[I440FX_SMRAM] = 0x02; - - cpu_smm_register(&i440fx_set_smm, d); } PCIBus *i440fx_init(PCII440FXState **pi440fx_state, @@ -360,7 +348,7 @@ PCIBus *i440fx_init(PCII440FXState **pi440fx_state, memory_region_init(&f->smram, OBJECT(d), "smram", 1ull << 32); memory_region_set_enabled(&f->smram, true); memory_region_init_alias(&f->low_smram, OBJECT(d), "smram-low", - f->system_memory, 0xa0000, 0x20000); + f->ram_memory, 0xa0000, 0x20000); memory_region_set_enabled(&f->low_smram, true); memory_region_add_subregion(&f->smram, 0xa0000, &f->low_smram); object_property_add_const_link(qdev_get_machine(), "smram", diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index db4d871908..eefa199335 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -268,24 +268,12 @@ static void mch_update_smram(MCHPCIState *mch) PCIDevice *pd = PCI_DEVICE(mch); memory_region_transaction_begin(); - smram_update(&mch->smram_region, pd->config[MCH_HOST_BRIDGE_SMRAM], - mch->smm_enabled); + smram_update(&mch->smram_region, pd->config[MCH_HOST_BRIDGE_SMRAM]); memory_region_set_enabled(&mch->smram, pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_G_SMRAME); memory_region_transaction_commit(); } -static void mch_set_smm(int smm, void *arg) -{ - MCHPCIState *mch = arg; - PCIDevice *pd = PCI_DEVICE(mch); - - memory_region_transaction_begin(); - smram_set_smm(&mch->smm_enabled, smm, pd->config[MCH_HOST_BRIDGE_SMRAM], - &mch->smram_region); - memory_region_transaction_commit(); -} - static void mch_write_config(PCIDevice *d, uint32_t address, uint32_t val, int len) { @@ -331,7 +319,10 @@ static const VMStateDescription vmstate_mch = { .post_load = mch_post_load, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, MCHPCIState), - VMSTATE_UINT8(smm_enabled, MCHPCIState), + /* Used to be smm_enabled, which was basically always zero because + * SeaBIOS hardly uses SMM. SMRAM is now handled by CPU code. + */ + VMSTATE_UNUSED(1), VMSTATE_END_OF_LIST() } }; @@ -402,7 +393,6 @@ static void mch_realize(PCIDevice *d, Error **errp) mch->pci_address_space); /* if *disabled* show SMRAM to all CPUs */ - cpu_smm_register(&mch_set_smm, mch); memory_region_init_alias(&mch->smram_region, OBJECT(mch), "smram-region", mch->pci_address_space, 0xa0000, 0x20000); memory_region_add_subregion_overlap(mch->system_memory, 0xa0000, @@ -413,7 +403,7 @@ static void mch_realize(PCIDevice *d, Error **errp) memory_region_init(&mch->smram, OBJECT(mch), "smram", 1ull << 32); memory_region_set_enabled(&mch->smram, true); memory_region_init_alias(&mch->low_smram, OBJECT(mch), "smram-low", - mch->system_memory, 0xa0000, 0x20000); + mch->ram_memory, 0xa0000, 0x20000); memory_region_set_enabled(&mch->low_smram, true); memory_region_add_subregion(&mch->smram, 0xa0000, &mch->low_smram); object_property_add_const_link(qdev_get_machine(), "smram", diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index bec6de1ddf..86c565147c 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -210,7 +210,6 @@ void pc_nic_init(ISABus *isa_bus, PCIBus *pci_bus); void pc_pci_device_init(PCIBus *pci_bus); typedef void (*cpu_set_smm_t)(int smm, void *arg); -void cpu_smm_register(cpu_set_smm_t callback, void *arg); void ioapic_init_gsi(GSIState *gsi_state, const char *parent_name); diff --git a/include/hw/pci-host/pam.h b/include/hw/pci-host/pam.h index 4d03e4bf18..80dd605b58 100644 --- a/include/hw/pci-host/pam.h +++ b/include/hw/pci-host/pam.h @@ -86,10 +86,7 @@ typedef struct PAMMemoryRegion { unsigned current; } PAMMemoryRegion; -void smram_update(MemoryRegion *smram_region, uint8_t smram, - uint8_t smm_enabled); -void smram_set_smm(uint8_t *host_smm_enabled, int smm, uint8_t smram, - MemoryRegion *smram_region); +void smram_update(MemoryRegion *smram_region, uint8_t smram); void init_pam(DeviceState *dev, MemoryRegion *ram, MemoryRegion *system, MemoryRegion *pci, PAMMemoryRegion *mem, uint32_t start, uint32_t size); void pam_update(PAMMemoryRegion *mem, int idx, uint8_t val); diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 4c9eacc587..17adeaaafd 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -55,7 +55,6 @@ typedef struct MCHPCIState { MemoryRegion smram_region; MemoryRegion smram, low_smram; PcPciInfo pci_info; - uint8_t smm_enabled; ram_addr_t below_4g_mem_size; ram_addr_t above_4g_mem_size; uint64_t pci_hole64_size; diff --git a/linux-user/main.c b/linux-user/main.c index 3f32db0afd..6989b82455 100644 --- a/linux-user/main.c +++ b/linux-user/main.c @@ -215,10 +215,6 @@ void cpu_list_unlock(void) /***********************************************************/ /* CPUX86 core interface */ -void cpu_smm_update(CPUX86State *env) -{ -} - uint64_t cpu_get_tsc(CPUX86State *env) { return cpu_get_real_ticks(); diff --git a/target-i386/cpu-qom.h b/target-i386/cpu-qom.h index 39cd878fad..7a4fddd85f 100644 --- a/target-i386/cpu-qom.h +++ b/target-i386/cpu-qom.h @@ -23,6 +23,7 @@ #include "qom/cpu.h" #include "cpu.h" #include "qapi/error.h" +#include "qemu/notify.h" #ifdef TARGET_X86_64 #define TYPE_X86_CPU "x86_64-cpu" @@ -111,7 +112,8 @@ typedef struct X86CPU { /* in order to simplify APIC support, we leave this pointer to the user */ struct DeviceState *apic_state; - struct MemoryRegion *cpu_as_root; + struct MemoryRegion *cpu_as_root, *cpu_as_mem, *smram; + Notifier machine_done; } X86CPU; static inline X86CPU *x86_env_get_cpu(CPUX86State *env) diff --git a/target-i386/cpu.c b/target-i386/cpu.c index 051abc92cb..4e7cdaaaa5 100644 --- a/target-i386/cpu.c +++ b/target-i386/cpu.c @@ -2751,6 +2751,21 @@ static void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) object_property_set_bool(OBJECT(cpu->apic_state), true, "realized", errp); } + +static void x86_cpu_machine_done(Notifier *n, void *unused) +{ + X86CPU *cpu = container_of(n, X86CPU, machine_done); + MemoryRegion *smram = + (MemoryRegion *) object_resolve_path("/machine/smram", NULL); + + if (smram) { + cpu->smram = g_new(MemoryRegion, 1); + memory_region_init_alias(cpu->smram, OBJECT(cpu), "smram", + smram, 0, 1ull << 32); + memory_region_set_enabled(cpu->smram, false); + memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, cpu->smram, 1); + } +} #else static void x86_cpu_apic_realize(X86CPU *cpu, Error **errp) { @@ -2815,12 +2830,26 @@ static void x86_cpu_realizefn(DeviceState *dev, Error **errp) #ifndef CONFIG_USER_ONLY if (tcg_enabled()) { + cpu->cpu_as_mem = g_new(MemoryRegion, 1); cpu->cpu_as_root = g_new(MemoryRegion, 1); cs->as = g_new(AddressSpace, 1); - memory_region_init_alias(cpu->cpu_as_root, OBJECT(cpu), "memory", - get_system_memory(), 0, ~0ull); + + /* Outer container... */ + memory_region_init(cpu->cpu_as_root, OBJECT(cpu), "memory", ~0ull); memory_region_set_enabled(cpu->cpu_as_root, true); + + /* ... with two regions inside: normal system memory with low + * priority, and... + */ + memory_region_init_alias(cpu->cpu_as_mem, OBJECT(cpu), "memory", + get_system_memory(), 0, ~0ull); + memory_region_add_subregion_overlap(cpu->cpu_as_root, 0, cpu->cpu_as_mem, 0); + memory_region_set_enabled(cpu->cpu_as_mem, true); address_space_init(cs->as, cpu->cpu_as_root, "CPU"); + + /* ... SMRAM with higher priority, linked from /machine/smram. */ + cpu->machine_done.notify = x86_cpu_machine_done; + qemu_add_machine_init_done_notifier(&cpu->machine_done); } #endif diff --git a/target-i386/cpu.h b/target-i386/cpu.h index 9dae9ab854..603aaf0924 100644 --- a/target-i386/cpu.h +++ b/target-i386/cpu.h @@ -1157,7 +1157,6 @@ void cpu_x86_update_cr3(CPUX86State *env, target_ulong new_cr3); void cpu_x86_update_cr4(CPUX86State *env, uint32_t new_cr4); /* hw/pc.c */ -void cpu_smm_update(CPUX86State *env); uint64_t cpu_get_tsc(CPUX86State *env); #define TARGET_PAGE_BITS 12 @@ -1323,7 +1322,9 @@ void cpu_vmexit(CPUX86State *nenv, uint32_t exit_code, uint64_t exit_info_1); /* seg_helper.c */ void do_interrupt_x86_hardirq(CPUX86State *env, int intno, int is_hw); +/* smm_helper.c */ void do_smm_enter(X86CPU *cpu); +void cpu_smm_update(X86CPU *cpu); void cpu_report_tpr_access(CPUX86State *env, TPRAccess access); diff --git a/target-i386/machine.c b/target-i386/machine.c index cd1ddd29e9..69d86cb476 100644 --- a/target-i386/machine.c +++ b/target-i386/machine.c @@ -372,6 +372,9 @@ static int cpu_post_load(void *opaque, int version_id) } tlb_flush(cs, 1); + if (tcg_enabled()) { + cpu_smm_update(cpu); + } return 0; } diff --git a/target-i386/smm_helper.c b/target-i386/smm_helper.c index 5617a14854..02e24b9236 100644 --- a/target-i386/smm_helper.c +++ b/target-i386/smm_helper.c @@ -40,6 +40,16 @@ void helper_rsm(CPUX86State *env) #define SMM_REVISION_ID 0x00020000 #endif +void cpu_smm_update(X86CPU *cpu) +{ + CPUX86State *env = &cpu->env; + bool smm_enabled = (env->hflags & HF_SMM_MASK); + + if (cpu->smram) { + memory_region_set_enabled(cpu->smram, smm_enabled); + } +} + void do_smm_enter(X86CPU *cpu) { CPUX86State *env = &cpu->env; @@ -57,7 +67,7 @@ void do_smm_enter(X86CPU *cpu) } else { env->hflags2 |= HF2_NMI_MASK; } - cpu_smm_update(env); + cpu_smm_update(cpu); sm_state = env->smbase + 0x8000; @@ -317,7 +327,7 @@ void helper_rsm(CPUX86State *env) } env->hflags2 &= ~HF2_SMM_INSIDE_NMI_MASK; env->hflags &= ~HF_SMM_MASK; - cpu_smm_update(env); + cpu_smm_update(cpu); qemu_log_mask(CPU_LOG_INT, "SMM: after RSM\n"); log_cpu_state_mask(CPU_LOG_INT, CPU(cpu), CPU_DUMP_CCOP); From 3de70c0899db2712a5ae321093aa6173d6f76706 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 31 Mar 2015 14:14:28 +0200 Subject: [PATCH 53/62] hw/i386: remove smram_update It's easier to inline it now that most of its work is done by the CPU (rather than the chipset) through /machine/smram and the memory API. Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- hw/pci-host/pam.c | 6 ------ hw/pci-host/piix.c | 3 ++- hw/pci-host/q35.c | 3 ++- include/hw/pci-host/pam.h | 1 - 4 files changed, 4 insertions(+), 9 deletions(-) diff --git a/hw/pci-host/pam.c b/hw/pci-host/pam.c index 99d7af97e7..17d826cba5 100644 --- a/hw/pci-host/pam.c +++ b/hw/pci-host/pam.c @@ -31,12 +31,6 @@ #include "sysemu/sysemu.h" #include "hw/pci-host/pam.h" -void smram_update(MemoryRegion *smram_region, uint8_t smram) -{ - bool smram_enabled = (smram & SMRAM_D_OPEN); - memory_region_set_enabled(smram_region, !smram_enabled); -} - void init_pam(DeviceState *dev, MemoryRegion *ram_memory, MemoryRegion *system_memory, MemoryRegion *pci_address_space, PAMMemoryRegion *mem, uint32_t start, uint32_t size) diff --git a/hw/pci-host/piix.c b/hw/pci-host/piix.c index a91ad73705..f1712b86fe 100644 --- a/hw/pci-host/piix.c +++ b/hw/pci-host/piix.c @@ -138,7 +138,8 @@ static void i440fx_update_memory_mappings(PCII440FXState *d) pam_update(&d->pam_regions[i], i, pd->config[I440FX_PAM + ((i + 1) / 2)]); } - smram_update(&d->smram_region, pd->config[I440FX_SMRAM]); + memory_region_set_enabled(&d->smram_region, + !(pd->config[I440FX_SMRAM] & SMRAM_D_OPEN)); memory_region_set_enabled(&d->smram, pd->config[I440FX_SMRAM] & SMRAM_G_SMRAME); memory_region_transaction_commit(); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index eefa199335..24829e0d52 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -268,7 +268,8 @@ static void mch_update_smram(MCHPCIState *mch) PCIDevice *pd = PCI_DEVICE(mch); memory_region_transaction_begin(); - smram_update(&mch->smram_region, pd->config[MCH_HOST_BRIDGE_SMRAM]); + memory_region_set_enabled(&mch->smram_region, + !(pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_D_OPEN)); memory_region_set_enabled(&mch->smram, pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_G_SMRAME); memory_region_transaction_commit(); diff --git a/include/hw/pci-host/pam.h b/include/hw/pci-host/pam.h index 80dd605b58..6116c638f9 100644 --- a/include/hw/pci-host/pam.h +++ b/include/hw/pci-host/pam.h @@ -86,7 +86,6 @@ typedef struct PAMMemoryRegion { unsigned current; } PAMMemoryRegion; -void smram_update(MemoryRegion *smram_region, uint8_t smram); void init_pam(DeviceState *dev, MemoryRegion *ram, MemoryRegion *system, MemoryRegion *pci, PAMMemoryRegion *mem, uint32_t start, uint32_t size); void pam_update(PAMMemoryRegion *mem, int idx, uint8_t val); From 64130fa4a1514ae7a580b8d46290a11784770600 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 31 Mar 2015 17:13:01 +0200 Subject: [PATCH 54/62] q35: implement high SMRAM When H_SMRAME is 1, low memory at 0xa0000 is left alone by SMM, and instead the chipset maps the 0xa0000-0xbffff window at 0xfeda0000-0xfedbffff. This affects both the "non-SMM" view controlled by D_OPEN and the SMM view controlled by G_SMRAME, so add two new MemoryRegions and toggle the enabled/disabled state of all four in mch_update_smram. Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- hw/pci-host/q35.c | 35 +++++++++++++++++++++++++++++++---- include/hw/pci-host/q35.h | 16 ++++++++-------- 2 files changed, 39 insertions(+), 12 deletions(-) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 24829e0d52..8f8d9e86e1 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -266,12 +266,29 @@ static void mch_update_pam(MCHPCIState *mch) static void mch_update_smram(MCHPCIState *mch) { PCIDevice *pd = PCI_DEVICE(mch); + bool h_smrame = (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME); memory_region_transaction_begin(); - memory_region_set_enabled(&mch->smram_region, - !(pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_D_OPEN)); - memory_region_set_enabled(&mch->smram, - pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_G_SMRAME); + + if (pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_D_OPEN) { + /* Hide (!) low SMRAM if H_SMRAME = 1 */ + memory_region_set_enabled(&mch->smram_region, h_smrame); + /* Show high SMRAM if H_SMRAME = 1 */ + memory_region_set_enabled(&mch->open_high_smram, h_smrame); + } else { + /* Hide high SMRAM and low SMRAM */ + memory_region_set_enabled(&mch->smram_region, true); + memory_region_set_enabled(&mch->open_high_smram, false); + } + + if (pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_G_SMRAME) { + memory_region_set_enabled(&mch->low_smram, !h_smrame); + memory_region_set_enabled(&mch->high_smram, h_smrame); + } else { + memory_region_set_enabled(&mch->low_smram, false); + memory_region_set_enabled(&mch->high_smram, false); + } + memory_region_transaction_commit(); } @@ -400,6 +417,12 @@ static void mch_realize(PCIDevice *d, Error **errp) &mch->smram_region, 1); memory_region_set_enabled(&mch->smram_region, true); + memory_region_init_alias(&mch->open_high_smram, OBJECT(mch), "smram-open-high", + mch->ram_memory, 0xa0000, 0x20000); + memory_region_add_subregion_overlap(mch->system_memory, 0xfeda0000, + &mch->open_high_smram, 1); + memory_region_set_enabled(&mch->open_high_smram, false); + /* smram, as seen by SMM CPUs */ memory_region_init(&mch->smram, OBJECT(mch), "smram", 1ull << 32); memory_region_set_enabled(&mch->smram, true); @@ -407,6 +430,10 @@ static void mch_realize(PCIDevice *d, Error **errp) mch->ram_memory, 0xa0000, 0x20000); memory_region_set_enabled(&mch->low_smram, true); memory_region_add_subregion(&mch->smram, 0xa0000, &mch->low_smram); + memory_region_init_alias(&mch->high_smram, OBJECT(mch), "smram-high", + mch->ram_memory, 0xa0000, 0x20000); + memory_region_set_enabled(&mch->high_smram, true); + memory_region_add_subregion(&mch->smram, 0xfeda0000, &mch->high_smram); object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&mch->smram), &error_abort); diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 17adeaaafd..0fff6a2421 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -52,8 +52,8 @@ typedef struct MCHPCIState { MemoryRegion *system_memory; MemoryRegion *address_space_io; PAMMemoryRegion pam_regions[13]; - MemoryRegion smram_region; - MemoryRegion smram, low_smram; + MemoryRegion smram_region, open_high_smram; + MemoryRegion smram, low_smram, high_smram; PcPciInfo pci_info; ram_addr_t below_4g_mem_size; ram_addr_t above_4g_mem_size; @@ -127,7 +127,7 @@ typedef struct Q35PCIHost { #define MCH_HOST_BRIDGE_PAM_MASK ((uint8_t)0x3) #define MCH_HOST_BRIDGE_SMRAM 0x9d -#define MCH_HOST_BRIDGE_SMRAM_SIZE 1 +#define MCH_HOST_BRIDGE_SMRAM_SIZE 2 #define MCH_HOST_BRIDGE_SMRAM_DEFAULT ((uint8_t)0x2) #define MCH_HOST_BRIDGE_SMRAM_D_OPEN ((uint8_t)(1 << 6)) #define MCH_HOST_BRIDGE_SMRAM_D_CLS ((uint8_t)(1 << 5)) @@ -141,11 +141,11 @@ typedef struct Q35PCIHost { #define MCH_HOST_BRIDGE_UPPER_SYSTEM_BIOS_END 0x100000 #define MCH_HOST_BRIDGE_ESMRAMC 0x9e -#define MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME ((uint8_t)(1 << 6)) -#define MCH_HOST_BRIDGE_ESMRAMC_E_SMERR ((uint8_t)(1 << 5)) -#define MCH_HOST_BRIDGE_ESMRAMC_SM_CACHE ((uint8_t)(1 << 4)) -#define MCH_HOST_BRIDGE_ESMRAMC_SM_L1 ((uint8_t)(1 << 3)) -#define MCH_HOST_BRIDGE_ESMRAMC_SM_L2 ((uint8_t)(1 << 2)) +#define MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME ((uint8_t)(1 << 7)) +#define MCH_HOST_BRIDGE_ESMRAMC_E_SMERR ((uint8_t)(1 << 6)) +#define MCH_HOST_BRIDGE_ESMRAMC_SM_CACHE ((uint8_t)(1 << 5)) +#define MCH_HOST_BRIDGE_ESMRAMC_SM_L1 ((uint8_t)(1 << 4)) +#define MCH_HOST_BRIDGE_ESMRAMC_SM_L2 ((uint8_t)(1 << 3)) #define MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK ((uint8_t)(0x3 << 1)) #define MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB ((uint8_t)(0x0 << 1)) #define MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_2MB ((uint8_t)(0x1 << 1)) From 7744752402d11cebe4c1d4079dcd40d3145eb37b Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 15 Apr 2015 16:43:24 +0200 Subject: [PATCH 55/62] q35: fix ESMRAMC default The cache bits in ESMRAMC are hardcoded to 1 (=disabled) according to the q35 mch specs. Add and use a define with this default. While being at it also update the SMRAM default to use the name (no code change, just makes things a bit more readable). Signed-off-by: Gerd Hoffmann Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- hw/pci-host/q35.c | 1 + include/hw/pci-host/q35.h | 7 ++++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 8f8d9e86e1..4e65bdc559 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -354,6 +354,7 @@ static void mch_reset(DeviceState *qdev) MCH_HOST_BRIDGE_PCIEXBAR_DEFAULT); d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; + d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; mch_update(mch); } diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 0fff6a2421..d3c7bbbdfe 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -128,7 +128,6 @@ typedef struct Q35PCIHost { #define MCH_HOST_BRIDGE_SMRAM 0x9d #define MCH_HOST_BRIDGE_SMRAM_SIZE 2 -#define MCH_HOST_BRIDGE_SMRAM_DEFAULT ((uint8_t)0x2) #define MCH_HOST_BRIDGE_SMRAM_D_OPEN ((uint8_t)(1 << 6)) #define MCH_HOST_BRIDGE_SMRAM_D_CLS ((uint8_t)(1 << 5)) #define MCH_HOST_BRIDGE_SMRAM_D_LCK ((uint8_t)(1 << 4)) @@ -139,6 +138,8 @@ typedef struct Q35PCIHost { #define MCH_HOST_BRIDGE_SMRAM_C_END 0xc0000 #define MCH_HOST_BRIDGE_SMRAM_C_SIZE 0x20000 #define MCH_HOST_BRIDGE_UPPER_SYSTEM_BIOS_END 0x100000 +#define MCH_HOST_BRIDGE_SMRAM_DEFAULT \ + MCH_HOST_BRIDGE_SMRAM_C_BASE_SEG #define MCH_HOST_BRIDGE_ESMRAMC 0x9e #define MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME ((uint8_t)(1 << 7)) @@ -151,6 +152,10 @@ typedef struct Q35PCIHost { #define MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_2MB ((uint8_t)(0x1 << 1)) #define MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_8MB ((uint8_t)(0x2 << 1)) #define MCH_HOST_BRIDGE_ESMRAMC_T_EN ((uint8_t)1) +#define MCH_HOST_BRIDGE_ESMRAMC_DEFAULT \ + (MCH_HOST_BRIDGE_ESMRAMC_SM_CACHE | \ + MCH_HOST_BRIDGE_ESMRAMC_SM_L1 | \ + MCH_HOST_BRIDGE_ESMRAMC_SM_L2) /* D1:F0 PCIE* port*/ #define MCH_PCIE_DEV 1 From b66a67d7519cb7f980885af5391b1103c42e9b6d Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 15 Apr 2015 16:48:12 +0200 Subject: [PATCH 56/62] q35: add config space wmask for SMRAM and ESMRAMC Not all bits in SMRAM and ESMRAMC can be changed by the guest. Add wmask defines accordingly and set them in mch_reset(). Signed-off-by: Gerd Hoffmann Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- hw/pci-host/q35.c | 2 ++ include/hw/pci-host/q35.h | 9 +++++++++ 2 files changed, 11 insertions(+) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 4e65bdc559..14e5aebee6 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -355,6 +355,8 @@ static void mch_reset(DeviceState *qdev) d->config[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_DEFAULT; d->config[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_DEFAULT; + d->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK; + d->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK; mch_update(mch); } diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index d3c7bbbdfe..01b8492551 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -140,6 +140,11 @@ typedef struct Q35PCIHost { #define MCH_HOST_BRIDGE_UPPER_SYSTEM_BIOS_END 0x100000 #define MCH_HOST_BRIDGE_SMRAM_DEFAULT \ MCH_HOST_BRIDGE_SMRAM_C_BASE_SEG +#define MCH_HOST_BRIDGE_SMRAM_WMASK \ + (MCH_HOST_BRIDGE_SMRAM_D_OPEN | \ + MCH_HOST_BRIDGE_SMRAM_D_CLS | \ + MCH_HOST_BRIDGE_SMRAM_D_LCK | \ + MCH_HOST_BRIDGE_SMRAM_G_SMRAME) #define MCH_HOST_BRIDGE_ESMRAMC 0x9e #define MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME ((uint8_t)(1 << 7)) @@ -156,6 +161,10 @@ typedef struct Q35PCIHost { (MCH_HOST_BRIDGE_ESMRAMC_SM_CACHE | \ MCH_HOST_BRIDGE_ESMRAMC_SM_L1 | \ MCH_HOST_BRIDGE_ESMRAMC_SM_L2) +#define MCH_HOST_BRIDGE_ESMRAMC_WMASK \ + (MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME | \ + MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK | \ + MCH_HOST_BRIDGE_ESMRAMC_T_EN) /* D1:F0 PCIE* port*/ #define MCH_PCIE_DEV 1 From 68c77acfb18d28933f17b1c2a842bd936ce7223b Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 14 Apr 2015 14:03:22 +0200 Subject: [PATCH 57/62] q35: implement SMRAM.D_LCK Once the SMRAM.D_LCK bit has been set by the guest several bits in SMRAM and ESMRAMC become readonly until the next machine reset. Implement this by updating the wmask accordingly when the guest sets the lock bit. As the lock it itself is locked down too we don't need to worry about the guest clearing the lock bit. Signed-off-by: Gerd Hoffmann Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- hw/pci-host/q35.c | 8 +++++++- include/hw/pci-host/q35.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 14e5aebee6..305994bc38 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -268,6 +268,13 @@ static void mch_update_smram(MCHPCIState *mch) PCIDevice *pd = PCI_DEVICE(mch); bool h_smrame = (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME); + /* implement SMRAM.D_LCK */ + if (pd->config[MCH_HOST_BRIDGE_SMRAM] & MCH_HOST_BRIDGE_SMRAM_D_LCK) { + pd->config[MCH_HOST_BRIDGE_SMRAM] &= ~MCH_HOST_BRIDGE_SMRAM_D_OPEN; + pd->wmask[MCH_HOST_BRIDGE_SMRAM] = MCH_HOST_BRIDGE_SMRAM_WMASK_LCK; + pd->wmask[MCH_HOST_BRIDGE_ESMRAMC] = MCH_HOST_BRIDGE_ESMRAMC_WMASK_LCK; + } + memory_region_transaction_begin(); if (pd->config[MCH_HOST_BRIDGE_SMRAM] & SMRAM_D_OPEN) { @@ -297,7 +304,6 @@ static void mch_write_config(PCIDevice *d, { MCHPCIState *mch = MCH_PCI_DEVICE(d); - /* XXX: implement SMRAM.D_LOCK */ pci_default_write_config(d, address, val, len); if (ranges_overlap(address, len, MCH_HOST_BRIDGE_PAM0, diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 01b8492551..113cbe8190 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -145,6 +145,8 @@ typedef struct Q35PCIHost { MCH_HOST_BRIDGE_SMRAM_D_CLS | \ MCH_HOST_BRIDGE_SMRAM_D_LCK | \ MCH_HOST_BRIDGE_SMRAM_G_SMRAME) +#define MCH_HOST_BRIDGE_SMRAM_WMASK_LCK \ + MCH_HOST_BRIDGE_SMRAM_D_CLS #define MCH_HOST_BRIDGE_ESMRAMC 0x9e #define MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME ((uint8_t)(1 << 7)) @@ -165,6 +167,7 @@ typedef struct Q35PCIHost { (MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME | \ MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK | \ MCH_HOST_BRIDGE_ESMRAMC_T_EN) +#define MCH_HOST_BRIDGE_ESMRAMC_WMASK_LCK 0 /* D1:F0 PCIE* port*/ #define MCH_PCIE_DEV 1 From 66e2ec2417e72edea1df5fb340b210100b0571b7 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Tue, 14 Apr 2015 15:11:36 +0200 Subject: [PATCH 58/62] q35: add test for SMRAM.D_LCK Signed-off-by: Gerd Hoffmann Acked-by: Michael S. Tsirkin [Fix compilation of the newly introduced test. - Paolo] Signed-off-by: Paolo Bonzini --- tests/Makefile | 3 ++ tests/q35-test.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 94 insertions(+) create mode 100644 tests/q35-test.c diff --git a/tests/Makefile b/tests/Makefile index 729b9694cf..c5e474455c 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -174,6 +174,8 @@ gcov-files-i386-y += hw/usb/dev-storage.c check-qtest-i386-y += tests/usb-hcd-xhci-test$(EXESUF) gcov-files-i386-y += hw/usb/hcd-xhci.c check-qtest-i386-y += tests/pc-cpu-test$(EXESUF) +check-qtest-i386-y += tests/q35-test$(EXESUF) +gcov-files-i386-y += hw/pci-host/q35.c check-qtest-i386-$(CONFIG_LINUX) += tests/vhost-user-test$(EXESUF) check-qtest-x86_64-y = $(check-qtest-i386-y) gcov-files-i386-y += i386-softmmu/hw/timer/mc146818rtc.c @@ -355,6 +357,7 @@ tests/boot-order-test$(EXESUF): tests/boot-order-test.o $(libqos-obj-y) tests/bios-tables-test$(EXESUF): tests/bios-tables-test.o $(libqos-obj-y) tests/tmp105-test$(EXESUF): tests/tmp105-test.o $(libqos-omap-obj-y) tests/i440fx-test$(EXESUF): tests/i440fx-test.o $(libqos-pc-obj-y) +tests/q35-test$(EXESUF): tests/q35-test.o $(libqos-pc-obj-y) tests/fw_cfg-test$(EXESUF): tests/fw_cfg-test.o $(libqos-pc-obj-y) tests/e1000-test$(EXESUF): tests/e1000-test.o tests/rtl8139-test$(EXESUF): tests/rtl8139-test.o $(libqos-pc-obj-y) diff --git a/tests/q35-test.c b/tests/q35-test.c new file mode 100644 index 0000000000..812abe5480 --- /dev/null +++ b/tests/q35-test.c @@ -0,0 +1,91 @@ +/* + * QTest testcase for Q35 northbridge + * + * Copyright (c) 2015 Red Hat, Inc. + * + * Author: Gerd Hoffmann + * + * This work is licensed under the terms of the GNU GPL, version 2 or later. + * See the COPYING file in the top-level directory. + */ + +#include +#include +#include "libqtest.h" +#include "libqos/pci.h" +#include "libqos/pci-pc.h" +#include "qemu/osdep.h" +#include "hw/pci-host/q35.h" + +static void smram_set_bit(QPCIDevice *pcidev, uint8_t mask, bool enabled) +{ + uint8_t smram; + + smram = qpci_config_readb(pcidev, MCH_HOST_BRIDGE_SMRAM); + if (enabled) { + smram |= mask; + } else { + smram &= ~mask; + } + qpci_config_writeb(pcidev, MCH_HOST_BRIDGE_SMRAM, smram); +} + +static bool smram_test_bit(QPCIDevice *pcidev, uint8_t mask) +{ + uint8_t smram; + + smram = qpci_config_readb(pcidev, MCH_HOST_BRIDGE_SMRAM); + return smram & mask; +} + +static void test_smram_lock(void) +{ + QPCIBus *pcibus; + QPCIDevice *pcidev; + QDict *response; + + pcibus = qpci_init_pc(); + g_assert(pcibus != NULL); + + pcidev = qpci_device_find(pcibus, 0); + g_assert(pcidev != NULL); + + /* check open is settable */ + smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN, false); + g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == false); + smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN, true); + g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == true); + + /* lock, check open is cleared & not settable */ + smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_LCK, true); + g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == false); + smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN, true); + g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == false); + + /* reset */ + response = qmp("{'execute': 'system_reset', 'arguments': {} }"); + g_assert(response); + g_assert(!qdict_haskey(response, "error")); + QDECREF(response); + + /* check open is settable again */ + smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN, false); + g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == false); + smram_set_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN, true); + g_assert(smram_test_bit(pcidev, MCH_HOST_BRIDGE_SMRAM_D_OPEN) == true); +} + +int main(int argc, char **argv) +{ + int ret; + + g_test_init(&argc, &argv, NULL); + + qtest_add_func("/q35/smram/lock", test_smram_lock); + + qtest_start("-M q35"); + ret = g_test_run(); + qtest_end(); + + return ret; +} From bafc90bdc594a4d04db846bd8712bdcec59678a8 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Mon, 20 Apr 2015 10:55:09 +0200 Subject: [PATCH 59/62] q35: implement TSEG TSEG provides larger amounts of SMRAM than the 128 KB available with legacy SMRAM and high SMRAM. Route access to tseg into nowhere when enabled, for both cpus and busmaster dma, and add tseg window to smram region, so cpus can access it in smm mode. Signed-off-by: Gerd Hoffmann Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- hw/pci-host/q35.c | 70 +++++++++++++++++++++++++++++++++++++++ include/hw/pci-host/q35.h | 1 + 2 files changed, 71 insertions(+) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 305994bc38..bd7409456f 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -198,6 +198,28 @@ static const TypeInfo q35_host_info = { * MCH D0:F0 */ +static uint64_t tseg_blackhole_read(void *ptr, hwaddr reg, unsigned size) +{ + return 0xffffffff; +} + +static void tseg_blackhole_write(void *opaque, hwaddr addr, uint64_t val, + unsigned width) +{ + /* nothing */ +} + +static const MemoryRegionOps tseg_blackhole_ops = { + .read = tseg_blackhole_read, + .write = tseg_blackhole_write, + .endianness = DEVICE_NATIVE_ENDIAN, + .valid.min_access_size = 1, + .valid.max_access_size = 4, + .impl.min_access_size = 4, + .impl.max_access_size = 4, + .endianness = DEVICE_LITTLE_ENDIAN, +}; + /* PCIe MMCFG */ static void mch_update_pciexbar(MCHPCIState *mch) { @@ -267,6 +289,7 @@ static void mch_update_smram(MCHPCIState *mch) { PCIDevice *pd = PCI_DEVICE(mch); bool h_smrame = (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_H_SMRAME); + uint32_t tseg_size; /* implement SMRAM.D_LCK */ if (pd->config[MCH_HOST_BRIDGE_SMRAM] & MCH_HOST_BRIDGE_SMRAM_D_LCK) { @@ -296,6 +319,39 @@ static void mch_update_smram(MCHPCIState *mch) memory_region_set_enabled(&mch->high_smram, false); } + if (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & MCH_HOST_BRIDGE_ESMRAMC_T_EN) { + switch (pd->config[MCH_HOST_BRIDGE_ESMRAMC] & + MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_MASK) { + case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_1MB: + tseg_size = 1024 * 1024; + break; + case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_2MB: + tseg_size = 1024 * 1024 * 2; + break; + case MCH_HOST_BRIDGE_ESMRAMC_TSEG_SZ_8MB: + tseg_size = 1024 * 1024 * 8; + break; + default: + tseg_size = 0; + break; + } + } else { + tseg_size = 0; + } + memory_region_del_subregion(mch->system_memory, &mch->tseg_blackhole); + memory_region_set_enabled(&mch->tseg_blackhole, tseg_size); + memory_region_set_size(&mch->tseg_blackhole, tseg_size); + memory_region_add_subregion_overlap(mch->system_memory, + mch->below_4g_mem_size - tseg_size, + &mch->tseg_blackhole, 1); + + memory_region_set_enabled(&mch->tseg_window, tseg_size); + memory_region_set_size(&mch->tseg_window, tseg_size); + memory_region_set_address(&mch->tseg_window, + mch->below_4g_mem_size - tseg_size); + memory_region_set_alias_offset(&mch->tseg_window, + mch->below_4g_mem_size - tseg_size); + memory_region_transaction_commit(); } @@ -443,6 +499,20 @@ static void mch_realize(PCIDevice *d, Error **errp) mch->ram_memory, 0xa0000, 0x20000); memory_region_set_enabled(&mch->high_smram, true); memory_region_add_subregion(&mch->smram, 0xfeda0000, &mch->high_smram); + + memory_region_init_io(&mch->tseg_blackhole, OBJECT(mch), + &tseg_blackhole_ops, NULL, + "tseg-blackhole", 0); + memory_region_set_enabled(&mch->tseg_blackhole, false); + memory_region_add_subregion_overlap(mch->system_memory, + mch->below_4g_mem_size, + &mch->tseg_blackhole, 1); + + memory_region_init_alias(&mch->tseg_window, OBJECT(mch), "tseg-window", + mch->ram_memory, mch->below_4g_mem_size, 0); + memory_region_set_enabled(&mch->tseg_window, false); + memory_region_add_subregion(&mch->smram, mch->below_4g_mem_size, + &mch->tseg_window); object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&mch->smram), &error_abort); diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index 113cbe8190..dbe6dc05b5 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -54,6 +54,7 @@ typedef struct MCHPCIState { PAMMemoryRegion pam_regions[13]; MemoryRegion smram_region, open_high_smram; MemoryRegion smram, low_smram, high_smram; + MemoryRegion tseg_blackhole, tseg_window; PcPciInfo pci_info; ram_addr_t below_4g_mem_size; ram_addr_t above_4g_mem_size; From 11e66a15a084cb0820dba13f4ea3b15b0512fd39 Mon Sep 17 00:00:00 2001 From: Gerd Hoffmann Date: Wed, 6 May 2015 10:58:30 +0200 Subject: [PATCH 60/62] ich9: implement SMI_LOCK Add write mask for the smi enable register, so we can disable write access to certain bits. Open all bits on reset. Disable write access to GBL_SMI_EN when SMI_LOCK (in ich9 lpc pci config space) is set. Write access to SMI_LOCK itself is disabled too. Signed-off-by: Gerd Hoffmann Acked-by: Michael S. Tsirkin Signed-off-by: Paolo Bonzini --- hw/acpi/ich9.c | 4 +++- hw/isa/lpc_ich9.c | 19 +++++++++++++++++++ include/hw/acpi/ich9.h | 1 + include/hw/i386/ich9.h | 6 ++++++ 4 files changed, 29 insertions(+), 1 deletion(-) diff --git a/hw/acpi/ich9.c b/hw/acpi/ich9.c index 799351ea44..25bc023882 100644 --- a/hw/acpi/ich9.c +++ b/hw/acpi/ich9.c @@ -94,7 +94,8 @@ static void ich9_smi_writel(void *opaque, hwaddr addr, uint64_t val, ICH9LPCPMRegs *pm = opaque; switch (addr) { case 0: - pm->smi_en = val; + pm->smi_en &= ~pm->smi_en_wmask; + pm->smi_en |= (val & pm->smi_en_wmask); break; } } @@ -198,6 +199,7 @@ static void pm_reset(void *opaque) * support SMM mode. */ pm->smi_en |= ICH9_PMIO_SMI_EN_APMC_EN; } + pm->smi_en_wmask = ~0; acpi_update_sci(&pm->acpi_regs, pm->irq); } diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index 18718d772e..71a9f7a716 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -407,12 +407,28 @@ static void ich9_lpc_rcba_update(ICH9LPCState *lpc, uint32_t rbca_old) } } +/* config:GEN_PMCON* */ +static void +ich9_lpc_pmcon_update(ICH9LPCState *lpc) +{ + uint16_t gen_pmcon_1 = pci_get_word(lpc->d.config + ICH9_LPC_GEN_PMCON_1); + uint16_t wmask; + + if (gen_pmcon_1 & ICH9_LPC_GEN_PMCON_1_SMI_LOCK) { + wmask = pci_get_word(lpc->d.wmask + ICH9_LPC_GEN_PMCON_1); + wmask &= ~ICH9_LPC_GEN_PMCON_1_SMI_LOCK; + pci_set_word(lpc->d.wmask + ICH9_LPC_GEN_PMCON_1, wmask); + lpc->pm.smi_en_wmask &= ~1; + } +} + static int ich9_lpc_post_load(void *opaque, int version_id) { ICH9LPCState *lpc = opaque; ich9_lpc_pmbase_update(lpc); ich9_lpc_rcba_update(lpc, 0 /* disabled ICH9_LPC_RBCA_EN */); + ich9_lpc_pmcon_update(lpc); return 0; } @@ -435,6 +451,9 @@ static void ich9_lpc_config_write(PCIDevice *d, if (ranges_overlap(addr, len, ICH9_LPC_PIRQE_ROUT, 4)) { pci_bus_fire_intx_routing_notifier(lpc->d.bus); } + if (ranges_overlap(addr, len, ICH9_LPC_GEN_PMCON_1, 8)) { + ich9_lpc_pmcon_update(lpc); + } } static void ich9_lpc_reset(DeviceState *qdev) diff --git a/include/hw/acpi/ich9.h b/include/hw/acpi/ich9.h index c2d3dba0c7..77cc65cbc2 100644 --- a/include/hw/acpi/ich9.h +++ b/include/hw/acpi/ich9.h @@ -39,6 +39,7 @@ typedef struct ICH9LPCPMRegs { MemoryRegion io_smi; uint32_t smi_en; + uint32_t smi_en_wmask; uint32_t smi_sts; qemu_irq irq; /* SCI */ diff --git a/include/hw/i386/ich9.h b/include/hw/i386/ich9.h index f4e522cc1f..a2cc15c915 100644 --- a/include/hw/i386/ich9.h +++ b/include/hw/i386/ich9.h @@ -152,6 +152,12 @@ Object *ich9_lpc_find(void); #define ICH9_LPC_PIRQ_ROUT_MASK Q35_MASK(8, 3, 0) #define ICH9_LPC_PIRQ_ROUT_DEFAULT 0x80 +#define ICH9_LPC_GEN_PMCON_1 0xa0 +#define ICH9_LPC_GEN_PMCON_1_SMI_LOCK (1 << 4) +#define ICH9_LPC_GEN_PMCON_2 0xa2 +#define ICH9_LPC_GEN_PMCON_3 0xa4 +#define ICH9_LPC_GEN_PMCON_LOCK 0xa6 + #define ICH9_LPC_RCBA 0xf0 #define ICH9_LPC_RCBA_BA_MASK Q35_MASK(32, 31, 14) #define ICH9_LPC_RCBA_EN 0x1 From 3bbf572345c65813f86a8fc434ea1b23beb08e16 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 3 Jun 2015 14:21:20 +0200 Subject: [PATCH 61/62] atomics: add explicit compiler fence in __atomic memory barriers __atomic_thread_fence does not include a compiler barrier; in the C++11 memory model, fences take effect in combination with other atomic operations. GCC implements this by making __atomic_load and __atomic_store access memory as if the pointer was volatile, and leaves no trace whatsoever of acquire and release fences in the compiler's intermediate representation. In QEMU, we want memory barriers to act on all memory, but at the same time we would like to use __atomic_thread_fence for portability reasons. Add compiler barriers manually around the __atomic_thread_fence. Message-Id: <1433334080-14912-1-git-send-email-pbonzini@redhat.com> Reviewed-by: Stefan Hajnoczi Signed-off-by: Paolo Bonzini --- include/qemu/atomic.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/include/qemu/atomic.h b/include/qemu/atomic.h index 98e05ca875..bd2c075343 100644 --- a/include/qemu/atomic.h +++ b/include/qemu/atomic.h @@ -99,7 +99,13 @@ #ifndef smp_wmb #ifdef __ATOMIC_RELEASE -#define smp_wmb() __atomic_thread_fence(__ATOMIC_RELEASE) +/* __atomic_thread_fence does not include a compiler barrier; instead, + * the barrier is part of __atomic_load/__atomic_store's "volatile-like" + * semantics. If smp_wmb() is a no-op, absence of the barrier means that + * the compiler is free to reorder stores on each side of the barrier. + * Add one here, and similarly in smp_rmb() and smp_read_barrier_depends(). + */ +#define smp_wmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_RELEASE); barrier(); }) #else #define smp_wmb() __sync_synchronize() #endif @@ -107,7 +113,7 @@ #ifndef smp_rmb #ifdef __ATOMIC_ACQUIRE -#define smp_rmb() __atomic_thread_fence(__ATOMIC_ACQUIRE) +#define smp_rmb() ({ barrier(); __atomic_thread_fence(__ATOMIC_ACQUIRE); barrier(); }) #else #define smp_rmb() __sync_synchronize() #endif @@ -115,7 +121,7 @@ #ifndef smp_read_barrier_depends #ifdef __ATOMIC_CONSUME -#define smp_read_barrier_depends() __atomic_thread_fence(__ATOMIC_CONSUME) +#define smp_read_barrier_depends() ({ barrier(); __atomic_thread_fence(__ATOMIC_CONSUME); barrier(); }) #else #define smp_read_barrier_depends() barrier() #endif From 24a314269281a175b5540b3b6a8981ed2e8220e1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 4 Jun 2015 16:38:29 +0200 Subject: [PATCH 62/62] update Linux headers from kvm/next This is kvm.git commit 05ff30bb56c6b3d3000519d6e02ed35678ddae3b. Signed-off-by: Paolo Bonzini --- include/standard-headers/linux/virtio_ring.h | 2 +- linux-headers/asm-x86/kvm.h | 14 +++++++++++++- linux-headers/linux/kvm.h | 7 ++++++- 3 files changed, 20 insertions(+), 3 deletions(-) diff --git a/include/standard-headers/linux/virtio_ring.h b/include/standard-headers/linux/virtio_ring.h index cc647d61fc..6fe276fafb 100644 --- a/include/standard-headers/linux/virtio_ring.h +++ b/include/standard-headers/linux/virtio_ring.h @@ -155,7 +155,7 @@ static inline unsigned vring_size(unsigned int num, unsigned long align) } /* The following is used with USED_EVENT_IDX and AVAIL_EVENT_IDX */ -/* Assuming a given event_idx value from the other size, if +/* Assuming a given event_idx value from the other side, if * we have just incremented index from old to new_idx, * should we trigger an event? */ static inline int vring_need_event(uint16_t event_idx, uint16_t new_idx, uint16_t old) diff --git a/linux-headers/asm-x86/kvm.h b/linux-headers/asm-x86/kvm.h index d7dcef58ae..a4ae82eb82 100644 --- a/linux-headers/asm-x86/kvm.h +++ b/linux-headers/asm-x86/kvm.h @@ -106,6 +106,8 @@ struct kvm_ioapic_state { #define KVM_IRQCHIP_IOAPIC 2 #define KVM_NR_IRQCHIPS 3 +#define KVM_RUN_X86_SMM (1 << 0) + /* for KVM_GET_REGS and KVM_SET_REGS */ struct kvm_regs { /* out (KVM_GET_REGS) / in (KVM_SET_REGS) */ @@ -281,6 +283,7 @@ struct kvm_reinject_control { #define KVM_VCPUEVENT_VALID_NMI_PENDING 0x00000001 #define KVM_VCPUEVENT_VALID_SIPI_VECTOR 0x00000002 #define KVM_VCPUEVENT_VALID_SHADOW 0x00000004 +#define KVM_VCPUEVENT_VALID_SMM 0x00000008 /* Interrupt shadow states */ #define KVM_X86_SHADOW_INT_MOV_SS 0x01 @@ -309,7 +312,13 @@ struct kvm_vcpu_events { } nmi; __u32 sipi_vector; __u32 flags; - __u32 reserved[10]; + struct { + __u8 smm; + __u8 pending; + __u8 smm_inside_nmi; + __u8 latched_init; + } smi; + __u32 reserved[9]; }; /* for KVM_GET/SET_DEBUGREGS */ @@ -345,4 +354,7 @@ struct kvm_xcrs { struct kvm_sync_regs { }; +#define KVM_QUIRK_LINT0_REENABLED (1 << 0) +#define KVM_QUIRK_CD_NW_CLEARED (1 << 1) + #endif /* _ASM_X86_KVM_H */ diff --git a/linux-headers/linux/kvm.h b/linux-headers/linux/kvm.h index b96d9787dd..fad9e5c561 100644 --- a/linux-headers/linux/kvm.h +++ b/linux-headers/linux/kvm.h @@ -202,7 +202,7 @@ struct kvm_run { __u32 exit_reason; __u8 ready_for_interrupt_injection; __u8 if_flag; - __u8 padding2[2]; + __u16 flags; /* in (pre_kvm_run), out (post_kvm_run) */ __u64 cr8; @@ -814,6 +814,9 @@ struct kvm_ppc_smmu_info { #define KVM_CAP_S390_INJECT_IRQ 113 #define KVM_CAP_S390_IRQ_STATE 114 #define KVM_CAP_PPC_HWRNG 115 +#define KVM_CAP_DISABLE_QUIRKS 116 +#define KVM_CAP_X86_SMM 117 +#define KVM_CAP_MULTI_ADDRESS_SPACE 118 #ifdef KVM_CAP_IRQ_ROUTING @@ -1199,6 +1202,8 @@ struct kvm_s390_ucas_mapping { /* Available with KVM_CAP_S390_IRQ_STATE */ #define KVM_S390_SET_IRQ_STATE _IOW(KVMIO, 0xb5, struct kvm_s390_irq_state) #define KVM_S390_GET_IRQ_STATE _IOW(KVMIO, 0xb6, struct kvm_s390_irq_state) +/* Available with KVM_CAP_X86_SMM */ +#define KVM_SMI _IO(KVMIO, 0xb7) #define KVM_DEV_ASSIGN_ENABLE_IOMMU (1 << 0) #define KVM_DEV_ASSIGN_PCI_2_3 (1 << 1)