From f41d912023e777d95e782a1ade6338c5fed8b842 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 8 Feb 2018 19:26:51 +0200 Subject: [PATCH 01/22] Revert "vhost: add traces for memory listeners" This reverts commit 0750b060216de69ed1f14bc08181bf4ad27fc622. Follow up patches are reworking the memory listeners, the new mechanism will add its own set of traces. Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 6 ------ hw/virtio/vhost.c | 7 ------- 2 files changed, 13 deletions(-) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 2b8f81eb25..775461ae98 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -25,9 +25,3 @@ virtio_balloon_handle_output(const char *name, uint64_t gpa) "section name: %s g virtio_balloon_get_config(uint32_t num_pages, uint32_t actual) "num_pages: %d actual: %d" virtio_balloon_set_config(uint32_t actual, uint32_t oldactual) "actual: %d oldactual: %d" virtio_balloon_to_target(uint64_t target, uint32_t num_pages) "balloon target: 0x%"PRIx64" num_pages: %d" - -# hw/virtio/vhost.c -vhost_region_add(void *p, const char *mr) "dev %p mr %s" -vhost_region_del(void *p, const char *mr) "dev %p mr %s" -vhost_iommu_region_add(void *p, const char *mr) "dev %p mr %s" -vhost_iommu_region_del(void *p, const char *mr) "dev %p mr %s" diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 338e4395b7..23b9e17675 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -27,7 +27,6 @@ #include "hw/virtio/virtio-access.h" #include "migration/blocker.h" #include "sysemu/dma.h" -#include "trace.h" /* enabled until disconnected backend stabilizes */ #define _VHOST_DEBUG 1 @@ -694,7 +693,6 @@ static void vhost_region_add(MemoryListener *listener, return; } - trace_vhost_region_add(dev, section->mr->name ?: NULL); ++dev->n_mem_sections; dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections, dev->n_mem_sections); @@ -714,7 +712,6 @@ static void vhost_region_del(MemoryListener *listener, return; } - 
trace_vhost_region_del(dev, section->mr->name ?: NULL); vhost_set_memory(listener, section, false); memory_region_unref(section->mr); for (i = 0; i < dev->n_mem_sections; ++i) { @@ -752,8 +749,6 @@ static void vhost_iommu_region_add(MemoryListener *listener, return; } - trace_vhost_iommu_region_add(dev, section->mr->name ?: NULL); - iommu = g_malloc0(sizeof(*iommu)); end = int128_add(int128_make64(section->offset_within_region), section->size); @@ -782,8 +777,6 @@ static void vhost_iommu_region_del(MemoryListener *listener, return; } - trace_vhost_iommu_region_del(dev, section->mr->name ?: NULL); - QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) { if (iommu->mr == section->mr && iommu->n.start == section->offset_within_region) { From 76143618a5a9f33e8bc968f27b3de79d79a286c5 Mon Sep 17 00:00:00 2001 From: Gal Hammer Date: Mon, 29 Jan 2018 16:20:56 +0200 Subject: [PATCH 02/22] virtio: remove event notifier cleanup call on de-assign The virtio_bus_set_host_notifier function no longer calls event_notifier_cleanup when an event notifier is removed. The commit updates the code to match the new behavior and calls virtio_bus_cleanup_host_notifier after the notifier was de-assigned and no longer in use. This change is a preparation to allow executing the virtio_bus_set_host_notifier function in a memory region transaction. Signed-off-by: Gal Hammer Reviewed-by: Greg Kurz Tested-by: Greg Kurz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/block/dataplane/virtio-blk.c | 2 ++ hw/scsi/virtio-scsi-dataplane.c | 2 ++ hw/virtio/vhost.c | 2 ++ hw/virtio/virtio-bus.c | 14 ++++++++++---- hw/virtio/virtio.c | 2 ++ include/hw/virtio/virtio-bus.h | 2 ++ 6 files changed, 20 insertions(+), 4 deletions(-) diff --git a/hw/block/dataplane/virtio-blk.c b/hw/block/dataplane/virtio-blk.c index f6fc639e88..2cb990997e 100644 --- a/hw/block/dataplane/virtio-blk.c +++ b/hw/block/dataplane/virtio-blk.c @@ -192,6 +192,7 @@ int virtio_blk_data_plane_start(VirtIODevice *vdev) fprintf(stderr, "virtio-blk failed to set host notifier (%d)\n", r); while (i--) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } goto fail_guest_notifiers; } @@ -267,6 +268,7 @@ void virtio_blk_data_plane_stop(VirtIODevice *vdev) for (i = 0; i < nvqs; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } /* Clean up guest notifier (irq) */ diff --git a/hw/scsi/virtio-scsi-dataplane.c b/hw/scsi/virtio-scsi-dataplane.c index add4b3f4a4..1c33322ba6 100644 --- a/hw/scsi/virtio-scsi-dataplane.c +++ b/hw/scsi/virtio-scsi-dataplane.c @@ -175,6 +175,7 @@ fail_vrings: aio_context_release(s->ctx); for (i = 0; i < vs->conf.num_queues + 2; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } k->set_guest_notifiers(qbus->parent, vs->conf.num_queues + 2, false); fail_guest_notifiers: @@ -213,6 +214,7 @@ void virtio_scsi_dataplane_stop(VirtIODevice *vdev) for (i = 0; i < vs->conf.num_queues + 2; i++) { virtio_bus_set_host_notifier(VIRTIO_BUS(qbus), i, false); + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), i); } /* Clean up guest notifier (irq) */ diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 23b9e17675..56885aaf13 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1418,6 +1418,7 @@ fail_vq: error_report("vhost VQ %d 
notifier cleanup error: %d", i, -r); } assert (e >= 0); + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); } virtio_device_release_ioeventfd(vdev); fail: @@ -1441,6 +1442,7 @@ void vhost_dev_disable_notifiers(struct vhost_dev *hdev, VirtIODevice *vdev) error_report("vhost VQ %d notifier cleanup failed: %d", i, -r); } assert (r >= 0); + virtio_bus_cleanup_host_notifier(VIRTIO_BUS(qbus), hdev->vq_index + i); } virtio_device_release_ioeventfd(vdev); } diff --git a/hw/virtio/virtio-bus.c b/hw/virtio/virtio-bus.c index 3042232daf..f9bc9ea46d 100644 --- a/hw/virtio/virtio-bus.c +++ b/hw/virtio/virtio-bus.c @@ -283,20 +283,26 @@ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign) r = k->ioeventfd_assign(proxy, notifier, n, true); if (r < 0) { error_report("%s: unable to assign ioeventfd: %d", __func__, r); - goto cleanup_event_notifier; + virtio_bus_cleanup_host_notifier(bus, n); } - return 0; } else { k->ioeventfd_assign(proxy, notifier, n, false); } -cleanup_event_notifier: + return r; +} + +void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n) +{ + VirtIODevice *vdev = virtio_bus_get_device(bus); + VirtQueue *vq = virtio_get_queue(vdev, n); + EventNotifier *notifier = virtio_queue_get_host_notifier(vq); + /* Test and clear notifier after disabling event, * in case poll callback didn't have time to run. 
*/ virtio_queue_host_notifier_read(notifier); event_notifier_cleanup(notifier); - return r; } static char *virtio_bus_get_dev_path(DeviceState *dev) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index d6002ee550..3667cd61fd 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2608,6 +2608,7 @@ assign_error: event_notifier_set_handler(&vq->host_notifier, NULL); r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); + virtio_bus_cleanup_host_notifier(qbus, n); } return err; } @@ -2634,6 +2635,7 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) event_notifier_set_handler(&vq->host_notifier, NULL); r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); + virtio_bus_cleanup_host_notifier(qbus, n); } } diff --git a/include/hw/virtio/virtio-bus.h b/include/hw/virtio/virtio-bus.h index a63c1d216d..ced3d2d2b0 100644 --- a/include/hw/virtio/virtio-bus.h +++ b/include/hw/virtio/virtio-bus.h @@ -148,5 +148,7 @@ int virtio_bus_grab_ioeventfd(VirtioBusState *bus); void virtio_bus_release_ioeventfd(VirtioBusState *bus); /* Switch from/to the generic ioeventfd handler */ int virtio_bus_set_host_notifier(VirtioBusState *bus, int n, bool assign); +/* Tell the bus that the ioeventfd handler is no longer required. */ +void virtio_bus_cleanup_host_notifier(VirtioBusState *bus, int n); #endif /* VIRTIO_BUS_H */ From 710fccf80d787911120145f508f9c4c664cf0e03 Mon Sep 17 00:00:00 2001 From: Gal Hammer Date: Mon, 29 Jan 2018 16:20:57 +0200 Subject: [PATCH 03/22] virtio: improve virtio devices initialization time The loading time of a VM is quite significant when its virtio devices use a large amount of virt-queues (e.g. a virtio-serial device with max_ports=511). Most of the time is spent in the creation of all the required event notifiers (ioeventfd and memory regions). This patch packs all the changes to the memory regions in a single memory transaction. 
Reported-by: Sitong Liu Reported-by: Xiaoling Gao Signed-off-by: Gal Hammer Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Greg Kurz Tested-by: Greg Kurz --- hw/virtio/virtio.c | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 3667cd61fd..006d3d1148 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -2572,8 +2572,9 @@ static Property virtio_properties[] = { static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) { VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); - int n, r, err; + int i, n, r, err; + memory_region_transaction_begin(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { VirtQueue *vq = &vdev->vq[n]; if (!virtio_queue_get_num(vdev, n)) { @@ -2596,9 +2597,11 @@ static int virtio_device_start_ioeventfd_impl(VirtIODevice *vdev) } event_notifier_set(&vq->host_notifier); } + memory_region_transaction_commit(); return 0; assign_error: + i = n; /* save n for a second iteration after transaction is committed. 
*/ while (--n >= 0) { VirtQueue *vq = &vdev->vq[n]; if (!virtio_queue_get_num(vdev, n)) { @@ -2608,7 +2611,14 @@ assign_error: event_notifier_set_handler(&vq->host_notifier, NULL); r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); - virtio_bus_cleanup_host_notifier(qbus, n); + } + memory_region_transaction_commit(); + + while (--i >= 0) { + if (!virtio_queue_get_num(vdev, i)) { + continue; + } + virtio_bus_cleanup_host_notifier(qbus, i); } return err; } @@ -2626,6 +2636,7 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) VirtioBusState *qbus = VIRTIO_BUS(qdev_get_parent_bus(DEVICE(vdev))); int n, r; + memory_region_transaction_begin(); for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { VirtQueue *vq = &vdev->vq[n]; @@ -2635,6 +2646,13 @@ static void virtio_device_stop_ioeventfd_impl(VirtIODevice *vdev) event_notifier_set_handler(&vq->host_notifier, NULL); r = virtio_bus_set_host_notifier(qbus, n, false); assert(r >= 0); + } + memory_region_transaction_commit(); + + for (n = 0; n < VIRTIO_QUEUE_MAX; n++) { + if (!virtio_queue_get_num(vdev, n)) { + continue; + } virtio_bus_cleanup_host_notifier(qbus, n); } } From c44317efecb240b9b0951ad46ba56eb547114f1d Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 19 Jan 2018 10:39:18 +0000 Subject: [PATCH 04/22] vhost: Build temporary section list and deref after commit Igor spotted that there's a race, where a region that's unref'd in a _del callback might be free'd before the set_mem_table call in the _commit callback, and thus the vhost might end up using free memory. Fix this by building a complete temporary sections list, ref'ing every section (during add and nop) and then unref'ing the whole list right at the end of commit. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost.c | 73 +++++++++++++++++++++++++-------------- include/hw/virtio/vhost.h | 2 ++ 2 files changed, 49 insertions(+), 26 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 56885aaf13..92c9500bab 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -627,6 +627,8 @@ static void vhost_begin(MemoryListener *listener) memory_listener); dev->mem_changed_end_addr = 0; dev->mem_changed_start_addr = -1; + dev->tmp_sections = NULL; + dev->n_tmp_sections = 0; } static void vhost_commit(MemoryListener *listener) @@ -635,17 +637,25 @@ static void vhost_commit(MemoryListener *listener) memory_listener); hwaddr start_addr = 0; ram_addr_t size = 0; + MemoryRegionSection *old_sections; + int n_old_sections; + uint64_t log_size; int r; + old_sections = dev->mem_sections; + n_old_sections = dev->n_mem_sections; + dev->mem_sections = dev->tmp_sections; + dev->n_mem_sections = dev->n_tmp_sections; + if (!dev->memory_changed) { - return; + goto out; } if (!dev->started) { - return; + goto out; } if (dev->mem_changed_start_addr > dev->mem_changed_end_addr) { - return; + goto out; } if (dev->started) { @@ -662,7 +672,7 @@ static void vhost_commit(MemoryListener *listener) VHOST_OPS_DEBUG("vhost_set_mem_table failed"); } dev->memory_changed = false; - return; + goto out; } log_size = vhost_get_log_size(dev); /* We allocate an extra 4K bytes to log, @@ -681,6 +691,27 @@ static void vhost_commit(MemoryListener *listener) vhost_dev_log_resize(dev, log_size); } dev->memory_changed = false; + +out: + /* Deref the old list of sections, this must happen _after_ the + * vhost_set_mem_table to ensure the client isn't still using the + * section we're about to unref. 
+ */ + while (n_old_sections--) { + memory_region_unref(old_sections[n_old_sections].mr); + } + g_free(old_sections); + return; +} + +static void vhost_add_section(struct vhost_dev *dev, + MemoryRegionSection *section) +{ + ++dev->n_tmp_sections; + dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections, + dev->n_tmp_sections); + dev->tmp_sections[dev->n_tmp_sections - 1] = *section; + memory_region_ref(section->mr); } static void vhost_region_add(MemoryListener *listener, @@ -693,36 +724,31 @@ static void vhost_region_add(MemoryListener *listener, return; } - ++dev->n_mem_sections; - dev->mem_sections = g_renew(MemoryRegionSection, dev->mem_sections, - dev->n_mem_sections); - dev->mem_sections[dev->n_mem_sections - 1] = *section; - memory_region_ref(section->mr); + vhost_add_section(dev, section); vhost_set_memory(listener, section, true); } -static void vhost_region_del(MemoryListener *listener, +static void vhost_region_nop(MemoryListener *listener, MemoryRegionSection *section) { struct vhost_dev *dev = container_of(listener, struct vhost_dev, memory_listener); - int i; if (!vhost_section(section)) { return; } - vhost_set_memory(listener, section, false); - memory_region_unref(section->mr); - for (i = 0; i < dev->n_mem_sections; ++i) { - if (dev->mem_sections[i].offset_within_address_space - == section->offset_within_address_space) { - --dev->n_mem_sections; - memmove(&dev->mem_sections[i], &dev->mem_sections[i+1], - (dev->n_mem_sections - i) * sizeof(*dev->mem_sections)); - break; - } + vhost_add_section(dev, section); +} + +static void vhost_region_del(MemoryListener *listener, + MemoryRegionSection *section) +{ + if (!vhost_section(section)) { + return; } + + vhost_set_memory(listener, section, false); } static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) @@ -789,11 +815,6 @@ static void vhost_iommu_region_del(MemoryListener *listener, } } -static void vhost_region_nop(MemoryListener *listener, - MemoryRegionSection 
*section) -{ -} - static int vhost_virtqueue_set_addr(struct vhost_dev *dev, struct vhost_virtqueue *vq, unsigned idx, bool enable_log) diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 1dc2d73d76..09854b611b 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -60,6 +60,8 @@ struct vhost_dev { struct vhost_memory *mem; int n_mem_sections; MemoryRegionSection *mem_sections; + int n_tmp_sections; + MemoryRegionSection *tmp_sections; struct vhost_virtqueue *vqs; int nvqs; /* the first virtqueue which would be used by this vhost dev */ From 0ca1fd2d6878a360c9e3b5be6b0bbe5d3143280e Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 19 Jan 2018 10:39:19 +0000 Subject: [PATCH 05/22] vhost: Simplify ring verification checks vhost_verify_ring_mappings() was used to verify that rings are still accessible and related memory hasn't been moved after flatview is updated. It was doing checks by mapping ring's GPA+len and checking that HVA hadn't changed with new memory map. To avoid maybe expensive mapping call, we were identifying address range that changed and were doing mapping only if ring was in changed range. However it's not necessary to perform ring's GPA mapping as we already have its current HVA and all we need is to verify that ring's GPA translates to the same HVA in updated flatview. This will allow the following patches to simplify the range comparison that was previously needed to avoid expensive verify_ring_mapping calls. Signed-off-by: Igor Mammedov with modifications by: Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost.c | 75 +++++++++++++++++++++++++---------------------- 1 file changed, 40 insertions(+), 35 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 92c9500bab..91cab5131c 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -455,35 +455,37 @@ static void vhost_memory_unmap(struct vhost_dev *dev, void *buffer, } } -static int vhost_verify_ring_part_mapping(struct vhost_dev *dev, - void *part, - uint64_t part_addr, - uint64_t part_size, - uint64_t start_addr, - uint64_t size) +static int vhost_verify_ring_part_mapping(void *ring_hva, + uint64_t ring_gpa, + uint64_t ring_size, + void *reg_hva, + uint64_t reg_gpa, + uint64_t reg_size) { - hwaddr l; - void *p; - int r = 0; + uint64_t hva_ring_offset; + uint64_t ring_last = range_get_last(ring_gpa, ring_size); + uint64_t reg_last = range_get_last(reg_gpa, reg_size); - if (!ranges_overlap(start_addr, size, part_addr, part_size)) { + if (ring_last < reg_gpa || ring_gpa > reg_last) { return 0; } - l = part_size; - p = vhost_memory_map(dev, part_addr, &l, 1); - if (!p || l != part_size) { - r = -ENOMEM; + /* check that whole ring's is mapped */ + if (ring_last > reg_last) { + return -ENOMEM; } - if (p != part) { - r = -EBUSY; + /* check that ring's MemoryRegion wasn't replaced */ + hva_ring_offset = ring_gpa - reg_gpa; + if (ring_hva != reg_hva + hva_ring_offset) { + return -EBUSY; } - vhost_memory_unmap(dev, p, l, 0, 0); - return r; + + return 0; } static int vhost_verify_ring_mappings(struct vhost_dev *dev, - uint64_t start_addr, - uint64_t size) + void *reg_hva, + uint64_t reg_gpa, + uint64_t reg_size) { int i, j; int r = 0; @@ -497,22 +499,25 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, struct vhost_virtqueue *vq = dev->vqs + i; j = 0; - r = vhost_verify_ring_part_mapping(dev, vq->desc, vq->desc_phys, - vq->desc_size, start_addr, size); + r = vhost_verify_ring_part_mapping( + vq->desc, vq->desc_phys, vq->desc_size, + reg_hva, reg_gpa, reg_size); if (r) { 
break; } j++; - r = vhost_verify_ring_part_mapping(dev, vq->avail, vq->avail_phys, - vq->avail_size, start_addr, size); + r = vhost_verify_ring_part_mapping( + vq->avail, vq->avail_phys, vq->avail_size, + reg_hva, reg_gpa, reg_size); if (r) { break; } j++; - r = vhost_verify_ring_part_mapping(dev, vq->used, vq->used_phys, - vq->used_size, start_addr, size); + r = vhost_verify_ring_part_mapping( + vq->used, vq->used_phys, vq->used_size, + reg_hva, reg_gpa, reg_size); if (r) { break; } @@ -635,13 +640,11 @@ static void vhost_commit(MemoryListener *listener) { struct vhost_dev *dev = container_of(listener, struct vhost_dev, memory_listener); - hwaddr start_addr = 0; - ram_addr_t size = 0; MemoryRegionSection *old_sections; int n_old_sections; - uint64_t log_size; int r; + int i; old_sections = dev->mem_sections; n_old_sections = dev->n_mem_sections; @@ -658,12 +661,14 @@ static void vhost_commit(MemoryListener *listener) goto out; } - if (dev->started) { - start_addr = dev->mem_changed_start_addr; - size = dev->mem_changed_end_addr - dev->mem_changed_start_addr + 1; - - r = vhost_verify_ring_mappings(dev, start_addr, size); - assert(r >= 0); + for (i = 0; i < dev->mem->nregions; i++) { + if (vhost_verify_ring_mappings(dev, + (void *)(uintptr_t)dev->mem->regions[i].userspace_addr, + dev->mem->regions[i].guest_phys_addr, + dev->mem->regions[i].memory_size)) { + error_report("Verify ring failure on region %d", i); + abort(); + } } if (!dev->log_enabled) { From 48d7c97577498657f9ccbcbf1f990fdb4b79501f Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 19 Jan 2018 10:39:20 +0000 Subject: [PATCH 06/22] vhost: Merge sections added to temporary list As sections are reported by the listener to the _nop and _add methods, add them to the temporary section list but now merge them with the previous section if the new one abuts and the backend allows. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Igor Mammedov Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 4 +++ hw/virtio/vhost.c | 76 +++++++++++++++++++++++++++++++++++++----- 2 files changed, 71 insertions(+), 9 deletions(-) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 775461ae98..ab9da06244 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -1,5 +1,9 @@ # See docs/devel/tracing.txt for syntax documentation. +# hw/virtio/vhost.c +vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 +vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx64 + # hw/virtio/virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" virtqueue_fill(void *vq, const void *elem, unsigned int len, unsigned int idx) "vq %p elem %p len %u idx %u" diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 91cab5131c..351c691439 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -709,14 +709,71 @@ out: return; } -static void vhost_add_section(struct vhost_dev *dev, - MemoryRegionSection *section) +/* Adds the section data to the tmp_section structure. + * It relies on the listener calling us in memory address order + * and for each region (via the _add and _nop methods) to + * join neighbours. 
+ */ +static void vhost_region_add_section(struct vhost_dev *dev, + MemoryRegionSection *section) { - ++dev->n_tmp_sections; - dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections, - dev->n_tmp_sections); - dev->tmp_sections[dev->n_tmp_sections - 1] = *section; - memory_region_ref(section->mr); + bool need_add = true; + uint64_t mrs_size = int128_get64(section->size); + uint64_t mrs_gpa = section->offset_within_address_space; + uintptr_t mrs_host = (uintptr_t)memory_region_get_ram_ptr(section->mr) + + section->offset_within_region; + + trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, + mrs_host); + + bool log_dirty = memory_region_get_dirty_log_mask(section->mr) & + ~(1 << DIRTY_MEMORY_MIGRATION); + if (log_dirty) { + return; + } + + if (dev->n_tmp_sections) { + /* Since we already have at least one section, lets see if + * this extends it; since we're scanning in order, we only + * have to look at the last one, and the FlatView that calls + * us shouldn't have overlaps. 
+ */ + MemoryRegionSection *prev_sec = dev->tmp_sections + + (dev->n_tmp_sections - 1); + uint64_t prev_gpa_start = prev_sec->offset_within_address_space; + uint64_t prev_size = int128_get64(prev_sec->size); + uint64_t prev_gpa_end = range_get_last(prev_gpa_start, prev_size); + uint64_t prev_host_start = + (uintptr_t)memory_region_get_ram_ptr(prev_sec->mr) + + prev_sec->offset_within_region; + uint64_t prev_host_end = range_get_last(prev_host_start, prev_size); + + if (prev_gpa_end + 1 == mrs_gpa && + prev_host_end + 1 == mrs_host && + section->mr == prev_sec->mr && + (!dev->vhost_ops->vhost_backend_can_merge || + dev->vhost_ops->vhost_backend_can_merge(dev, + mrs_host, mrs_size, + prev_host_start, prev_size))) { + /* The two sections abut */ + need_add = false; + prev_sec->size = int128_add(prev_sec->size, section->size); + trace_vhost_region_add_section_abut(section->mr->name, + mrs_size + prev_size); + } + } + + if (need_add) { + ++dev->n_tmp_sections; + dev->tmp_sections = g_renew(MemoryRegionSection, dev->tmp_sections, + dev->n_tmp_sections); + dev->tmp_sections[dev->n_tmp_sections - 1] = *section; + /* The flatview isn't stable and we don't use it, making it NULL + * means we can memcmp the list. 
+ */ + dev->tmp_sections[dev->n_tmp_sections - 1].fv = NULL; + memory_region_ref(section->mr); + } } static void vhost_region_add(MemoryListener *listener, @@ -728,11 +785,12 @@ static void vhost_region_add(MemoryListener *listener, if (!vhost_section(section)) { return; } + vhost_region_add_section(dev, section); - vhost_add_section(dev, section); vhost_set_memory(listener, section, true); } +/* Called on regions that have not changed */ static void vhost_region_nop(MemoryListener *listener, MemoryRegionSection *section) { @@ -743,7 +801,7 @@ static void vhost_region_nop(MemoryListener *listener, return; } - vhost_add_section(dev, section); + vhost_region_add_section(dev, section); } static void vhost_region_del(MemoryListener *listener, From ade6d081fc33948e56e655ee37b1306b6de6e0ef Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 19 Jan 2018 10:39:21 +0000 Subject: [PATCH 07/22] vhost: Regenerate region list from changed sections list Compare the sections list that's just been generated, and if it's different from the old one regenerate the region list. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Igor Mammedov --- hw/virtio/trace-events | 1 + hw/virtio/vhost.c | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 36 insertions(+), 4 deletions(-) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index ab9da06244..606e4fc949 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -1,6 +1,7 @@ # See docs/devel/tracing.txt for syntax documentation. 
# hw/virtio/vhost.c +vhost_commit(bool started, bool changed) "Started: %d Changed: %d" vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx64 diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 351c691439..f3badc4827 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -643,23 +643,54 @@ static void vhost_commit(MemoryListener *listener) MemoryRegionSection *old_sections; int n_old_sections; uint64_t log_size; + size_t regions_size; int r; int i; + bool changed = false; + /* Note we can be called before the device is started, but then + * starting the device calls set_mem_table, so we need to have + * built the data structures. + */ old_sections = dev->mem_sections; n_old_sections = dev->n_mem_sections; dev->mem_sections = dev->tmp_sections; dev->n_mem_sections = dev->n_tmp_sections; - if (!dev->memory_changed) { + if (dev->n_mem_sections != n_old_sections) { + changed = true; + } else { + /* Same size, lets check the contents */ + changed = n_old_sections && memcmp(dev->mem_sections, old_sections, + n_old_sections * sizeof(old_sections[0])) != 0; + } + + trace_vhost_commit(dev->started, changed); + if (!changed) { goto out; } + + /* Rebuild the regions list from the new sections list */ + regions_size = offsetof(struct vhost_memory, regions) + + dev->n_mem_sections * sizeof dev->mem->regions[0]; + dev->mem = g_realloc(dev->mem, regions_size); + dev->mem->nregions = dev->n_mem_sections; + used_memslots = dev->mem->nregions; + for (i = 0; i < dev->n_mem_sections; i++) { + struct vhost_memory_region *cur_vmr = dev->mem->regions + i; + struct MemoryRegionSection *mrs = dev->mem_sections + i; + + cur_vmr->guest_phys_addr = mrs->offset_within_address_space; + cur_vmr->memory_size = int128_get64(mrs->size); + cur_vmr->userspace_addr = + (uintptr_t)memory_region_get_ram_ptr(mrs->mr) + + 
mrs->offset_within_region; + cur_vmr->flags_padding = 0; + } + if (!dev->started) { goto out; } - if (dev->mem_changed_start_addr > dev->mem_changed_end_addr) { - goto out; - } for (i = 0; i < dev->mem->nregions; i++) { if (vhost_verify_ring_mappings(dev, From 06709c120ca3bae9fa243783b89005c08b71a440 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 19 Jan 2018 10:39:22 +0000 Subject: [PATCH 08/22] vhost: Clean out old vhost_set_memory and friends Remove the old update mechanism, vhost_set_memory, and the functions and flags it used. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 251 -------------------------------------- include/hw/virtio/vhost.h | 3 - 2 files changed, 254 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index f3badc4827..c7814d1d63 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -155,160 +155,6 @@ static void vhost_log_sync_range(struct vhost_dev *dev, } } -/* Assign/unassign. Keep an unsorted array of non-overlapping - * memory regions in dev->mem. */ -static void vhost_dev_unassign_memory(struct vhost_dev *dev, - uint64_t start_addr, - uint64_t size) -{ - int from, to, n = dev->mem->nregions; - /* Track overlapping/split regions for sanity checking. */ - int overlap_start = 0, overlap_end = 0, overlap_middle = 0, split = 0; - - for (from = 0, to = 0; from < n; ++from, ++to) { - struct vhost_memory_region *reg = dev->mem->regions + to; - uint64_t reglast; - uint64_t memlast; - uint64_t change; - - /* clone old region */ - if (to != from) { - memcpy(reg, dev->mem->regions + from, sizeof *reg); - } - - /* No overlap is simple */ - if (!ranges_overlap(reg->guest_phys_addr, reg->memory_size, - start_addr, size)) { - continue; - } - - /* Split only happens if supplied region - * is in the middle of an existing one. Thus it can not - * overlap with any other existing region. 
*/ - assert(!split); - - reglast = range_get_last(reg->guest_phys_addr, reg->memory_size); - memlast = range_get_last(start_addr, size); - - /* Remove whole region */ - if (start_addr <= reg->guest_phys_addr && memlast >= reglast) { - --dev->mem->nregions; - --to; - ++overlap_middle; - continue; - } - - /* Shrink region */ - if (memlast >= reglast) { - reg->memory_size = start_addr - reg->guest_phys_addr; - assert(reg->memory_size); - assert(!overlap_end); - ++overlap_end; - continue; - } - - /* Shift region */ - if (start_addr <= reg->guest_phys_addr) { - change = memlast + 1 - reg->guest_phys_addr; - reg->memory_size -= change; - reg->guest_phys_addr += change; - reg->userspace_addr += change; - assert(reg->memory_size); - assert(!overlap_start); - ++overlap_start; - continue; - } - - /* This only happens if supplied region - * is in the middle of an existing one. Thus it can not - * overlap with any other existing region. */ - assert(!overlap_start); - assert(!overlap_end); - assert(!overlap_middle); - /* Split region: shrink first part, shift second part. */ - memcpy(dev->mem->regions + n, reg, sizeof *reg); - reg->memory_size = start_addr - reg->guest_phys_addr; - assert(reg->memory_size); - change = memlast + 1 - reg->guest_phys_addr; - reg = dev->mem->regions + n; - reg->memory_size -= change; - assert(reg->memory_size); - reg->guest_phys_addr += change; - reg->userspace_addr += change; - /* Never add more than 1 region */ - assert(dev->mem->nregions == n); - ++dev->mem->nregions; - ++split; - } -} - -/* Called after unassign, so no regions overlap the given range. 
*/ -static void vhost_dev_assign_memory(struct vhost_dev *dev, - uint64_t start_addr, - uint64_t size, - uint64_t uaddr) -{ - int from, to; - struct vhost_memory_region *merged = NULL; - for (from = 0, to = 0; from < dev->mem->nregions; ++from, ++to) { - struct vhost_memory_region *reg = dev->mem->regions + to; - uint64_t prlast, urlast; - uint64_t pmlast, umlast; - uint64_t s, e, u; - - /* clone old region */ - if (to != from) { - memcpy(reg, dev->mem->regions + from, sizeof *reg); - } - prlast = range_get_last(reg->guest_phys_addr, reg->memory_size); - pmlast = range_get_last(start_addr, size); - urlast = range_get_last(reg->userspace_addr, reg->memory_size); - umlast = range_get_last(uaddr, size); - - /* check for overlapping regions: should never happen. */ - assert(prlast < start_addr || pmlast < reg->guest_phys_addr); - /* Not an adjacent or overlapping region - do not merge. */ - if ((prlast + 1 != start_addr || urlast + 1 != uaddr) && - (pmlast + 1 != reg->guest_phys_addr || - umlast + 1 != reg->userspace_addr)) { - continue; - } - - if (dev->vhost_ops->vhost_backend_can_merge && - !dev->vhost_ops->vhost_backend_can_merge(dev, uaddr, size, - reg->userspace_addr, - reg->memory_size)) { - continue; - } - - if (merged) { - --to; - assert(to >= 0); - } else { - merged = reg; - } - u = MIN(uaddr, reg->userspace_addr); - s = MIN(start_addr, reg->guest_phys_addr); - e = MAX(pmlast, prlast); - uaddr = merged->userspace_addr = u; - start_addr = merged->guest_phys_addr = s; - size = merged->memory_size = e - s + 1; - assert(merged->memory_size); - } - - if (!merged) { - struct vhost_memory_region *reg = dev->mem->regions + to; - memset(reg, 0, sizeof *reg); - reg->memory_size = size; - assert(reg->memory_size); - reg->guest_phys_addr = start_addr; - reg->userspace_addr = uaddr; - ++to; - } - assert(to <= dev->mem->nregions + 1); - dev->mem->nregions = to; -} - static uint64_t vhost_get_log_size(struct vhost_dev *dev) { uint64_t log_size = 0; @@ -531,95 +377,6 @@ 
static int vhost_verify_ring_mappings(struct vhost_dev *dev, return r; } -static struct vhost_memory_region *vhost_dev_find_reg(struct vhost_dev *dev, - uint64_t start_addr, - uint64_t size) -{ - int i, n = dev->mem->nregions; - for (i = 0; i < n; ++i) { - struct vhost_memory_region *reg = dev->mem->regions + i; - if (ranges_overlap(reg->guest_phys_addr, reg->memory_size, - start_addr, size)) { - return reg; - } - } - return NULL; -} - -static bool vhost_dev_cmp_memory(struct vhost_dev *dev, - uint64_t start_addr, - uint64_t size, - uint64_t uaddr) -{ - struct vhost_memory_region *reg = vhost_dev_find_reg(dev, start_addr, size); - uint64_t reglast; - uint64_t memlast; - - if (!reg) { - return true; - } - - reglast = range_get_last(reg->guest_phys_addr, reg->memory_size); - memlast = range_get_last(start_addr, size); - - /* Need to extend region? */ - if (start_addr < reg->guest_phys_addr || memlast > reglast) { - return true; - } - /* userspace_addr changed? */ - return uaddr != reg->userspace_addr + start_addr - reg->guest_phys_addr; -} - -static void vhost_set_memory(MemoryListener *listener, - MemoryRegionSection *section, - bool add) -{ - struct vhost_dev *dev = container_of(listener, struct vhost_dev, - memory_listener); - hwaddr start_addr = section->offset_within_address_space; - ram_addr_t size = int128_get64(section->size); - bool log_dirty = - memory_region_get_dirty_log_mask(section->mr) & ~(1 << DIRTY_MEMORY_MIGRATION); - int s = offsetof(struct vhost_memory, regions) + - (dev->mem->nregions + 1) * sizeof dev->mem->regions[0]; - void *ram; - - dev->mem = g_realloc(dev->mem, s); - - if (log_dirty) { - add = false; - } - - assert(size); - - /* Optimize no-change case. At least cirrus_vga does this a lot at this time. */ - ram = memory_region_get_ram_ptr(section->mr) + section->offset_within_region; - if (add) { - if (!vhost_dev_cmp_memory(dev, start_addr, size, (uintptr_t)ram)) { - /* Region exists with same address. Nothing to do. 
*/ - return; - } - } else { - if (!vhost_dev_find_reg(dev, start_addr, size)) { - /* Removing region that we don't access. Nothing to do. */ - return; - } - } - - vhost_dev_unassign_memory(dev, start_addr, size); - if (add) { - /* Add given mapping, merging adjacent regions if any */ - vhost_dev_assign_memory(dev, start_addr, size, (uintptr_t)ram); - } else { - /* Remove old mapping for this memory, if any. */ - vhost_dev_unassign_memory(dev, start_addr, size); - } - dev->mem_changed_start_addr = MIN(dev->mem_changed_start_addr, start_addr); - dev->mem_changed_end_addr = MAX(dev->mem_changed_end_addr, start_addr + size - 1); - dev->memory_changed = true; - used_memslots = dev->mem->nregions; -} - static bool vhost_section(MemoryRegionSection *section) { return memory_region_is_ram(section->mr) && @@ -630,8 +387,6 @@ static void vhost_begin(MemoryListener *listener) { struct vhost_dev *dev = container_of(listener, struct vhost_dev, memory_listener); - dev->mem_changed_end_addr = 0; - dev->mem_changed_start_addr = -1; dev->tmp_sections = NULL; dev->n_tmp_sections = 0; } @@ -707,7 +462,6 @@ static void vhost_commit(MemoryListener *listener) if (r < 0) { VHOST_OPS_DEBUG("vhost_set_mem_table failed"); } - dev->memory_changed = false; goto out; } log_size = vhost_get_log_size(dev); @@ -726,7 +480,6 @@ static void vhost_commit(MemoryListener *listener) if (dev->log_size > log_size + VHOST_LOG_BUFFER) { vhost_dev_log_resize(dev, log_size); } - dev->memory_changed = false; out: /* Deref the old list of sections, this must happen _after_ the @@ -817,8 +570,6 @@ static void vhost_region_add(MemoryListener *listener, return; } vhost_region_add_section(dev, section); - - vhost_set_memory(listener, section, true); } /* Called on regions that have not changed */ @@ -842,7 +593,6 @@ static void vhost_region_del(MemoryListener *listener, return; } - vhost_set_memory(listener, section, false); } static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) @@ -1457,7 
+1207,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, hdev->log_size = 0; hdev->log_enabled = false; hdev->started = false; - hdev->memory_changed = false; memory_listener_register(&hdev->memory_listener, &address_space_memory); QLIST_INSERT_HEAD(&vhost_devices, hdev, entry); return 0; diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index 09854b611b..a7f449fa87 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -75,9 +75,6 @@ struct vhost_dev { bool log_enabled; uint64_t log_size; Error *migration_blocker; - bool memory_changed; - hwaddr mem_changed_start_addr; - hwaddr mem_changed_end_addr; const VhostOps *vhost_ops; void *opaque; struct vhost_log *log; From 938eeb640c0a006a3a53ea6f8b409cad56b80a55 Mon Sep 17 00:00:00 2001 From: "Dr. David Alan Gilbert" Date: Fri, 19 Jan 2018 10:39:23 +0000 Subject: [PATCH 09/22] vhost: Merge and delete unused callbacks Now that the old vhost_set_memory code is gone, the _nop and _add callbacks are identical and can be merged. The _del callback is no longer needed. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost.c | 33 +++++---------------------------- 1 file changed, 5 insertions(+), 28 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index c7814d1d63..326f168022 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -560,8 +560,9 @@ static void vhost_region_add_section(struct vhost_dev *dev, } } -static void vhost_region_add(MemoryListener *listener, - MemoryRegionSection *section) +/* Used for both add and nop callbacks */ +static void vhost_region_addnop(MemoryListener *listener, + MemoryRegionSection *section) { struct vhost_dev *dev = container_of(listener, struct vhost_dev, memory_listener); @@ -572,29 +573,6 @@ static void vhost_region_add(MemoryListener *listener, vhost_region_add_section(dev, section); } -/* Called on regions that have not changed */ -static void vhost_region_nop(MemoryListener *listener, - MemoryRegionSection *section) -{ - struct vhost_dev *dev = container_of(listener, struct vhost_dev, - memory_listener); - - if (!vhost_section(section)) { - return; - } - - vhost_region_add_section(dev, section); -} - -static void vhost_region_del(MemoryListener *listener, - MemoryRegionSection *section) -{ - if (!vhost_section(section)) { - return; - } - -} - static void vhost_iommu_unmap_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) { struct vhost_iommu *iommu = container_of(n, struct vhost_iommu, n); @@ -1163,9 +1141,8 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, hdev->memory_listener = (MemoryListener) { .begin = vhost_begin, .commit = vhost_commit, - .region_add = vhost_region_add, - .region_del = vhost_region_del, - .region_nop = vhost_region_nop, + .region_add = vhost_region_addnop, + .region_nop = vhost_region_addnop, .log_start = vhost_log_start, .log_stop = vhost_log_stop, .log_sync = vhost_log_sync, From aa3c40f6bfae251a483b5c19f5ceb8e11bdbdb18 Mon Sep 17 00:00:00 2001 From: "Dr. 
David Alan Gilbert" Date: Fri, 19 Jan 2018 10:39:24 +0000 Subject: [PATCH 10/22] vhost: Move log_dirty check Move the log_dirty check into vhost_section. Signed-off-by: Dr. David Alan Gilbert Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 1 + hw/virtio/vhost.c | 20 +++++++++++++------- 2 files changed, 14 insertions(+), 7 deletions(-) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 606e4fc949..742ff0f90b 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -4,6 +4,7 @@ vhost_commit(bool started, bool changed) "Started: %d Changed: %d" vhost_region_add_section(const char *name, uint64_t gpa, uint64_t size, uint64_t host) "%s: 0x%"PRIx64"+0x%"PRIx64" @ 0x%"PRIx64 vhost_region_add_section_abut(const char *name, uint64_t new_size) "%s: 0x%"PRIx64 +vhost_section(const char *name, int r) "%s:%d" # hw/virtio/virtio.c virtqueue_alloc_element(void *elem, size_t sz, unsigned in_num, unsigned out_num) "elem %p size %zd in_num %u out_num %u" diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 326f168022..4a44e6e6bf 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -27,6 +27,7 @@ #include "hw/virtio/virtio-access.h" #include "migration/blocker.h" #include "sysemu/dma.h" +#include "trace.h" /* enabled until disconnected backend stabilizes */ #define _VHOST_DEBUG 1 @@ -379,8 +380,19 @@ static int vhost_verify_ring_mappings(struct vhost_dev *dev, static bool vhost_section(MemoryRegionSection *section) { - return memory_region_is_ram(section->mr) && + bool result; + bool log_dirty = memory_region_get_dirty_log_mask(section->mr) & + ~(1 << DIRTY_MEMORY_MIGRATION); + result = memory_region_is_ram(section->mr) && !memory_region_is_rom(section->mr); + + /* Vhost doesn't handle any block which is doing dirty-tracking other + * than migration; this typically fires on VGA areas. 
+ */ + result &= !log_dirty; + + trace_vhost_section(section->mr->name, result); + return result; } static void vhost_begin(MemoryListener *listener) @@ -510,12 +522,6 @@ static void vhost_region_add_section(struct vhost_dev *dev, trace_vhost_region_add_section(section->mr->name, mrs_gpa, mrs_size, mrs_host); - bool log_dirty = memory_region_get_dirty_log_mask(section->mr) & - ~(1 << DIRTY_MEMORY_MIGRATION); - if (log_dirty) { - return; - } - if (dev->n_tmp_sections) { /* Since we already have at least one section, lets see if * this extends it; since we're scanning in order, we only From ed247f40db84c8bd4bb7d10948702cd47cc4d5ae Mon Sep 17 00:00:00 2001 From: Laszlo Ersek Date: Wed, 7 Feb 2018 13:10:27 +0100 Subject: [PATCH 11/22] pci-bridge/i82801b11: clear bridge registers on platform reset The "i82801b11-bridge" device model is a descendant of "base-pci-bridge" (TYPE_PCI_BRIDGE). However, unlike other similar devices, such as - pci-bridge, - pcie-pci-bridge, - PCIE Root Port, - xio3130 switch upstream and downstream ports, - dec-21154-p2p-bridge, - pbm-bridge, - xilinx-pcie-root, "i82801b11-bridge" does not clear the bridge specific registers at platform reset. This is a problem because devices on "i82801b11-bridge" continue to respond to config space cycles after platform reset, when addressed with the bus number that was previously programmed into the secondary bus number register of "i82801b11-bridge". This error breaks OVMF's search for extra (PXB) root buses, for example. The device class reset method for "i82801b11-bridge" is currently NULL; set it directly to pci_bridge_reset(), like the last three bridge models in the above listing do. Cc: "Michael S. Tsirkin" Cc: Marcel Apfelbaum Cc: qemu-stable@nongnu.org Ref: https://bugzilla.redhat.com/show_bug.cgi?id=1541839 Signed-off-by: Laszlo Ersek Reviewed-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci-bridge/i82801b11.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/pci-bridge/i82801b11.c b/hw/pci-bridge/i82801b11.c index cb522bf30c..ebf7f5f0e8 100644 --- a/hw/pci-bridge/i82801b11.c +++ b/hw/pci-bridge/i82801b11.c @@ -98,6 +98,7 @@ static void i82801b11_bridge_class_init(ObjectClass *klass, void *data) k->realize = i82801b11_bridge_realize; k->config_write = pci_bridge_write_config; dc->vmsd = &i82801b11_bridge_dev_vmstate; + dc->reset = pci_bridge_reset; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); } From 9d6b9db19c4b99ce5a1ad75b490c01edd2c2b0cf Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Tue, 6 Feb 2018 15:39:33 +0800 Subject: [PATCH 12/22] pci/bus: let it has higher migration priority In the past, we prioritized IOMMU migration so that we have such a priority order: IOMMU > PCI Devices When migrating a guest with both vIOMMU and a pcie-root-port, we'll always migrate vIOMMU first, since pci buses will be seen to have the same priority of general PCI devices. That's problematic. The thing is that PCI bus number information is stored in the root port, and that is needed by vIOMMU during post_load(), e.g., to figure out context entry for a device. If we don't have correct bus numbers for devices, we won't be able to recover device state of the DMAR memory regions, and things will be messed up. So let's boost the PCIe root ports to be even with higher priority: PCIe Root Port > IOMMU > PCI Devices A smoke test shows that this patch fixes bug 1538953. Also, apply this rule to all the PCI bus/bridge devices: ioh3420, xio3130_downstream, xio3130_upstream, pcie_pci_bridge, pci-pci bridge, i82801b11. I noted that we set pcie_pci_bridge_dev_vmstate twice. Clean that up together. CC: Alex Williamson CC: Marcel Apfelbaum CC: Michael S. Tsirkin CC: Dr. 
David Alan Gilbert CC: Juan Quintela CC: Laurent Vivier Bug: https://bugzilla.redhat.com/show_bug.cgi?id=1538953 Reported-by: Maxime Coquelin Signed-off-by: Peter Xu Reviewed-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-bridge/gen_pcie_root_port.c | 1 + hw/pci-bridge/i82801b11.c | 1 + hw/pci-bridge/ioh3420.c | 1 + hw/pci-bridge/pci_bridge_dev.c | 1 + hw/pci-bridge/pcie_pci_bridge.c | 2 +- hw/pci-bridge/xio3130_downstream.c | 1 + hw/pci-bridge/xio3130_upstream.c | 1 + include/migration/vmstate.h | 1 + 8 files changed, 8 insertions(+), 1 deletion(-) diff --git a/hw/pci-bridge/gen_pcie_root_port.c b/hw/pci-bridge/gen_pcie_root_port.c index 3dbacc6cea..d117e20325 100644 --- a/hw/pci-bridge/gen_pcie_root_port.c +++ b/hw/pci-bridge/gen_pcie_root_port.c @@ -101,6 +101,7 @@ static void gen_rp_realize(DeviceState *dev, Error **errp) static const VMStateDescription vmstate_rp_dev = { .name = "pcie-root-port", + .priority = MIG_PRI_PCI_BUS, .version_id = 1, .minimum_version_id = 1, .post_load = pcie_cap_slot_post_load, diff --git a/hw/pci-bridge/i82801b11.c b/hw/pci-bridge/i82801b11.c index ebf7f5f0e8..620b43518e 100644 --- a/hw/pci-bridge/i82801b11.c +++ b/hw/pci-bridge/i82801b11.c @@ -80,6 +80,7 @@ err_bridge: static const VMStateDescription i82801b11_bridge_dev_vmstate = { .name = "i82801b11_bridge", + .priority = MIG_PRI_PCI_BUS, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PCIBridge), VMSTATE_END_OF_LIST() diff --git a/hw/pci-bridge/ioh3420.c b/hw/pci-bridge/ioh3420.c index 5f56a2feb6..a7bfbdd238 100644 --- a/hw/pci-bridge/ioh3420.c +++ b/hw/pci-bridge/ioh3420.c @@ -83,6 +83,7 @@ static void ioh3420_interrupts_uninit(PCIDevice *d) static const VMStateDescription vmstate_ioh3420 = { .name = "ioh-3240-express-root-port", + .priority = MIG_PRI_PCI_BUS, .version_id = 1, .minimum_version_id = 1, .post_load = pcie_cap_slot_post_load, diff --git a/hw/pci-bridge/pci_bridge_dev.c b/hw/pci-bridge/pci_bridge_dev.c 
index d56f6638c2..b2d861d216 100644 --- a/hw/pci-bridge/pci_bridge_dev.c +++ b/hw/pci-bridge/pci_bridge_dev.c @@ -174,6 +174,7 @@ static bool pci_device_shpc_present(void *opaque, int version_id) static const VMStateDescription pci_bridge_dev_vmstate = { .name = "pci_bridge", + .priority = MIG_PRI_PCI_BUS, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PCIBridge), SHPC_VMSTATE(shpc, PCIDevice, pci_device_shpc_present), diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c index a4d827c99d..e5ac7974cf 100644 --- a/hw/pci-bridge/pcie_pci_bridge.c +++ b/hw/pci-bridge/pcie_pci_bridge.c @@ -129,6 +129,7 @@ static Property pcie_pci_bridge_dev_properties[] = { static const VMStateDescription pcie_pci_bridge_dev_vmstate = { .name = TYPE_PCIE_PCI_BRIDGE_DEV, + .priority = MIG_PRI_PCI_BUS, .fields = (VMStateField[]) { VMSTATE_PCI_DEVICE(parent_obj, PCIBridge), SHPC_VMSTATE(shpc, PCIDevice, NULL), @@ -178,7 +179,6 @@ static void pcie_pci_bridge_class_init(ObjectClass *klass, void *data) k->config_write = pcie_pci_bridge_write_config; dc->vmsd = &pcie_pci_bridge_dev_vmstate; dc->props = pcie_pci_bridge_dev_properties; - dc->vmsd = &pcie_pci_bridge_dev_vmstate; dc->reset = &pcie_pci_bridge_reset; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); hc->plug = pcie_pci_bridge_hotplug_cb; diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c index 1e09d2afb7..4dd2e65118 100644 --- a/hw/pci-bridge/xio3130_downstream.c +++ b/hw/pci-bridge/xio3130_downstream.c @@ -161,6 +161,7 @@ static Property xio3130_downstream_props[] = { static const VMStateDescription vmstate_xio3130_downstream = { .name = "xio3130-express-downstream-port", + .priority = MIG_PRI_PCI_BUS, .version_id = 1, .minimum_version_id = 1, .post_load = pcie_cap_slot_post_load, diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c index 227997ce46..c5f02a6ee8 100644 --- a/hw/pci-bridge/xio3130_upstream.c +++ 
b/hw/pci-bridge/xio3130_upstream.c @@ -133,6 +133,7 @@ PCIEPort *xio3130_upstream_init(PCIBus *bus, int devfn, bool multifunction, static const VMStateDescription vmstate_xio3130_upstream = { .name = "xio3130-express-upstream-port", + .priority = MIG_PRI_PCI_BUS, .version_id = 1, .minimum_version_id = 1, .fields = (VMStateField[]) { diff --git a/include/migration/vmstate.h b/include/migration/vmstate.h index 8c3889433c..df463fd33d 100644 --- a/include/migration/vmstate.h +++ b/include/migration/vmstate.h @@ -148,6 +148,7 @@ enum VMStateFlags { typedef enum { MIG_PRI_DEFAULT = 0, MIG_PRI_IOMMU, /* Must happen before PCI devices */ + MIG_PRI_PCI_BUS, /* Must happen before IOMMU */ MIG_PRI_GICV3_ITS, /* Must happen before PCI devices */ MIG_PRI_GICV3, /* Must happen before the ITS */ MIG_PRI_MAX, From 0ebf9a748881fa85491501f0fa3ca5aec79b7afc Mon Sep 17 00:00:00 2001 From: Changpeng Liu Date: Tue, 6 Feb 2018 09:35:34 +0800 Subject: [PATCH 13/22] virtio-blk: enable multiple vectors when using multiple I/O queues Currently virtio-pci driver hardcoded 2 vectors for virtio-blk device, for multiple I/O queues scenario, all the I/O queues will share one interrupt vector, while here, enable multiple vectors according to the number of I/O queues. Signed-off-by: Changpeng Liu Reviewed-by: Paolo Bonzini Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio-pci.c | 14 ++++++++++++-- include/hw/compat.h | 8 ++++++++ 2 files changed, 20 insertions(+), 2 deletions(-) diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index c20537f31d..b55dfcf05c 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1932,7 +1932,8 @@ static Property virtio_blk_pci_properties[] = { DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), DEFINE_PROP_BIT("ioeventfd", VirtIOPCIProxy, flags, VIRTIO_PCI_FLAG_USE_IOEVENTFD_BIT, true), - DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), DEFINE_PROP_END_OF_LIST(), }; @@ -1941,6 +1942,10 @@ static void virtio_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) VirtIOBlkPCI *dev = VIRTIO_BLK_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vdev); + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = dev->vdev.conf.num_queues + 1; + } + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); object_property_set_bool(OBJECT(vdev), true, "realized", errp); } @@ -1983,7 +1988,8 @@ static const TypeInfo virtio_blk_pci_info = { static Property vhost_user_blk_pci_properties[] = { DEFINE_PROP_UINT32("class", VirtIOPCIProxy, class_code, 0), - DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, 2), + DEFINE_PROP_UINT32("vectors", VirtIOPCIProxy, nvectors, + DEV_NVECTORS_UNSPECIFIED), DEFINE_PROP_END_OF_LIST(), }; @@ -1992,6 +1998,10 @@ static void vhost_user_blk_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) VHostUserBlkPCI *dev = VHOST_USER_BLK_PCI(vpci_dev); DeviceState *vdev = DEVICE(&dev->vdev); + if (vpci_dev->nvectors == DEV_NVECTORS_UNSPECIFIED) { + vpci_dev->nvectors = dev->vdev.num_queues + 1; + } + qdev_set_parent_bus(vdev, BUS(&vpci_dev->bus)); object_property_set_bool(OBJECT(vdev), true, "realized", errp); } diff --git a/include/hw/compat.h b/include/hw/compat.h index 7f31850dfa..bc9e3a6627 100644 --- 
a/include/hw/compat.h +++ b/include/hw/compat.h @@ -6,6 +6,14 @@ .driver = "hpet",\ .property = "hpet-offset-saved",\ .value = "false",\ + },{\ + .driver = "virtio-blk-pci",\ + .property = "vectors",\ + .value = "2",\ + },{\ + .driver = "vhost-user-blk-pci",\ + .property = "vectors",\ + .value = "2",\ }, #define HW_COMPAT_2_10 \ From d61a363d3e801fcfdba0767ba0f2b44cd458be37 Mon Sep 17 00:00:00 2001 From: Yoni Bettan Date: Tue, 16 Jan 2018 14:34:56 +0200 Subject: [PATCH 14/22] pci: removed the is_express field since a uniform interface was inserted according to Eduardo Habkost's commit fd3b02c889 all PCIEs now implement INTERFACE_PCIE_DEVICE so we don't need is_express field anymore. Devices that implement only INTERFACE_PCIE_DEVICE (is_express == 1) or devices that implement only INTERFACE_CONVENTIONAL_PCI_DEVICE (is_express == 0) were not affected by the change. The only devices that were affected are those that are hybrid and also had (is_express == 1) - therefore only: - hw/vfio/pci.c - hw/usb/hcd-xhci.c - hw/xen/xen_pt.c For those 3 I made sure that QEMU_PCI_CAP_EXPRESS is on in instance_init() Reviewed-by: Marcel Apfelbaum Reviewed-by: Eduardo Habkost Signed-off-by: Yoni Bettan Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- docs/pcie_pci_bridge.txt | 2 +- hw/block/nvme.c | 1 - hw/net/e1000e.c | 1 - hw/pci-bridge/pcie_pci_bridge.c | 1 - hw/pci-bridge/pcie_root_port.c | 1 - hw/pci-bridge/xio3130_downstream.c | 1 - hw/pci-bridge/xio3130_upstream.c | 1 - hw/pci-host/xilinx-pcie.c | 1 - hw/pci/pci.c | 8 ++++++-- hw/scsi/megasas.c | 4 ---- hw/usb/hcd-xhci.c | 9 ++++++++- hw/vfio/pci.c | 5 ++++- hw/xen/xen_pt.c | 9 ++++++++- include/hw/pci/pci.h | 3 --- 14 files changed, 27 insertions(+), 20 deletions(-) diff --git a/docs/pcie_pci_bridge.txt b/docs/pcie_pci_bridge.txt index 5a4203f97c..ab35ebf3ca 100644 --- a/docs/pcie_pci_bridge.txt +++ b/docs/pcie_pci_bridge.txt @@ -110,5 +110,5 @@ To enable device hot-plug into the bridge on Linux there're 3 ways: Implementation ============== The PCIE-PCI bridge is based on PCI-PCI bridge, but also accumulates PCI Express -features as a PCI Express device (is_express=1). +features as a PCI Express device. diff --git a/hw/block/nvme.c b/hw/block/nvme.c index 51a58fefba..85d2406400 100644 --- a/hw/block/nvme.c +++ b/hw/block/nvme.c @@ -1360,7 +1360,6 @@ static void nvme_class_init(ObjectClass *oc, void *data) pc->vendor_id = PCI_VENDOR_ID_INTEL; pc->device_id = 0x5845; pc->revision = 2; - pc->is_express = 1; set_bit(DEVICE_CATEGORY_STORAGE, dc->categories); dc->desc = "Non-Volatile Memory Express"; diff --git a/hw/net/e1000e.c b/hw/net/e1000e.c index 191398a3d5..16a9417a85 100644 --- a/hw/net/e1000e.c +++ b/hw/net/e1000e.c @@ -675,7 +675,6 @@ static void e1000e_class_init(ObjectClass *class, void *data) c->revision = 0; c->romfile = "efi-e1000e.rom"; c->class_id = PCI_CLASS_NETWORK_ETHERNET; - c->is_express = 1; dc->desc = "Intel 82574L GbE Controller"; dc->reset = e1000e_qdev_reset; diff --git a/hw/pci-bridge/pcie_pci_bridge.c b/hw/pci-bridge/pcie_pci_bridge.c index e5ac7974cf..04cf5a6a92 100644 --- a/hw/pci-bridge/pcie_pci_bridge.c +++ b/hw/pci-bridge/pcie_pci_bridge.c @@ -170,7 +170,6 @@ static void pcie_pci_bridge_class_init(ObjectClass 
*klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); HotplugHandlerClass *hc = HOTPLUG_HANDLER_CLASS(klass); - k->is_express = 1; k->is_bridge = 1; k->vendor_id = PCI_VENDOR_ID_REDHAT; k->device_id = PCI_DEVICE_ID_REDHAT_PCIE_BRIDGE; diff --git a/hw/pci-bridge/pcie_root_port.c b/hw/pci-bridge/pcie_root_port.c index 9b6e4ce512..45f9e8cd4a 100644 --- a/hw/pci-bridge/pcie_root_port.c +++ b/hw/pci-bridge/pcie_root_port.c @@ -145,7 +145,6 @@ static void rp_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - k->is_express = 1; k->is_bridge = 1; k->config_write = rp_write_config; k->realize = rp_realize; diff --git a/hw/pci-bridge/xio3130_downstream.c b/hw/pci-bridge/xio3130_downstream.c index 4dd2e65118..b202657954 100644 --- a/hw/pci-bridge/xio3130_downstream.c +++ b/hw/pci-bridge/xio3130_downstream.c @@ -178,7 +178,6 @@ static void xio3130_downstream_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - k->is_express = 1; k->is_bridge = 1; k->config_write = xio3130_downstream_write_config; k->realize = xio3130_downstream_realize; diff --git a/hw/pci-bridge/xio3130_upstream.c b/hw/pci-bridge/xio3130_upstream.c index c5f02a6ee8..556f471a83 100644 --- a/hw/pci-bridge/xio3130_upstream.c +++ b/hw/pci-bridge/xio3130_upstream.c @@ -149,7 +149,6 @@ static void xio3130_upstream_class_init(ObjectClass *klass, void *data) DeviceClass *dc = DEVICE_CLASS(klass); PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); - k->is_express = 1; k->is_bridge = 1; k->config_write = xio3130_upstream_write_config; k->realize = xio3130_upstream_realize; diff --git a/hw/pci-host/xilinx-pcie.c b/hw/pci-host/xilinx-pcie.c index 53b561f81f..044e312dc1 100644 --- a/hw/pci-host/xilinx-pcie.c +++ b/hw/pci-host/xilinx-pcie.c @@ -297,7 +297,6 @@ static void xilinx_pcie_root_class_init(ObjectClass *klass, void *data) k->device_id = 0x7021; k->revision = 0; 
k->class_id = PCI_CLASS_BRIDGE_HOST; - k->is_express = true; k->is_bridge = true; k->realize = xilinx_pcie_root_realize; k->exit = pci_bridge_exitfn; diff --git a/hw/pci/pci.c b/hw/pci/pci.c index fc25cdecba..ef4342293e 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2005,11 +2005,15 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) { PCIDevice *pci_dev = (PCIDevice *)qdev; PCIDeviceClass *pc = PCI_DEVICE_GET_CLASS(pci_dev); + ObjectClass *klass = OBJECT_CLASS(pc); Error *local_err = NULL; bool is_default_rom; - /* initialize cap_present for pci_is_express() and pci_config_size() */ - if (pc->is_express) { + /* initialize cap_present for pci_is_express() and pci_config_size(), + * Note that hybrid PCIs are not set automatically and need to manage + * QEMU_PCI_CAP_EXPRESS manually */ + if (object_class_dynamic_cast(klass, INTERFACE_PCIE_DEVICE) && + !object_class_dynamic_cast(klass, INTERFACE_CONVENTIONAL_PCI_DEVICE)) { pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } diff --git a/hw/scsi/megasas.c b/hw/scsi/megasas.c index 3e38e9e8aa..ba1afa3c1e 100644 --- a/hw/scsi/megasas.c +++ b/hw/scsi/megasas.c @@ -2447,7 +2447,6 @@ typedef struct MegasasInfo { uint16_t subsystem_id; int ioport_bar; int mmio_bar; - bool is_express; int osts; const VMStateDescription *vmsd; Property *props; @@ -2465,7 +2464,6 @@ static struct MegasasInfo megasas_devices[] = { .ioport_bar = 2, .mmio_bar = 0, .osts = MFI_1078_RM | 1, - .is_express = false, .vmsd = &vmstate_megasas_gen1, .props = megasas_properties_gen1, .interfaces = (InterfaceInfo[]) { @@ -2482,7 +2480,6 @@ static struct MegasasInfo megasas_devices[] = { .ioport_bar = 0, .mmio_bar = 1, .osts = MFI_GEN2_RM, - .is_express = true, .vmsd = &vmstate_megasas_gen2, .props = megasas_properties_gen2, .interfaces = (InterfaceInfo[]) { @@ -2506,7 +2503,6 @@ static void megasas_class_init(ObjectClass *oc, void *data) pc->subsystem_vendor_id = PCI_VENDOR_ID_LSI_LOGIC; pc->subsystem_id = info->subsystem_id; pc->class_id = 
PCI_CLASS_STORAGE_RAID; - pc->is_express = info->is_express; e->mmio_bar = info->mmio_bar; e->ioport_bar = info->ioport_bar; e->osts = info->osts; diff --git a/hw/usb/hcd-xhci.c b/hw/usb/hcd-xhci.c index 228e82b3fb..721beb5486 100644 --- a/hw/usb/hcd-xhci.c +++ b/hw/usb/hcd-xhci.c @@ -3649,6 +3649,13 @@ static Property xhci_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static void xhci_instance_init(Object *obj) +{ + /* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command + * line, therefore, no need to wait to realize like other devices */ + PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; +} + static void xhci_class_init(ObjectClass *klass, void *data) { PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); @@ -3661,7 +3668,6 @@ static void xhci_class_init(ObjectClass *klass, void *data) k->realize = usb_xhci_realize; k->exit = usb_xhci_exit; k->class_id = PCI_CLASS_SERIAL_USB; - k->is_express = 1; } static const TypeInfo xhci_info = { @@ -3669,6 +3675,7 @@ static const TypeInfo xhci_info = { .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(XHCIState), .class_init = xhci_class_init, + .instance_init = xhci_instance_init, .abstract = true, .interfaces = (InterfaceInfo[]) { { INTERFACE_PCIE_DEVICE }, diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c index 879510c046..b33c5e8a03 100644 --- a/hw/vfio/pci.c +++ b/hw/vfio/pci.c @@ -3113,6 +3113,10 @@ static void vfio_instance_init(Object *obj) vdev->host.function = ~0U; vdev->nv_gpudirect_clique = 0xFF; + + /* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command + * line, therefore, no need to wait to realize like other devices */ + pci_dev->cap_present |= QEMU_PCI_CAP_EXPRESS; } static Property vfio_pci_dev_properties[] = { @@ -3171,7 +3175,6 @@ static void vfio_pci_dev_class_init(ObjectClass *klass, void *data) pdc->exit = vfio_exitfn; pdc->config_read = vfio_pci_read_config; pdc->config_write = vfio_pci_write_config; - pdc->is_express = 1; /* We might be */ } static const TypeInfo 
vfio_pci_dev_info = { diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index f662f30370..9b7a960de1 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -937,6 +937,13 @@ static Property xen_pci_passthrough_properties[] = { DEFINE_PROP_END_OF_LIST(), }; +static void xen_pci_passthrough_instance_init(Object *obj) +{ + /* QEMU_PCI_CAP_EXPRESS initialization does not depend on QEMU command + * line, therefore, no need to wait to realize like other devices */ + PCI_DEVICE(obj)->cap_present |= QEMU_PCI_CAP_EXPRESS; +} + static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); @@ -946,7 +953,6 @@ static void xen_pci_passthrough_class_init(ObjectClass *klass, void *data) k->exit = xen_pt_unregister_device; k->config_read = xen_pt_pci_read_config; k->config_write = xen_pt_pci_write_config; - k->is_express = 1; /* We might be */ set_bit(DEVICE_CATEGORY_MISC, dc->categories); dc->desc = "Assign an host PCI device with Xen"; dc->props = xen_pci_passthrough_properties; @@ -965,6 +971,7 @@ static const TypeInfo xen_pci_passthrough_info = { .instance_size = sizeof(XenPCIPassthroughState), .instance_finalize = xen_pci_passthrough_finalize, .class_init = xen_pci_passthrough_class_init, + .instance_init = xen_pci_passthrough_instance_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, { INTERFACE_PCIE_DEVICE }, diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 15ced9648c..d8c18c7fa4 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -236,9 +236,6 @@ typedef struct PCIDeviceClass { */ int is_bridge; - /* pcie stuff */ - int is_express; /* is this device pci express? 
*/ - /* rom bar */ const char *romfile; } PCIDeviceClass; From b86107ab43b804e899a226fe287e34ab8acef596 Mon Sep 17 00:00:00 2001 From: Tiwei Bie Date: Thu, 25 Jan 2018 15:12:43 +0800 Subject: [PATCH 15/22] virtio-balloon: unref the memory region before continuing Signed-off-by: Tiwei Bie Cc: qemu-stable@nongnu.org Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/virtio-balloon.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 14e08d20d0..f2104bfcef 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -234,6 +234,7 @@ static void virtio_balloon_handle_output(VirtIODevice *vdev, VirtQueue *vq) memory_region_is_rom(section.mr) || memory_region_is_romd(section.mr)) { trace_virtio_balloon_bad_addr(pa); + memory_region_unref(section.mr); continue; } From bb102d1da15a97c6750a4f96810cf15713be2bd6 Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Thu, 18 Jan 2018 23:41:56 +0800 Subject: [PATCH 16/22] libvhost-user: Fix resource leak MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Free the mmaped memory when we need to mmap new memory space on vu_set_mem_table_exec() and vu_set_log_base_exec() to avoid memory leak. Also close the corresponding fd after mmap() on vu_set_log_base_exec() to avoid fd leak. Signed-off-by: Yongji Xie Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Marc-André Lureau --- contrib/libvhost-user/libvhost-user.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 27cc59791b..54dbc933b3 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -407,6 +407,15 @@ vu_set_mem_table_exec(VuDev *dev, VhostUserMsg *vmsg) { int i; VhostUserMemory *memory = &vmsg->payload.memory; + + for (i = 0; i < dev->nregions; i++) { + VuDevRegion *r = &dev->regions[i]; + void *m = (void *) (uintptr_t) r->mmap_addr; + + if (m) { + munmap(m, r->size + r->mmap_offset); + } + } dev->nregions = memory->nregions; DPRINT("Nregions: %d\n", memory->nregions); @@ -472,9 +481,14 @@ vu_set_log_base_exec(VuDev *dev, VhostUserMsg *vmsg) rc = mmap(0, log_mmap_size, PROT_READ | PROT_WRITE, MAP_SHARED, fd, log_mmap_offset); + close(fd); if (rc == MAP_FAILED) { perror("log mmap error"); } + + if (dev->log_table) { + munmap(dev->log_table, dev->log_size); + } dev->log_table = rc; dev->log_size = log_mmap_size; From 293084a7196b1d7781b6fe19b24e85eb8b7f4de0 Mon Sep 17 00:00:00 2001 From: Yongji Xie Date: Fri, 19 Jan 2018 00:04:05 +0800 Subject: [PATCH 17/22] libvhost-user: Support across-memory-boundary access The sg list/indirect descriptor table may be contiguous in GPA but not in HVA address space. But libvhost-user wasn't aware of that. This would cause out-of-bounds access. Even a malicious guest could use it to get information from the vhost-user backend. Introduce a plen parameter in vu_gpa_to_va() so we can handle this case, returning the actual mapped length. Signed-off-by: Yongji Xie Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Maxime Coquelin --- contrib/libvhost-user/libvhost-user.c | 133 +++++++++++++++++++++++--- contrib/libvhost-user/libvhost-user.h | 3 +- 2 files changed, 122 insertions(+), 14 deletions(-) diff --git a/contrib/libvhost-user/libvhost-user.c b/contrib/libvhost-user/libvhost-user.c index 54dbc933b3..2e358b5bce 100644 --- a/contrib/libvhost-user/libvhost-user.c +++ b/contrib/libvhost-user/libvhost-user.c @@ -118,15 +118,22 @@ vu_panic(VuDev *dev, const char *msg, ...) /* Translate guest physical address to our virtual address. */ void * -vu_gpa_to_va(VuDev *dev, uint64_t guest_addr) +vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr) { int i; + if (*plen == 0) { + return NULL; + } + /* Find matching memory region. */ for (i = 0; i < dev->nregions; i++) { VuDevRegion *r = &dev->regions[i]; if ((guest_addr >= r->gpa) && (guest_addr < (r->gpa + r->size))) { + if ((guest_addr + *plen) > (r->gpa + r->size)) { + *plen = r->gpa + r->size - guest_addr; + } return (void *)(uintptr_t) guest_addr - r->gpa + r->mmap_addr + r->mmap_offset; } @@ -1116,6 +1123,37 @@ virtqueue_get_head(VuDev *dev, VuVirtq *vq, return true; } +static int +virtqueue_read_indirect_desc(VuDev *dev, struct vring_desc *desc, + uint64_t addr, size_t len) +{ + struct vring_desc *ori_desc; + uint64_t read_len; + + if (len > (VIRTQUEUE_MAX_SIZE * sizeof(struct vring_desc))) { + return -1; + } + + if (len == 0) { + return -1; + } + + while (len) { + read_len = len; + ori_desc = vu_gpa_to_va(dev, &read_len, addr); + if (!ori_desc) { + return -1; + } + + memcpy(desc, ori_desc, read_len); + len -= read_len; + addr += read_len; + desc += read_len; + } + + return 0; +} + enum { VIRTQUEUE_READ_DESC_ERROR = -1, VIRTQUEUE_READ_DESC_DONE = 0, /* end of chain */ @@ -1162,8 +1200,10 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes, } while ((rc = virtqueue_num_heads(dev, vq, idx)) > 0) { - unsigned int max, num_bufs, indirect = 0; + unsigned int max, desc_len, 
num_bufs, indirect = 0; + uint64_t desc_addr, read_len; struct vring_desc *desc; + struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE]; unsigned int i; max = vq->vring.num; @@ -1187,8 +1227,24 @@ vu_queue_get_avail_bytes(VuDev *dev, VuVirtq *vq, unsigned int *in_bytes, /* loop over the indirect descriptor table */ indirect = 1; - max = desc[i].len / sizeof(struct vring_desc); - desc = vu_gpa_to_va(dev, desc[i].addr); + desc_addr = desc[i].addr; + desc_len = desc[i].len; + max = desc_len / sizeof(struct vring_desc); + read_len = desc_len; + desc = vu_gpa_to_va(dev, &read_len, desc_addr); + if (unlikely(desc && read_len != desc_len)) { + /* Failed to use zero copy */ + desc = NULL; + if (!virtqueue_read_indirect_desc(dev, desc_buf, + desc_addr, + desc_len)) { + desc = desc_buf; + } + } + if (!desc) { + vu_panic(dev, "Invalid indirect buffer table"); + goto err; + } num_bufs = i = 0; } @@ -1386,9 +1442,24 @@ virtqueue_map_desc(VuDev *dev, return; } - iov[num_sg].iov_base = vu_gpa_to_va(dev, pa); - iov[num_sg].iov_len = sz; - num_sg++; + while (sz) { + uint64_t len = sz; + + if (num_sg == max_num_sg) { + vu_panic(dev, "virtio: too many descriptors in indirect table"); + return; + } + + iov[num_sg].iov_base = vu_gpa_to_va(dev, &len, pa); + if (iov[num_sg].iov_base == NULL) { + vu_panic(dev, "virtio: invalid address for buffers"); + return; + } + iov[num_sg].iov_len = len; + num_sg++; + sz -= len; + pa += len; + } *p_num_sg = num_sg; } @@ -1420,10 +1491,12 @@ virtqueue_alloc_element(size_t sz, void * vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) { - unsigned int i, head, max; + unsigned int i, head, max, desc_len; + uint64_t desc_addr, read_len; VuVirtqElement *elem; unsigned out_num, in_num; struct iovec iov[VIRTQUEUE_MAX_SIZE]; + struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE]; struct vring_desc *desc; int rc; @@ -1464,8 +1537,24 @@ vu_queue_pop(VuDev *dev, VuVirtq *vq, size_t sz) } /* loop over the indirect descriptor table */ - max = desc[i].len / sizeof(struct 
vring_desc); - desc = vu_gpa_to_va(dev, desc[i].addr); + desc_addr = desc[i].addr; + desc_len = desc[i].len; + max = desc_len / sizeof(struct vring_desc); + read_len = desc_len; + desc = vu_gpa_to_va(dev, &read_len, desc_addr); + if (unlikely(desc && read_len != desc_len)) { + /* Failed to use zero copy */ + desc = NULL; + if (!virtqueue_read_indirect_desc(dev, desc_buf, + desc_addr, + desc_len)) { + desc = desc_buf; + } + } + if (!desc) { + vu_panic(dev, "Invalid indirect buffer table"); + return NULL; + } i = 0; } @@ -1541,7 +1630,9 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq, unsigned int len) { struct vring_desc *desc = vq->vring.desc; - unsigned int i, max, min; + unsigned int i, max, min, desc_len; + uint64_t desc_addr, read_len; + struct vring_desc desc_buf[VIRTQUEUE_MAX_SIZE]; unsigned num_bufs = 0; max = vq->vring.num; @@ -1553,8 +1644,24 @@ vu_log_queue_fill(VuDev *dev, VuVirtq *vq, } /* loop over the indirect descriptor table */ - max = desc[i].len / sizeof(struct vring_desc); - desc = vu_gpa_to_va(dev, desc[i].addr); + desc_addr = desc[i].addr; + desc_len = desc[i].len; + max = desc_len / sizeof(struct vring_desc); + read_len = desc_len; + desc = vu_gpa_to_va(dev, &read_len, desc_addr); + if (unlikely(desc && read_len != desc_len)) { + /* Failed to use zero copy */ + desc = NULL; + if (!virtqueue_read_indirect_desc(dev, desc_buf, + desc_addr, + desc_len)) { + desc = desc_buf; + } + } + if (!desc) { + vu_panic(dev, "Invalid indirect buffer table"); + return; + } i = 0; } diff --git a/contrib/libvhost-user/libvhost-user.h b/contrib/libvhost-user/libvhost-user.h index f8a730b725..18f95f65d7 100644 --- a/contrib/libvhost-user/libvhost-user.h +++ b/contrib/libvhost-user/libvhost-user.h @@ -327,11 +327,12 @@ bool vu_dispatch(VuDev *dev); /** * vu_gpa_to_va: * @dev: a VuDev context + * @plen: guest memory size * @guest_addr: guest address * * Translate a guest address to a pointer. Returns NULL on failure. 
*/ -void *vu_gpa_to_va(VuDev *dev, uint64_t guest_addr); +void *vu_gpa_to_va(VuDev *dev, uint64_t *plen, uint64_t guest_addr); /** * vu_get_queue: From fc67208f228af8e444f74362db1bced56a3daa71 Mon Sep 17 00:00:00 2001 From: Marcel Apfelbaum Date: Wed, 17 Jan 2018 21:19:47 +0200 Subject: [PATCH 18/22] hw/pci-bridge: fix pcie root port's IO hints capability The gen_pcie_root_port mem-reserve and pref32-reserve properties are defined as size (so uint64_t), but passed as uint32_t when building the 'IO hints' vendor specific capability. Passing 4G (or more) gets truncated and passed as a zero reservation. This is not a huge issue since the guest firmware will always compare the hints with the default value and take the maximum. Fix it by passing the values as uint64_t and failing to init the gen_pcie_root_port if invalid values are used. Signed-off-by: Marcel Apfelbaum Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci_bridge.c | 24 +++++++++++++++++++----- include/hw/pci/pci_bridge.h | 4 ++-- 2 files changed, 21 insertions(+), 7 deletions(-) diff --git a/hw/pci/pci_bridge.c b/hw/pci/pci_bridge.c index b2e50c36a0..40a39f57cb 100644 --- a/hw/pci/pci_bridge.c +++ b/hw/pci/pci_bridge.c @@ -412,22 +412,36 @@ void pci_bridge_map_irq(PCIBridge *br, const char* bus_name, int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, uint32_t bus_reserve, uint64_t io_reserve, - uint32_t mem_non_pref_reserve, - uint32_t mem_pref_32_reserve, + uint64_t mem_non_pref_reserve, + uint64_t mem_pref_32_reserve, uint64_t mem_pref_64_reserve, Error **errp) { - if (mem_pref_32_reserve != (uint32_t)-1 && + if (mem_pref_32_reserve != (uint64_t)-1 && mem_pref_64_reserve != (uint64_t)-1) { error_setg(errp, "PCI resource reserve cap: PREF32 and PREF64 conflict"); return -EINVAL; } + if (mem_non_pref_reserve != (uint64_t)-1 && + mem_non_pref_reserve >= (1ULL << 32)) { + error_setg(errp, + "PCI resource reserve cap: mem-reserve must be less than 4G"); + return 
-EINVAL; + } + + if (mem_pref_32_reserve != (uint64_t)-1 && + mem_pref_32_reserve >= (1ULL << 32)) { + error_setg(errp, + "PCI resource reserve cap: pref32-reserve must be less than 4G"); + return -EINVAL; + } + if (bus_reserve == (uint32_t)-1 && io_reserve == (uint64_t)-1 && - mem_non_pref_reserve == (uint32_t)-1 && - mem_pref_32_reserve == (uint32_t)-1 && + mem_non_pref_reserve == (uint64_t)-1 && + mem_pref_32_reserve == (uint64_t)-1 && mem_pref_64_reserve == (uint64_t)-1) { return 0; } diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index 9b44ffd22a..0347da52d2 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -135,8 +135,8 @@ typedef struct PCIBridgeQemuCap { int pci_bridge_qemu_reserve_cap_init(PCIDevice *dev, int cap_offset, uint32_t bus_reserve, uint64_t io_reserve, - uint32_t mem_non_pref_reserve, - uint32_t mem_pref_32_reserve, + uint64_t mem_non_pref_reserve, + uint64_t mem_pref_32_reserve, uint64_t mem_pref_64_reserve, Error **errp); From 92146b7a0fe6d6f9df34eeb22dc8e33785a49a42 Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 16 Jan 2018 16:30:26 +0100 Subject: [PATCH 19/22] tests: acpi: fix FADT not being compared to reference table It turns out that FADT isn't actually tested for changes against reference table, since it happens to be the 1st table in RSDT which is currently ignored. Fix it by making sure that all tables from RSDT are added to test list. NOTE: FADT contains guest allocated pointers to FACS/DSDT, zero them out so that possible FACS/DSDT address change won't affect test results. Signed-off-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- tests/bios-tables-test.c | 35 +++++++++++++++++++++++++++++++++-- 1 file changed, 33 insertions(+), 2 deletions(-) diff --git a/tests/bios-tables-test.c b/tests/bios-tables-test.c index b354aaafe6..2b332ed3c1 100644 --- a/tests/bios-tables-test.c +++ b/tests/bios-tables-test.c @@ -194,6 +194,35 @@ static void test_acpi_fadt_table(test_data *data) le32_to_cpu(fadt_table->length))); } +static void sanitize_fadt_ptrs(test_data *data) +{ + /* fixup pointers in FADT */ + int i; + + for (i = 0; i < data->tables->len; i++) { + AcpiSdtTable *sdt = &g_array_index(data->tables, AcpiSdtTable, i); + + if (memcmp(&sdt->header.signature, "FACP", 4)) { + continue; + } + + /* sdt->aml field offset := spec offset - header size */ + memset(sdt->aml + 0, 0, 4); /* sanitize FIRMWARE_CTRL(36) ptr */ + memset(sdt->aml + 4, 0, 4); /* sanitize DSDT(40) ptr */ + if (sdt->header.revision >= 3) { + memset(sdt->aml + 96, 0, 8); /* sanitize X_FIRMWARE_CTRL(132) ptr */ + memset(sdt->aml + 104, 0, 8); /* sanitize X_DSDT(140) ptr */ + } + + /* update checksum */ + sdt->header.checksum = 0; + sdt->header.checksum -= + acpi_calc_checksum((uint8_t *)sdt, sizeof(AcpiTableHeader)) + + acpi_calc_checksum((uint8_t *)sdt->aml, sdt->aml_len); + break; + } +} + static void test_acpi_facs_table(test_data *data) { AcpiFacsDescriptorRev1 *facs_table = &data->facs_table; @@ -248,14 +277,14 @@ static void test_acpi_dsdt_table(test_data *data) /* Load all tables and add to test list directly RSDT referenced tables */ static void fetch_rsdt_referenced_tables(test_data *data) { - int tables_nr = data->rsdt_tables_nr - 1; /* fadt is first */ + int tables_nr = data->rsdt_tables_nr; int i; for (i = 0; i < tables_nr; i++) { AcpiSdtTable ssdt_table; uint32_t addr; - addr = le32_to_cpu(data->rsdt_tables_addr[i + 1]); /* fadt is first */ + addr = le32_to_cpu(data->rsdt_tables_addr[i]); fetch_table(&ssdt_table, addr); /* Add table to ASL test tables list */ @@ -650,6 +679,8 @@ static void test_acpi_one(const 
char *params, test_data *data) test_acpi_dsdt_table(data); fetch_rsdt_referenced_tables(data); + sanitize_fadt_ptrs(data); + if (iasl) { if (getenv(ACPI_REBUILD_EXPECTED_AML)) { dump_aml_files(data, true); From 562a140765e56b898601fbd84046fc4d9b9a3bed Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 8 Feb 2018 19:40:02 +0200 Subject: [PATCH 20/22] lpc: drop pcie host dependency Doesn't look like that header is used. Signed-off-by: Michael S. Tsirkin --- hw/isa/lpc_ich9.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/isa/lpc_ich9.c b/hw/isa/lpc_ich9.c index adcf077fa5..e692b9fdc1 100644 --- a/hw/isa/lpc_ich9.c +++ b/hw/isa/lpc_ich9.c @@ -39,7 +39,6 @@ #include "hw/isa/apm.h" #include "hw/i386/ioapic.h" #include "hw/pci/pci.h" -#include "hw/pci/pcie_host.h" #include "hw/pci/pci_bridge.h" #include "hw/i386/ich9.h" #include "hw/acpi/acpi.h" From e89e7ea620f866ab42cbaa2de964552a3d2f7494 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Thu, 8 Feb 2018 19:56:49 +0200 Subject: [PATCH 21/22] acpi-test: update FADT Previous commit ("tests: acpi: fix FADT not being compared to reference table") started tracking changes to the FADT. Generate the expected FACP files - apparently these weren't updated since 2013. Signed-off-by: Michael S. 
Tsirkin --- tests/acpi-test-data/pc/FACP | Bin 116 -> 116 bytes tests/acpi-test-data/q35/FACP | Bin 116 -> 244 bytes 2 files changed, 0 insertions(+), 0 deletions(-) diff --git a/tests/acpi-test-data/pc/FACP b/tests/acpi-test-data/pc/FACP index 0639999ed1f748d44977f88e63d6a0ab49add040..261ebdc5d1c3bdf18fb7935314a04fd7f6f92a7a 100644 GIT binary patch delta 70 zcmXRZ;c|0y4k%$@U|?K0kxN<=$N&LG22O@eK>FhcAi)L_VPIf^(jYbm+eAM(4kI9I IX#)cT0Ciyr82|tP delta 70 zcmXRZ;c|0y4k%$@U|_sCkxN?h!T)*(AZBFXWY`20{P+P#Yye^)V1d#gHphmEesUZj J*3t$B1^}tB5E%df diff --git a/tests/acpi-test-data/q35/FACP b/tests/acpi-test-data/q35/FACP index 19f3ac3ce6ab732caa750d835ce1261bc7343cf2..72c9d97902a4bbf14896023d9ba78e0899d6517b 100644 GIT binary patch literal 244 zcmZ>BbPo8!z`(#P@8s|75v<@85#a0w6k`O6f!H7#1{fJQ88!hqOw2%n4I;_{r9nIn zAX@<@&cwhX02KSr|DPYCl7Ybp$XMFKz`)4C!0?j?A_|v;DFV`r3P1wMTp$k&7=Z>N X+XoXzrWq9=?f{7~HXz&s;==#{BbPgzCU|?YEaPoKd2v%^42yk`-iZKGkKx`1raN&Qw0}wMZa58KHa+#Qc#0HQA i0|N_`2C+GSYz3G&69bC?Q0zbde}0f03@mM6U;qFojS~R? From bf1e7140ef0b3a149860ab9f05b36665133238f6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Tom=C3=A1=C5=A1=20Golembiovsk=C3=BD?= Date: Tue, 5 Dec 2017 13:14:46 +0100 Subject: [PATCH 22/22] virtio-balloon: include statistics of disk/file caches MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Tomáš Golembiovský Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/virtio-balloon.c | 1 + include/standard-headers/linux/virtio_balloon.h | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index f2104bfcef..179c8f5768 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -50,6 +50,7 @@ static const char *balloon_stat_names[] = { [VIRTIO_BALLOON_S_MEMFREE] = "stat-free-memory", [VIRTIO_BALLOON_S_MEMTOT] = "stat-total-memory", [VIRTIO_BALLOON_S_AVAIL] = "stat-available-memory", + [VIRTIO_BALLOON_S_CACHES] = "stat-disk-caches", [VIRTIO_BALLOON_S_NR] = NULL }; diff --git a/include/standard-headers/linux/virtio_balloon.h b/include/standard-headers/linux/virtio_balloon.h index 9d06ccd066..7b0a41b8fc 100644 --- a/include/standard-headers/linux/virtio_balloon.h +++ b/include/standard-headers/linux/virtio_balloon.h @@ -52,7 +52,8 @@ struct virtio_balloon_config { #define VIRTIO_BALLOON_S_MEMFREE 4 /* Total amount of free memory */ #define VIRTIO_BALLOON_S_MEMTOT 5 /* Total amount of memory */ #define VIRTIO_BALLOON_S_AVAIL 6 /* Available memory as in /proc */ -#define VIRTIO_BALLOON_S_NR 7 +#define VIRTIO_BALLOON_S_CACHES 7 /* Disk caches */ +#define VIRTIO_BALLOON_S_NR 8 /* * Memory statistics structure.