From f0d634ea1964ccce317818c44fe299e71007e64d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Carlos=20L=C3=B3pez?= Date: Fri, 17 Mar 2023 01:27:51 +0100 Subject: [PATCH 01/31] virtio: refresh vring region cache after updating a virtqueue size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When a virtqueue size is changed by the guest via virtio_queue_set_num(), its region cache is not automatically updated. If the size was increased, this could lead to accessing the cache out of bounds. For example, in vring_get_used_event(): static inline uint16_t vring_get_used_event(VirtQueue *vq) { return vring_avail_ring(vq, vq->vring.num); } static inline uint16_t vring_avail_ring(VirtQueue *vq, int i) { VRingMemoryRegionCaches *caches = vring_get_region_caches(vq); hwaddr pa = offsetof(VRingAvail, ring[i]); if (!caches) { return 0; } return virtio_lduw_phys_cached(vq->vdev, &caches->avail, pa); } vq->vring.num will be greater than caches->avail.len, which will trigger a failed assertion down the call path of virtio_lduw_phys_cached(). Fix this by calling virtio_init_region_cache() after virtio_queue_set_num() if we are not already calling virtio_queue_set_rings(). In the legacy path this is already done by virtio_queue_update_rings(). Signed-off-by: Carlos López Message-Id: <20230317002749.27379-1-clopez@suse.de> Reviewed-by: Cornelia Huck Acked-by: Halil Pasic Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/s390x/virtio-ccw.c | 1 + hw/virtio/virtio-mmio.c | 1 + hw/virtio/virtio-pci.c | 1 + hw/virtio/virtio.c | 2 +- include/hw/virtio/virtio.h | 1 + 5 files changed, 5 insertions(+), 1 deletion(-) diff --git a/hw/s390x/virtio-ccw.c b/hw/s390x/virtio-ccw.c index e33e5207ab..f44de1a8c1 100644 --- a/hw/s390x/virtio-ccw.c +++ b/hw/s390x/virtio-ccw.c @@ -237,6 +237,7 @@ static int virtio_ccw_set_vqs(SubchDev *sch, VqInfoBlock *info, return -EINVAL; } virtio_queue_set_num(vdev, index, num); + virtio_init_region_cache(vdev, index); } else if (virtio_queue_get_num(vdev, index) > num) { /* Fail if we don't have a big enough queue. */ return -EINVAL; diff --git a/hw/virtio/virtio-mmio.c b/hw/virtio/virtio-mmio.c index 23ba625eb6..c2c6d85475 100644 --- a/hw/virtio/virtio-mmio.c +++ b/hw/virtio/virtio-mmio.c @@ -354,6 +354,7 @@ static void virtio_mmio_write(void *opaque, hwaddr offset, uint64_t value, if (proxy->legacy) { virtio_queue_update_rings(vdev, vdev->queue_sel); } else { + virtio_init_region_cache(vdev, vdev->queue_sel); proxy->vqs[vdev->queue_sel].num = value; } break; diff --git a/hw/virtio/virtio-pci.c b/hw/virtio/virtio-pci.c index 247325c193..02fb84a8fa 100644 --- a/hw/virtio/virtio-pci.c +++ b/hw/virtio/virtio-pci.c @@ -1554,6 +1554,7 @@ static void virtio_pci_common_write(void *opaque, hwaddr addr, proxy->vqs[vdev->queue_sel].num = val; virtio_queue_set_num(vdev, vdev->queue_sel, proxy->vqs[vdev->queue_sel].num); + virtio_init_region_cache(vdev, vdev->queue_sel); break; case VIRTIO_PCI_COMMON_Q_MSIX: vector = virtio_queue_vector(vdev, vdev->queue_sel); diff --git a/hw/virtio/virtio.c b/hw/virtio/virtio.c index 98c4819fcc..272d930721 100644 --- a/hw/virtio/virtio.c +++ b/hw/virtio/virtio.c @@ -226,7 +226,7 @@ static void virtio_virtqueue_reset_region_cache(struct VirtQueue *vq) } } -static void virtio_init_region_cache(VirtIODevice *vdev, int n) +void virtio_init_region_cache(VirtIODevice *vdev, int n) { VirtQueue *vq = &vdev->vq[n]; 
VRingMemoryRegionCaches *old = vq->vring.caches; diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index f236e94ca6..f6b38f7e9c 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -309,6 +309,7 @@ int virtio_get_num_queues(VirtIODevice *vdev); void virtio_queue_set_rings(VirtIODevice *vdev, int n, hwaddr desc, hwaddr avail, hwaddr used); void virtio_queue_update_rings(VirtIODevice *vdev, int n); +void virtio_init_region_cache(VirtIODevice *vdev, int n); void virtio_queue_set_align(VirtIODevice *vdev, int n, int align); void virtio_queue_notify(VirtIODevice *vdev, int n); uint16_t virtio_queue_vector(VirtIODevice *vdev, int n); From 607a079b29a896b3752ef8a14d746765473c07bf Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Mon, 20 Mar 2023 17:12:33 +0530 Subject: [PATCH 02/31] Add my old and new work email mapping and use work email to support biosbits Update mailmap to indicate ani@anisinha.ca and anisinha@redhat.com are one and the same person. Additionally update MAINTAINERS and bits documentation to use my work (redhat) email. Signed-off-by: Ani Sinha Message-Id: <20230320114233.90638-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- .mailmap | 1 + MAINTAINERS | 2 +- docs/devel/acpi-bits.rst | 2 +- 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/.mailmap b/.mailmap index 7677047950..bbe6d3fd69 100644 --- a/.mailmap +++ b/.mailmap @@ -54,6 +54,7 @@ Aleksandar Markovic Aleksandar Rikalo Aleksandar Rikalo Alexander Graf +Ani Sinha Anthony Liguori Anthony Liguori Christian Borntraeger Damien Hedde diff --git a/MAINTAINERS b/MAINTAINERS index 2c2068ea5c..0bd3d1830e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1931,7 +1931,7 @@ F: hw/acpi/viot.c F: hw/acpi/viot.h ACPI/AVOCADO/BIOSBITS -M: Ani Sinha +M: Ani Sinha M: Michael S. 
Tsirkin S: Supported F: tests/avocado/acpi-bits/* diff --git a/docs/devel/acpi-bits.rst b/docs/devel/acpi-bits.rst index 9eb4b9e666..22e2580200 100644 --- a/docs/devel/acpi-bits.rst +++ b/docs/devel/acpi-bits.rst @@ -135,7 +135,7 @@ Under ``tests/avocado/`` as the root we have: (c) They need not be loaded by avocado framework when running tests. -Author: Ani Sinha +Author: Ani Sinha References: ----------- From 0d74e2b785ed0391316479ccf97c8f1ad5966145 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Tue, 7 Mar 2023 18:00:18 +0100 Subject: [PATCH 03/31] vdpa: accept VIRTIO_NET_F_SPEED_DUPLEX in SVQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There is no reason to block it as it has nothing to do with the vrings. All the support of the feature comes via config space. Signed-off-by: Eugenio Pérez Suggested-by: Alvaro Karsz Message-Id: <20230307170018.260557-1-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 99904a0da7..37cdc84562 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -104,7 +104,8 @@ static const uint64_t vdpa_svq_device_features = /* VHOST_F_LOG_ALL is exposed by SVQ */ BIT_ULL(VHOST_F_LOG_ALL) | BIT_ULL(VIRTIO_NET_F_RSC_EXT) | - BIT_ULL(VIRTIO_NET_F_STANDBY); + BIT_ULL(VIRTIO_NET_F_STANDBY) | + BIT_ULL(VIRTIO_NET_F_SPEED_DUPLEX); #define VHOST_VDPA_NET_CVQ_ASID 1 From 0f220121a47fd1c8273eb3a4a67425c608307117 Mon Sep 17 00:00:00 2001 From: Thomas Huth Date: Mon, 17 Apr 2023 15:00:37 +0200 Subject: [PATCH 04/31] meson_options.txt: Enable qom-cast-debug by default again MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This switch had been disabled by default by accident in commit c55cf6ab03f. 
But we should enable it by default instead to avoid regressions in the QOM device hierarchy. Fixes: c55cf6ab03 ("configure, meson: move some default-disabled options to meson_options.txt") Signed-off-by: Thomas Huth Message-Id: <20230417130037.236747-3-thuth@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Peter Maydell Reported-by: Peter Maydell Reviewed-by: Philippe Mathieu-Daudé --- meson_options.txt | 2 +- scripts/meson-buildoptions.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/meson_options.txt b/meson_options.txt index fc9447d267..2471dd02da 100644 --- a/meson_options.txt +++ b/meson_options.txt @@ -315,7 +315,7 @@ option('debug_mutex', type: 'boolean', value: false, description: 'mutex debugging support') option('debug_stack_usage', type: 'boolean', value: false, description: 'measure coroutine stack usage') -option('qom_cast_debug', type: 'boolean', value: false, +option('qom_cast_debug', type: 'boolean', value: true, description: 'cast debugging support') option('gprof', type: 'boolean', value: false, description: 'QEMU profiling with gprof', diff --git a/scripts/meson-buildoptions.sh b/scripts/meson-buildoptions.sh index 009fab1515..d4369a3ad8 100644 --- a/scripts/meson-buildoptions.sh +++ b/scripts/meson-buildoptions.sh @@ -14,6 +14,7 @@ meson_options_help() { printf "%s\n" ' use idef-parser to automatically generate TCG' printf "%s\n" ' code for the Hexagon frontend' printf "%s\n" ' --disable-install-blobs install provided firmware blobs' + printf "%s\n" ' --disable-qom-cast-debug cast debugging support' printf "%s\n" ' --docdir=VALUE Base directory for documentation installation' printf "%s\n" ' (can be empty) [share/doc]' printf "%s\n" ' --enable-block-drv-whitelist-in-tools' @@ -35,7 +36,6 @@ meson_options_help() { printf "%s\n" ' --enable-module-upgrades try to load modules from alternate paths for' printf "%s\n" ' upgrades' printf "%s\n" ' --enable-profiler profiler support' - 
printf "%s\n" ' --enable-qom-cast-debug cast debugging support' printf "%s\n" ' --enable-rng-none dummy RNG, avoid using /dev/(u)random and' printf "%s\n" ' getrandom()' printf "%s\n" ' --enable-strip Strip targets on install' From 560a997535937df2ea3716ba56bcbe38be37682f Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Mon, 6 Mar 2023 14:32:09 -0500 Subject: [PATCH 05/31] vhost: Drop unused eventfd_add|del hooks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These hooks were introduced in: 80a1ea3748 ("memory: move ioeventfd ops to MemoryListener", 2012-02-29) But they seem to be never used. Drop them. Cc: Richard Henderson Signed-off-by: Peter Xu Message-Id: <20230306193209.516011-1-peterx@redhat.com> Reviewed-by: Philippe Mathieu-Daudé Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost.c | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index a266396576..746d130c74 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -1291,18 +1291,6 @@ void vhost_virtqueue_stop(struct vhost_dev *dev, 0, virtio_queue_get_desc_size(vdev, idx)); } -static void vhost_eventfd_add(MemoryListener *listener, - MemoryRegionSection *section, - bool match_data, uint64_t data, EventNotifier *e) -{ -} - -static void vhost_eventfd_del(MemoryListener *listener, - MemoryRegionSection *section, - bool match_data, uint64_t data, EventNotifier *e) -{ -} - static int vhost_virtqueue_set_busyloop_timeout(struct vhost_dev *dev, int n, uint32_t timeout) { @@ -1457,8 +1445,6 @@ int vhost_dev_init(struct vhost_dev *hdev, void *opaque, .log_sync = vhost_log_sync, .log_global_start = vhost_log_global_start, .log_global_stop = vhost_log_global_stop, - .eventfd_add = vhost_eventfd_add, - .eventfd_del = vhost_eventfd_del, .priority = 10 }; From 17c67f6849551b2bbffb7a3a8b490b853df86129 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 9 Mar 2023 
14:21:00 +0530 Subject: [PATCH 06/31] docs: vhost-user: Define memory region separately MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The same layout is defined twice, once in "single memory region description" and then in "memory regions description". Separate out details of memory region from these two and reuse the same definition later on. While at it, also rename "memory regions description" to "multiple memory regions description", to avoid potential confusion around similar names. And define single region before multiple ones. This is just a documentation optimization, the protocol remains the same. Signed-off-by: Viresh Kumar Message-Id: <7c3718e5eb99178b22696682ae73aca6df1899c7.1678351495.git.viresh.kumar@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Reviewed-by: Alex Bennée --- docs/interop/vhost-user.rst | 39 +++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 21 deletions(-) diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 8a5924ea75..1720d68126 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -130,18 +130,8 @@ A vring address description Note that a ring address is an IOVA if ``VIRTIO_F_IOMMU_PLATFORM`` has been negotiated. Otherwise it is a user address. -Memory regions description -^^^^^^^^^^^^^^^^^^^^^^^^^^ - -+-------------+---------+---------+-----+---------+ -| num regions | padding | region0 | ... 
| region7 | -+-------------+---------+---------+-----+---------+ - -:num regions: a 32-bit number of regions - -:padding: 32-bit - -A region is: +Memory region description +^^^^^^^^^^^^^^^^^^^^^^^^^ +---------------+------+--------------+-------------+ | guest address | size | user address | mmap offset | @@ -158,19 +148,26 @@ A region is: Single memory region description ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -+---------+---------------+------+--------------+-------------+ -| padding | guest address | size | user address | mmap offset | -+---------+---------------+------+--------------+-------------+ ++---------+--------+ +| padding | region | ++---------+--------+ :padding: 64-bit -:guest address: a 64-bit guest address of the region +A region is represented by Memory region description. -:size: a 64-bit size +Multiple Memory regions description +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -:user address: a 64-bit user address ++-------------+---------+---------+-----+---------+ +| num regions | padding | region0 | ... | region7 | ++-------------+---------+---------+-----+---------+ -:mmap offset: 64-bit offset where region starts in the mapped memory +:num regions: a 32-bit number of regions + +:padding: 32-bit + +A region is represented by Memory region description. Log description ^^^^^^^^^^^^^^^ @@ -952,8 +949,8 @@ Front-end message types ``VHOST_USER_SET_MEM_TABLE`` :id: 5 :equivalent ioctl: ``VHOST_SET_MEM_TABLE`` - :request payload: memory regions description - :reply payload: (postcopy only) memory regions description + :request payload: multiple memory regions description + :reply payload: (postcopy only) multiple memory regions description Sets the memory map regions on the back-end so it can translate the vring addresses. 
In the ancillary data there is an array of file From f21e95ee97d5adb4a274b48c6c8f70a221c1f513 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 9 Mar 2023 14:21:01 +0530 Subject: [PATCH 07/31] docs: vhost-user: Add Xen specific memory mapping support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current model of memory mapping at the back-end works fine where a standard call to mmap() (for the respective file descriptor) is enough before the front-end can start accessing the guest memory. There are other complex cases though where the back-end needs more information and simple mmap() isn't enough. For example Xen, a type-1 hypervisor, currently supports memory mapping via two different methods, foreign-mapping (via /dev/privcmd) and grant-dev (via /dev/gntdev). In both these cases, the back-end needs to call mmap() and ioctl(), with extra information like the Xen domain-id of the guest whose memory we are trying to map. Add a new protocol feature, 'VHOST_USER_PROTOCOL_F_XEN_MMAP', which lets the back-end know about the additional memory mapping requirements. When this feature is negotiated, the front-end will send the additional information within the memory regions themselves. Signed-off-by: Viresh Kumar Message-Id: <6d0bd7f0e1aeec3ddb603ae4ff334c75c7d0d7b3.1678351495.git.viresh.kumar@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Stefan Hajnoczi Reviewed-by: Alex Bennée --- docs/interop/vhost-user.rst | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/interop/vhost-user.rst b/docs/interop/vhost-user.rst index 1720d68126..5a070adbc1 100644 --- a/docs/interop/vhost-user.rst +++ b/docs/interop/vhost-user.rst @@ -145,6 +145,26 @@ Memory region description :mmap offset: 64-bit offset where region starts in the mapped memory +When the ``VHOST_USER_PROTOCOL_F_XEN_MMAP`` protocol feature has been +successfully negotiated, the memory region description contains two extra +fields at the end. + ++---------------+------+--------------+-------------+----------------+-------+ +| guest address | size | user address | mmap offset | xen mmap flags | domid | ++---------------+------+--------------+-------------+----------------+-------+ + +:xen mmap flags: 32-bit bit field + +- Bit 0 is set for Xen foreign memory mapping. +- Bit 1 is set for Xen grant memory mapping. +- Bit 8 is set if the memory region can not be mapped in advance, and memory + areas within this region must be mapped / unmapped only when required by the + back-end. The back-end shouldn't try to map the entire region at once, as the + front-end may not allow it. The back-end should rather map only the required + amount of memory at once and unmap it after it is used. + +:domid: a 32-bit Xen hypervisor specific domain id. 
+ Single memory region description ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ @@ -864,6 +884,7 @@ Protocol features #define VHOST_USER_PROTOCOL_F_INBAND_NOTIFICATIONS 14 #define VHOST_USER_PROTOCOL_F_CONFIGURE_MEM_SLOTS 15 #define VHOST_USER_PROTOCOL_F_STATUS 16 + #define VHOST_USER_PROTOCOL_F_XEN_MMAP 17 Front-end message types ----------------------- From e919402b9e05c964561623ddada9d49d0a05b850 Mon Sep 17 00:00:00 2001 From: Yangming Date: Thu, 9 Mar 2023 07:52:57 +0000 Subject: [PATCH 08/31] virtio-balloon: optimize the virtio-balloon on the ARM platform Optimize the virtio-balloon feature on the ARM platform by adding a variable to keep track of the current hot-plugged pc-dimm size, instead of traversing the virtual machine's memory modules to count the current RAM size during the balloon inflation or deflation process. This variable can be updated only when plugging or unplugging the device, which will result in an increase of approximately 60% efficiency of balloon process on the ARM platform. We tested the total amount of time required for the balloon inflation process on ARM: inflate the balloon to 64GB of a 128GB guest under stress. Before: 102 seconds After: 42 seconds Signed-off-by: Qi Xi Signed-off-by: Ming Yang yangming73@huawei.com Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Acked-by: David Hildenbrand --- hw/mem/pc-dimm.c | 7 +++++++ hw/virtio/virtio-balloon.c | 33 +++++---------------------------- include/hw/boards.h | 2 ++ 3 files changed, 14 insertions(+), 28 deletions(-) diff --git a/hw/mem/pc-dimm.c b/hw/mem/pc-dimm.c index 50ef83215c..37f1f4ccfd 100644 --- a/hw/mem/pc-dimm.c +++ b/hw/mem/pc-dimm.c @@ -81,6 +81,10 @@ void pc_dimm_plug(PCDIMMDevice *dimm, MachineState *machine) memory_device_plug(MEMORY_DEVICE(dimm), machine); vmstate_register_ram(vmstate_mr, DEVICE(dimm)); + /* count only "real" DIMMs, not NVDIMMs */ + if (!object_dynamic_cast(OBJECT(dimm), TYPE_NVDIMM)) { + machine->device_memory->dimm_size += memory_region_size(vmstate_mr); + } } void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine) @@ -90,6 +94,9 @@ void pc_dimm_unplug(PCDIMMDevice *dimm, MachineState *machine) memory_device_unplug(MEMORY_DEVICE(dimm), machine); vmstate_unregister_ram(vmstate_mr, DEVICE(dimm)); + if (!object_dynamic_cast(OBJECT(dimm), TYPE_NVDIMM)) { + machine->device_memory->dimm_size -= memory_region_size(vmstate_mr); + } } static int pc_dimm_slot2bitmap(Object *obj, void *opaque) diff --git a/hw/virtio/virtio-balloon.c b/hw/virtio/virtio-balloon.c index 746f07c4d2..2814a47cb1 100644 --- a/hw/virtio/virtio-balloon.c +++ b/hw/virtio/virtio-balloon.c @@ -729,37 +729,14 @@ static void virtio_balloon_get_config(VirtIODevice *vdev, uint8_t *config_data) memcpy(config_data, &config, virtio_balloon_config_size(dev)); } -static int build_dimm_list(Object *obj, void *opaque) -{ - GSList **list = opaque; - - if (object_dynamic_cast(obj, TYPE_PC_DIMM)) { - DeviceState *dev = DEVICE(obj); - if (dev->realized) { /* only realized DIMMs matter */ - *list = g_slist_prepend(*list, dev); - } - } - - object_child_foreach(obj, build_dimm_list, opaque); - return 0; -} - static ram_addr_t get_current_ram_size(void) { - GSList *list = NULL, *item; - ram_addr_t size = current_machine->ram_size; - - build_dimm_list(qdev_get_machine(), &list); - for 
(item = list; item; item = g_slist_next(item)) { - Object *obj = OBJECT(item->data); - if (!strcmp(object_get_typename(obj), TYPE_PC_DIMM)) { - size += object_property_get_int(obj, PC_DIMM_SIZE_PROP, - &error_abort); - } + MachineState *machine = MACHINE(qdev_get_machine()); + if (machine->device_memory) { + return machine->ram_size + machine->device_memory->dimm_size; + } else { + return machine->ram_size; } - g_slist_free(list); - - return size; } static bool virtio_balloon_page_poison_support(void *opaque) diff --git a/include/hw/boards.h b/include/hw/boards.h index 6fbbfd56c8..f840f88d54 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -292,10 +292,12 @@ struct MachineClass { * @base: address in guest physical address space where the memory * address space for memory devices starts * @mr: address space container for memory devices + * @dimm_size: the sum of plugged DIMMs' sizes */ typedef struct DeviceMemoryState { hwaddr base; MemoryRegion mr; + uint64_t dimm_size; } DeviceMemoryState; /** From 2f5a2b35f5883fb8e049d877263eecb4c3883fb2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 13 Mar 2023 16:30:26 +0100 Subject: [PATCH 09/31] MAINTAINERS: Mark AMD-Vi emulation as orphan MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit hw/i386/amd_iommu.c seems unmaintained: After commit 1c7955c450 ("x86-iommu: introduce parent class", 2016-07-14), almost no feature added, 2 bug fixes, other changes are generic tree-wide API cleanups. Cc: Roman Kapl Cc: Wei Huang Cc: Brijesh Singh Cc: David Kiarie Cc: Jean-Philippe Brucker Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20230313153031.86107-2-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index 0bd3d1830e..83c0373872 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3367,6 +3367,10 @@ F: hw/i386/intel_iommu.c F: hw/i386/intel_iommu_internal.h F: include/hw/i386/intel_iommu.h +AMD-Vi Emulation +S: Orphan +F: hw/i386/amd_iommu.? + OpenSBI Firmware M: Bin Meng S: Supported From 6291a28645a0656477bc5962a81b181e6a99487c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 13 Mar 2023 16:30:27 +0100 Subject: [PATCH 10/31] hw/i386/amd_iommu: Explicit use of AMDVI_BASE_ADDR in amdvi_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By accessing MemoryRegion internals, amdvi_init() gives the false idea that the PCI BAR can be modified. However this isn't true (at least the model isn't ready for that): the device explicitly maps the BAR at the fixed AMDVI_BASE_ADDR address in amdvi_sysbus_realize(). Since the SysBus API isn't designed to remap regions, directly use the fixed address in amdvi_init(). Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20230313153031.86107-3-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index bcd016f5c5..3813b341ec 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1519,9 +1519,9 @@ static void amdvi_init(AMDVIState *s) /* reset AMDVI specific capabilities, all r/o */ pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES); pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW, - s->mmio.addr & ~(0xffff0000)); + AMDVI_BASE_ADDR & ~(0xffff0000)); pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH, - (s->mmio.addr & ~(0xffff)) >> 16); + (AMDVI_BASE_ADDR & ~(0xffff)) >> 16); pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE, 0xff000000); pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0); From 531f50ab058c6e443194cdda1a8eaf010c6e740a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 13 Mar 2023 16:30:28 +0100 Subject: [PATCH 11/31] hw/i386/amd_iommu: Remove intermediate AMDVIState::devid field MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit AMDVIState::devid is only accessed by build_amd_iommu() which has access to the PCIDevice state. Directly get the property calling object_property_get_int() there. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20230313153031.86107-4-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/i386/acpi-build.c | 4 +++- hw/i386/amd_iommu.c | 2 -- hw/i386/amd_iommu.h | 2 -- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index ec857a117e..a27bc33956 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2395,7 +2395,9 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, /* IVHD length */ build_append_int_noprefix(table_data, ivhd_table_len, 2); /* DeviceID */ - build_append_int_noprefix(table_data, s->devid, 2); + build_append_int_noprefix(table_data, + object_property_get_int(OBJECT(&s->pci), "addr", + &error_abort), 2); /* Capability offset */ build_append_int_noprefix(table_data, s->capab_offset, 2); /* IOMMU base address */ diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 3813b341ec..19f57e6318 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1513,7 +1513,6 @@ static void amdvi_init(AMDVIState *s) /* reset device ident */ pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD); pci_config_set_prog_interface(s->pci.dev.config, 00); - pci_config_set_device_id(s->pci.dev.config, s->devid); pci_config_set_class(s->pci.dev.config, 0x0806); /* reset AMDVI specific capabilities, all r/o */ @@ -1581,7 +1580,6 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio); sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR); pci_setup_iommu(bus, amdvi_host_dma_iommu, s); - s->devid = object_property_get_int(OBJECT(&s->pci), "addr", &error_abort); msi_init(&s->pci.dev, 0, 1, true, false, errp); amdvi_init(s); } diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 79d38a3e41..5eccaad790 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -319,8 +319,6 @@ struct AMDVIState { uint64_t mmio_addr; - uint32_t devid; /* auto-assigned devid */ - bool enabled; /* IOMMU enabled */ bool ats_enabled; /* address translation enabled */ bool cmdbuf_enabled; /* 
command buffer enabled */ From ae097d8fbd405011afc5c35c7f95a90066a97262 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 13 Mar 2023 16:30:29 +0100 Subject: [PATCH 12/31] hw/i386/amd_iommu: Move capab_offset from AMDVIState to AMDVIPCIState MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The 'PCI capability offset' is a *PCI* notion. Since AMDVIPCIState inherits PCIDevice and holds PCI-related fields, move capab_offset from AMDVIState to AMDVIPCIState. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20230313153031.86107-5-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 2 +- hw/i386/amd_iommu.c | 14 +++++++------- hw/i386/amd_iommu.h | 2 +- 3 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index a27bc33956..7f211e1f48 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2399,7 +2399,7 @@ build_amd_iommu(GArray *table_data, BIOSLinker *linker, const char *oem_id, object_property_get_int(OBJECT(&s->pci), "addr", &error_abort), 2); /* Capability offset */ - build_append_int_noprefix(table_data, s->capab_offset, 2); + build_append_int_noprefix(table_data, s->pci.capab_offset, 2); /* IOMMU base address */ build_append_int_noprefix(table_data, s->mmio.addr, 8); /* PCI Segment Group */ diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 19f57e6318..9f6622e11f 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1516,15 +1516,15 @@ static void amdvi_init(AMDVIState *s) pci_config_set_class(s->pci.dev.config, 0x0806); /* reset AMDVI specific capabilities, all r/o */ - pci_set_long(s->pci.dev.config + s->capab_offset, AMDVI_CAPAB_FEATURES); - pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_LOW, + pci_set_long(s->pci.dev.config + s->pci.capab_offset, AMDVI_CAPAB_FEATURES); + pci_set_long(s->pci.dev.config + s->pci.capab_offset + 
AMDVI_CAPAB_BAR_LOW, AMDVI_BASE_ADDR & ~(0xffff0000)); - pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH, + pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_BAR_HIGH, (AMDVI_BASE_ADDR & ~(0xffff)) >> 16); - pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_RANGE, + pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_RANGE, 0xff000000); - pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, 0); - pci_set_long(s->pci.dev.config + s->capab_offset + AMDVI_CAPAB_MISC, + pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_MISC, 0); + pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_MISC, AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR); } @@ -1557,7 +1557,7 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) if (ret < 0) { return; } - s->capab_offset = ret; + s->pci.capab_offset = ret; ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, AMDVI_CAPAB_REG_SIZE, errp); diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 5eccaad790..1c0cb54bd4 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -308,6 +308,7 @@ typedef struct AMDVIAddressSpace AMDVIAddressSpace; /* functions to steal PCI config space */ typedef struct AMDVIPCIState { PCIDevice dev; /* The PCI device itself */ + uint32_t capab_offset; /* capability offset pointer */ } AMDVIPCIState; struct AMDVIState { @@ -315,7 +316,6 @@ struct AMDVIState { AMDVIPCIState pci; /* IOMMU PCI device */ uint32_t version; - uint32_t capab_offset; /* capability offset pointer */ uint64_t mmio_addr; From 7f5a459dc8132bd15c28a0d2af1f0cf844ee19f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 13 Mar 2023 16:30:30 +0100 Subject: [PATCH 13/31] hw/i386/amd_iommu: Set PCI static/const fields via PCIDeviceClass MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set PCI static/const fields once in 
amdvi_pci_class_init. They will be propagated via DeviceClassRealize handler via pci_qdev_realize() -> do_pci_register_device() -> pci_config_set*(). Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20230313153031.86107-6-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/amd_iommu.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 9f6622e11f..8e4ce63f8e 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1511,9 +1511,7 @@ static void amdvi_init(AMDVIState *s) amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67); /* reset device ident */ - pci_config_set_vendor_id(s->pci.dev.config, PCI_VENDOR_ID_AMD); pci_config_set_prog_interface(s->pci.dev.config, 00); - pci_config_set_class(s->pci.dev.config, 0x0806); /* reset AMDVI specific capabilities, all r/o */ pci_set_long(s->pci.dev.config + s->pci.capab_offset, AMDVI_CAPAB_FEATURES); @@ -1623,6 +1621,10 @@ static const TypeInfo amdvi_sysbus = { static void amdvi_pci_class_init(ObjectClass *klass, void *data) { DeviceClass *dc = DEVICE_CLASS(klass); + PCIDeviceClass *k = PCI_DEVICE_CLASS(klass); + + k->vendor_id = PCI_VENDOR_ID_AMD; + k->class_id = 0x0806; set_bit(DEVICE_CATEGORY_MISC, dc->categories); dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; From 5ec7755eb7c503fc66fe44083ee5113bd5b87bd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philippe=20Mathieu-Daud=C3=A9?= Date: Mon, 13 Mar 2023 16:30:31 +0100 Subject: [PATCH 14/31] hw/i386/amd_iommu: Factor amdvi_pci_realize out of amdvi_sysbus_realize MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Aside from the Frankenstein model of a SysBusDevice realizing a PCIDevice, QOM parents shouldn't access children internals. In this particular case, amdvi_sysbus_realize() is just open-coding TYPE_AMD_IOMMU_PCI's DeviceRealize() handler. Factor it out. 
Declare QOM-cast macros with OBJECT_DECLARE_SIMPLE_TYPE() so we can cast the AMDVIPCIState in amdvi_pci_realize(). Note this commit removes the single use in the repository of pci_add_capability() and msi_init() on a *realized* QDev instance. Signed-off-by: Philippe Mathieu-Daudé Message-Id: <20230313153031.86107-7-philmd@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/amd_iommu.c | 62 ++++++++++++++++++++++++++------------------- hw/i386/amd_iommu.h | 5 ++-- 2 files changed, 39 insertions(+), 28 deletions(-) diff --git a/hw/i386/amd_iommu.c b/hw/i386/amd_iommu.c index 8e4ce63f8e..9c77304438 100644 --- a/hw/i386/amd_iommu.c +++ b/hw/i386/amd_iommu.c @@ -1509,20 +1509,48 @@ static void amdvi_init(AMDVIState *s) amdvi_set_quad(s, AMDVI_MMIO_EXT_FEATURES, AMDVI_EXT_FEATURES, 0xffffffffffffffef, 0); amdvi_set_quad(s, AMDVI_MMIO_STATUS, 0, 0x98, 0x67); +} + +static void amdvi_pci_realize(PCIDevice *pdev, Error **errp) +{ + AMDVIPCIState *s = AMD_IOMMU_PCI(pdev); + int ret; + + ret = pci_add_capability(pdev, AMDVI_CAPAB_ID_SEC, 0, + AMDVI_CAPAB_SIZE, errp); + if (ret < 0) { + return; + } + s->capab_offset = ret; + + ret = pci_add_capability(pdev, PCI_CAP_ID_MSI, 0, + AMDVI_CAPAB_REG_SIZE, errp); + if (ret < 0) { + return; + } + ret = pci_add_capability(pdev, PCI_CAP_ID_HT, 0, + AMDVI_CAPAB_REG_SIZE, errp); + if (ret < 0) { + return; + } + + if (msi_init(pdev, 0, 1, true, false, errp) < 0) { + return; + } /* reset device ident */ - pci_config_set_prog_interface(s->pci.dev.config, 00); + pci_config_set_prog_interface(pdev->config, 0); /* reset AMDVI specific capabilities, all r/o */ - pci_set_long(s->pci.dev.config + s->pci.capab_offset, AMDVI_CAPAB_FEATURES); - pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_BAR_LOW, + pci_set_long(pdev->config + s->capab_offset, AMDVI_CAPAB_FEATURES); + pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_LOW, AMDVI_BASE_ADDR & ~(0xffff0000)); - 
pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_BAR_HIGH, + pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_BAR_HIGH, (AMDVI_BASE_ADDR & ~(0xffff)) >> 16); - pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_RANGE, + pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_RANGE, 0xff000000); - pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_MISC, 0); - pci_set_long(s->pci.dev.config + s->pci.capab_offset + AMDVI_CAPAB_MISC, + pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, 0); + pci_set_long(pdev->config + s->capab_offset + AMDVI_CAPAB_MISC, AMDVI_MAX_PH_ADDR | AMDVI_MAX_GVA_ADDR | AMDVI_MAX_VA_ADDR); } @@ -1536,7 +1564,6 @@ static void amdvi_sysbus_reset(DeviceState *dev) static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) { - int ret = 0; AMDVIState *s = AMD_IOMMU_DEVICE(dev); MachineState *ms = MACHINE(qdev_get_machine()); PCMachineState *pcms = PC_MACHINE(ms); @@ -1550,23 +1577,6 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) if (!qdev_realize(DEVICE(&s->pci), &bus->qbus, errp)) { return; } - ret = pci_add_capability(&s->pci.dev, AMDVI_CAPAB_ID_SEC, 0, - AMDVI_CAPAB_SIZE, errp); - if (ret < 0) { - return; - } - s->pci.capab_offset = ret; - - ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_MSI, 0, - AMDVI_CAPAB_REG_SIZE, errp); - if (ret < 0) { - return; - } - ret = pci_add_capability(&s->pci.dev, PCI_CAP_ID_HT, 0, - AMDVI_CAPAB_REG_SIZE, errp); - if (ret < 0) { - return; - } /* Pseudo address space under root PCI bus. 
*/ x86ms->ioapic_as = amdvi_host_dma_iommu(bus, s, AMDVI_IOAPIC_SB_DEVID); @@ -1578,7 +1588,6 @@ static void amdvi_sysbus_realize(DeviceState *dev, Error **errp) sysbus_init_mmio(SYS_BUS_DEVICE(s), &s->mmio); sysbus_mmio_map(SYS_BUS_DEVICE(s), 0, AMDVI_BASE_ADDR); pci_setup_iommu(bus, amdvi_host_dma_iommu, s); - msi_init(&s->pci.dev, 0, 1, true, false, errp); amdvi_init(s); } @@ -1625,6 +1634,7 @@ static void amdvi_pci_class_init(ObjectClass *klass, void *data) k->vendor_id = PCI_VENDOR_ID_AMD; k->class_id = 0x0806; + k->realize = amdvi_pci_realize; set_bit(DEVICE_CATEGORY_MISC, dc->categories); dc->desc = "AMD IOMMU (AMD-Vi) DMA Remapping device"; diff --git a/hw/i386/amd_iommu.h b/hw/i386/amd_iommu.h index 1c0cb54bd4..6da893ee57 100644 --- a/hw/i386/amd_iommu.h +++ b/hw/i386/amd_iommu.h @@ -300,16 +300,17 @@ struct irte_ga { OBJECT_DECLARE_SIMPLE_TYPE(AMDVIState, AMD_IOMMU_DEVICE) #define TYPE_AMD_IOMMU_PCI "AMDVI-PCI" +OBJECT_DECLARE_SIMPLE_TYPE(AMDVIPCIState, AMD_IOMMU_PCI) #define TYPE_AMD_IOMMU_MEMORY_REGION "amd-iommu-iommu-memory-region" typedef struct AMDVIAddressSpace AMDVIAddressSpace; /* functions to steal PCI config space */ -typedef struct AMDVIPCIState { +struct AMDVIPCIState { PCIDevice dev; /* The PCI device itself */ uint32_t capab_offset; /* capability offset pointer */ -} AMDVIPCIState; +}; struct AMDVIState { X86IOMMUState iommu; /* IOMMU bus device */ From 0259dd3e6ffe35145068ddc1bfb5c3ec06b33f48 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Tue, 14 Mar 2023 18:30:09 +0100 Subject: [PATCH 15/31] hw: Add compat machines for 8.1 Add 8.1 machine types for arm/i440fx/m68k/q35/s390x/spapr. Signed-off-by: Cornelia Huck Message-Id: <20230314173009.152667-1-cohuck@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/arm/virt.c | 11 +++++++++-- hw/core/machine.c | 3 +++ hw/i386/pc.c | 3 +++ hw/i386/pc_piix.c | 16 +++++++++++++--- hw/i386/pc_q35.c | 14 ++++++++++++-- hw/m68k/virt.c | 11 +++++++++-- hw/ppc/spapr.c | 17 ++++++++++++++--- hw/s390x/s390-virtio-ccw.c | 14 +++++++++++++- include/hw/boards.h | 3 +++ include/hw/i386/pc.h | 3 +++ 10 files changed, 82 insertions(+), 13 deletions(-) diff --git a/hw/arm/virt.c b/hw/arm/virt.c index ac626b3bef..267fe56fae 100644 --- a/hw/arm/virt.c +++ b/hw/arm/virt.c @@ -3232,10 +3232,17 @@ static void machvirt_machine_init(void) } type_init(machvirt_machine_init); -static void virt_machine_8_0_options(MachineClass *mc) +static void virt_machine_8_1_options(MachineClass *mc) { } -DEFINE_VIRT_MACHINE_AS_LATEST(8, 0) +DEFINE_VIRT_MACHINE_AS_LATEST(8, 1) + +static void virt_machine_8_0_options(MachineClass *mc) +{ + virt_machine_8_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_0, hw_compat_8_0_len); +} +DEFINE_VIRT_MACHINE(8, 0) static void virt_machine_7_2_options(MachineClass *mc) { diff --git a/hw/core/machine.c b/hw/core/machine.c index cd13b8b0a3..2ce97a5d3b 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -39,6 +39,9 @@ #include "hw/virtio/virtio.h" #include "hw/virtio/virtio-pci.h" +GlobalProperty hw_compat_8_0[] = {}; +const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0); + GlobalProperty hw_compat_7_2[] = { { "e1000e", "migrate-timadj", "off" }, { "virtio-mem", "x-early-migration", "false" }, diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 1489abf010..615e1d3d06 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -116,6 +116,9 @@ { "qemu64-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, },\ { "athlon-" TYPE_X86_CPU, "model-id", "QEMU Virtual CPU version " v, }, +GlobalProperty pc_compat_8_0[] = {}; +const size_t pc_compat_8_0_len = G_N_ELEMENTS(pc_compat_8_0); + GlobalProperty pc_compat_7_2[] = { { "ICH9-LPC", "noreboot", "true" }, }; diff --git a/hw/i386/pc_piix.c 
b/hw/i386/pc_piix.c index 30eedd62a3..21591dad8d 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -454,21 +454,31 @@ static void pc_i440fx_machine_options(MachineClass *m) machine_class_allow_dynamic_sysbus_dev(m, TYPE_VMBUS_BRIDGE); } -static void pc_i440fx_8_0_machine_options(MachineClass *m) +static void pc_i440fx_8_1_machine_options(MachineClass *m) { pc_i440fx_machine_options(m); m->alias = "pc"; m->is_default = true; } +DEFINE_I440FX_MACHINE(v8_1, "pc-i440fx-8.1", NULL, + pc_i440fx_8_1_machine_options); + +static void pc_i440fx_8_0_machine_options(MachineClass *m) +{ + pc_i440fx_8_1_machine_options(m); + m->alias = NULL; + m->is_default = false; + compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len); + compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len); +} + DEFINE_I440FX_MACHINE(v8_0, "pc-i440fx-8.0", NULL, pc_i440fx_8_0_machine_options); static void pc_i440fx_7_2_machine_options(MachineClass *m) { pc_i440fx_8_0_machine_options(m); - m->alias = NULL; - m->is_default = false; compat_props_add(m->compat_props, hw_compat_7_2, hw_compat_7_2_len); compat_props_add(m->compat_props, pc_compat_7_2, pc_compat_7_2_len); } diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 797ba347fd..f02919d92c 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -373,19 +373,29 @@ static void pc_q35_machine_options(MachineClass *m) m->max_cpus = 288; } -static void pc_q35_8_0_machine_options(MachineClass *m) +static void pc_q35_8_1_machine_options(MachineClass *m) { pc_q35_machine_options(m); m->alias = "q35"; } +DEFINE_Q35_MACHINE(v8_1, "pc-q35-8.1", NULL, + pc_q35_8_1_machine_options); + +static void pc_q35_8_0_machine_options(MachineClass *m) +{ + pc_q35_8_1_machine_options(m); + m->alias = NULL; + compat_props_add(m->compat_props, hw_compat_8_0, hw_compat_8_0_len); + compat_props_add(m->compat_props, pc_compat_8_0, pc_compat_8_0_len); +} + DEFINE_Q35_MACHINE(v8_0, "pc-q35-8.0", NULL, pc_q35_8_0_machine_options); static void 
pc_q35_7_2_machine_options(MachineClass *m) { pc_q35_8_0_machine_options(m); - m->alias = NULL; compat_props_add(m->compat_props, hw_compat_7_2, hw_compat_7_2_len); compat_props_add(m->compat_props, pc_compat_7_2, pc_compat_7_2_len); } diff --git a/hw/m68k/virt.c b/hw/m68k/virt.c index 754b9bdfcc..731205b215 100644 --- a/hw/m68k/virt.c +++ b/hw/m68k/virt.c @@ -347,10 +347,17 @@ type_init(virt_machine_register_types) } \ type_init(machvirt_machine_##major##_##minor##_init); -static void virt_machine_8_0_options(MachineClass *mc) +static void virt_machine_8_1_options(MachineClass *mc) { } -DEFINE_VIRT_MACHINE(8, 0, true) +DEFINE_VIRT_MACHINE(8, 1, true) + +static void virt_machine_8_0_options(MachineClass *mc) +{ + virt_machine_8_1_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_0, hw_compat_8_0_len); +} +DEFINE_VIRT_MACHINE(8, 0, false) static void virt_machine_7_2_options(MachineClass *mc) { diff --git a/hw/ppc/spapr.c b/hw/ppc/spapr.c index 4921198b9d..ddc9c7b1a1 100644 --- a/hw/ppc/spapr.c +++ b/hw/ppc/spapr.c @@ -4735,14 +4735,25 @@ static void spapr_machine_latest_class_options(MachineClass *mc) type_init(spapr_machine_register_##suffix) /* - * pseries-8.0 + * pseries-8.1 */ -static void spapr_machine_8_0_class_options(MachineClass *mc) +static void spapr_machine_8_1_class_options(MachineClass *mc) { /* Defaults for the latest behaviour inherited from the base class */ } -DEFINE_SPAPR_MACHINE(8_0, "8.0", true); +DEFINE_SPAPR_MACHINE(8_1, "8.1", true); + +/* + * pseries-8.0 + */ +static void spapr_machine_8_0_class_options(MachineClass *mc) +{ + spapr_machine_8_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_0, hw_compat_8_0_len); +} + +DEFINE_SPAPR_MACHINE(8_0, "8.0", false); /* * pseries-7.2 diff --git a/hw/s390x/s390-virtio-ccw.c b/hw/s390x/s390-virtio-ccw.c index 503f212a31..e6f2c62625 100644 --- a/hw/s390x/s390-virtio-ccw.c +++ b/hw/s390x/s390-virtio-ccw.c @@ -826,14 +826,26 @@ bool css_migration_enabled(void) } \ 
type_init(ccw_machine_register_##suffix) +static void ccw_machine_8_1_instance_options(MachineState *machine) +{ +} + +static void ccw_machine_8_1_class_options(MachineClass *mc) +{ +} +DEFINE_CCW_MACHINE(8_1, "8.1", true); + static void ccw_machine_8_0_instance_options(MachineState *machine) { + ccw_machine_8_1_instance_options(machine); } static void ccw_machine_8_0_class_options(MachineClass *mc) { + ccw_machine_8_1_class_options(mc); + compat_props_add(mc->compat_props, hw_compat_8_0, hw_compat_8_0_len); } -DEFINE_CCW_MACHINE(8_0, "8.0", true); +DEFINE_CCW_MACHINE(8_0, "8.0", false); static void ccw_machine_7_2_instance_options(MachineState *machine) { diff --git a/include/hw/boards.h b/include/hw/boards.h index f840f88d54..f4117fdb9a 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -383,6 +383,9 @@ struct MachineState { } \ type_init(machine_initfn##_register_types) +extern GlobalProperty hw_compat_8_0[]; +extern const size_t hw_compat_8_0_len; + extern GlobalProperty hw_compat_7_2[]; extern const size_t hw_compat_7_2_len; diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 8206d5405a..eb668e9034 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -195,6 +195,9 @@ void pc_madt_cpu_entry(int uid, const CPUArchIdList *apic_ids, /* sgx.c */ void pc_machine_init_sgx_epc(PCMachineState *pcms); +extern GlobalProperty pc_compat_8_0[]; +extern const size_t pc_compat_8_0_len; + extern GlobalProperty pc_compat_7_2[]; extern const size_t pc_compat_7_2_len; From b93fe7f2ca9aea5ef74db5881aabecd7b1c234ed Mon Sep 17 00:00:00 2001 From: Chuck Zmudzinski Date: Wed, 15 Mar 2023 10:26:19 -0400 Subject: [PATCH 16/31] pci: avoid accessing slot_reserved_mask directly outside of pci.c This patch provides accessor functions as replacements for direct access to slot_reserved_mask according to the comment at the top of include/hw/pci/pci_bus.h which advises that data structures for PCIBus should not be directly accessed but instead be accessed using 
accessor functions in pci.h. Three accessor functions can conveniently replace all direct accesses of slot_reserved_mask. With this patch, the new accessor functions are used in hw/sparc64/sun4u.c and hw/xen/xen_pt.c and pci_bus.h is removed from the included header files of the same two files. No functional change intended. Signed-off-by: Chuck Zmudzinski Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Mark Cave-Ayland Tested-by: Mark Cave-Ayland [sun4u] --- hw/pci/pci.c | 15 +++++++++++++++ hw/sparc64/sun4u.c | 7 +++---- hw/xen/xen_pt.c | 7 +++---- include/hw/pci/pci.h | 3 +++ 4 files changed, 24 insertions(+), 8 deletions(-) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index def5000e7b..8a87ccc8b0 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -1116,6 +1116,21 @@ static bool pci_bus_devfn_reserved(PCIBus *bus, int devfn) return bus->slot_reserved_mask & (1UL << PCI_SLOT(devfn)); } +uint32_t pci_bus_get_slot_reserved_mask(PCIBus *bus) +{ + return bus->slot_reserved_mask; +} + +void pci_bus_set_slot_reserved_mask(PCIBus *bus, uint32_t mask) +{ + bus->slot_reserved_mask |= mask; +} + +void pci_bus_clear_slot_reserved_mask(PCIBus *bus, uint32_t mask) +{ + bus->slot_reserved_mask &= ~mask; +} + /* -1 for devfn means auto assign */ static PCIDevice *do_pci_register_device(PCIDevice *pci_dev, const char *name, int devfn, diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index a25e951f9d..eae7589462 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -31,7 +31,6 @@ #include "hw/irq.h" #include "hw/pci/pci.h" #include "hw/pci/pci_bridge.h" -#include "hw/pci/pci_bus.h" #include "hw/pci/pci_host.h" #include "hw/qdev-properties.h" #include "hw/pci-host/sabre.h" @@ -608,9 +607,9 @@ static void sun4uv_init(MemoryRegion *address_space_mem, /* Only in-built Simba APBs can exist on the root bus, slot 0 on busA is reserved (leaving no slots free after on-board devices) however slots 0-3 are free on busB */ - 
pci_bus->slot_reserved_mask = 0xfffffffc; - pci_busA->slot_reserved_mask = 0xfffffff1; - pci_busB->slot_reserved_mask = 0xfffffff0; + pci_bus_set_slot_reserved_mask(pci_bus, 0xfffffffc); + pci_bus_set_slot_reserved_mask(pci_busA, 0xfffffff1); + pci_bus_set_slot_reserved_mask(pci_busB, 0xfffffff0); ebus = pci_new_multifunction(PCI_DEVFN(1, 0), true, TYPE_EBUS); qdev_prop_set_uint64(DEVICE(ebus), "console-serial-base", diff --git a/hw/xen/xen_pt.c b/hw/xen/xen_pt.c index 2d33d178ad..a540149639 100644 --- a/hw/xen/xen_pt.c +++ b/hw/xen/xen_pt.c @@ -57,7 +57,6 @@ #include #include "hw/pci/pci.h" -#include "hw/pci/pci_bus.h" #include "hw/qdev-properties.h" #include "hw/qdev-properties-system.h" #include "xen_pt.h" @@ -951,7 +950,7 @@ void xen_igd_reserve_slot(PCIBus *pci_bus) } XEN_PT_LOG(0, "Reserving PCI slot 2 for IGD\n"); - pci_bus->slot_reserved_mask |= XEN_PCI_IGD_SLOT_MASK; + pci_bus_set_slot_reserved_mask(pci_bus, XEN_PCI_IGD_SLOT_MASK); } static void xen_igd_clear_slot(DeviceState *qdev, Error **errp) @@ -971,7 +970,7 @@ static void xen_igd_clear_slot(DeviceState *qdev, Error **errp) return; } - if (!(pci_bus->slot_reserved_mask & XEN_PCI_IGD_SLOT_MASK)) { + if (!(pci_bus_get_slot_reserved_mask(pci_bus) & XEN_PCI_IGD_SLOT_MASK)) { xpdc->pci_qdev_realize(qdev, errp); return; } @@ -982,7 +981,7 @@ static void xen_igd_clear_slot(DeviceState *qdev, Error **errp) s->real_device.dev == XEN_PCI_IGD_DEV && s->real_device.func == XEN_PCI_IGD_FN && s->real_device.vendor_id == PCI_VENDOR_ID_INTEL) { - pci_bus->slot_reserved_mask &= ~XEN_PCI_IGD_SLOT_MASK; + pci_bus_clear_slot_reserved_mask(pci_bus, XEN_PCI_IGD_SLOT_MASK); XEN_PT_LOG(pci_dev, "Intel IGD found, using slot 2\n"); } xpdc->pci_qdev_realize(qdev, errp); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index d5a40cd058..935b4b91b4 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -287,6 +287,9 @@ void pci_bus_irqs(PCIBus *bus, pci_set_irq_fn set_irq, void pci_bus_map_irqs(PCIBus *bus, 
pci_map_irq_fn map_irq); void pci_bus_irqs_cleanup(PCIBus *bus); int pci_bus_get_irq_level(PCIBus *bus, int irq_num); +uint32_t pci_bus_get_slot_reserved_mask(PCIBus *bus); +void pci_bus_set_slot_reserved_mask(PCIBus *bus, uint32_t mask); +void pci_bus_clear_slot_reserved_mask(PCIBus *bus, uint32_t mask); /* 0 <= pin <= 3 0 = INTA, 1 = INTB, 2 = INTC, 3 = INTD */ static inline int pci_swizzle(int slot, int pin) { From ca858a5fe94c0325bfe5f764f1bb090b160264a3 Mon Sep 17 00:00:00 2001 From: Vladimir Sementsov-Ogievskiy Date: Tue, 21 Mar 2023 23:13:23 +0300 Subject: [PATCH 17/31] vhost-user-blk-server: notify client about disk resize Currently block_resize qmp command is simply ignored by vhost-user-blk export. So, the block-node is successfully resized, but virtio config is unchanged and guest doesn't see that disk is resized. Let's handle the resize by modifying the config and notifying the guest appropriately. After this commit, lsblk in a Linux guest with attached vhost-user-blk-pci device shows new size immediately after block_resize QMP command on vhost-user exported block node. Signed-off-by: Vladimir Sementsov-Ogievskiy Message-Id: <20230321201323.3695923-1-vsementsov@yandex-team.ru> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- block/export/vhost-user-blk-server.c | 24 +++++++++++++++++++++++ subprojects/libvhost-user/libvhost-user.c | 10 ++++++++++ subprojects/libvhost-user/libvhost-user.h | 2 ++ 3 files changed, 36 insertions(+) diff --git a/block/export/vhost-user-blk-server.c b/block/export/vhost-user-blk-server.c index 3409d9e02e..e56b92f2e2 100644 --- a/block/export/vhost-user-blk-server.c +++ b/block/export/vhost-user-blk-server.c @@ -10,6 +10,7 @@ * later. See the COPYING file in the top-level directory.
*/ #include "qemu/osdep.h" +#include "qemu/error-report.h" #include "block/block.h" #include "subprojects/libvhost-user/libvhost-user.h" /* only for the type definitions */ #include "standard-headers/linux/virtio_blk.h" @@ -251,6 +252,27 @@ static void vu_blk_exp_request_shutdown(BlockExport *exp) vhost_user_server_stop(&vexp->vu_server); } +static void vu_blk_exp_resize(void *opaque) +{ + VuBlkExport *vexp = opaque; + BlockDriverState *bs = blk_bs(vexp->handler.blk); + int64_t new_size = bdrv_getlength(bs); + + if (new_size < 0) { + error_printf("Failed to get length of block node '%s'", + bdrv_get_node_name(bs)); + return; + } + + vexp->blkcfg.capacity = cpu_to_le64(new_size >> VIRTIO_BLK_SECTOR_BITS); + + vu_config_change_msg(&vexp->vu_server.vu_dev); +} + +static const BlockDevOps vu_blk_dev_ops = { + .resize_cb = vu_blk_exp_resize, +}; + static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, Error **errp) { @@ -292,6 +314,8 @@ static int vu_blk_exp_create(BlockExport *exp, BlockExportOptions *opts, blk_add_aio_context_notifier(exp->blk, blk_aio_attached, blk_aio_detach, vexp); + blk_set_dev_ops(exp->blk, &vu_blk_dev_ops, vexp); + if (!vhost_user_server_start(&vexp->vu_server, vu_opts->addr, exp->ctx, num_queues, &vu_blk_iface, errp)) { blk_remove_aio_context_notifier(exp->blk, blk_aio_attached, diff --git a/subprojects/libvhost-user/libvhost-user.c b/subprojects/libvhost-user/libvhost-user.c index 0200b78e8e..0abd898a52 100644 --- a/subprojects/libvhost-user/libvhost-user.c +++ b/subprojects/libvhost-user/libvhost-user.c @@ -2455,6 +2455,16 @@ void vu_queue_notify_sync(VuDev *dev, VuVirtq *vq) _vu_queue_notify(dev, vq, true); } +void vu_config_change_msg(VuDev *dev) +{ + VhostUserMsg vmsg = { + .request = VHOST_USER_BACKEND_CONFIG_CHANGE_MSG, + .flags = VHOST_USER_VERSION, + }; + + vu_message_write(dev, dev->slave_fd, &vmsg); +} + static inline void vring_used_flags_set_bit(VuVirtq *vq, int mask) { diff --git 
a/subprojects/libvhost-user/libvhost-user.h b/subprojects/libvhost-user/libvhost-user.h index 8c5a2719e3..49208cceaa 100644 --- a/subprojects/libvhost-user/libvhost-user.h +++ b/subprojects/libvhost-user/libvhost-user.h @@ -585,6 +585,8 @@ bool vu_queue_empty(VuDev *dev, VuVirtq *vq); */ void vu_queue_notify(VuDev *dev, VuVirtq *vq); +void vu_config_change_msg(VuDev *dev); + /** * vu_queue_notify_sync: * @dev: a VuDev context From 104593c3ad48f3857e9c48d9e3e7feb3566444c7 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Wed, 29 Mar 2023 09:38:34 +0530 Subject: [PATCH 18/31] Add my old and new work email mapping and use work email to support acpi Updating mailmap to indicate ani@anisinha.ca and anisinha@redhat.com are one and the same person. Also updating my email in MAINTAINERS for all my acpi work (reviewing patches and biosbits) to my work email. Also doing the same for bios bits test framework documentation. Signed-off-by: Ani Sinha Message-Id: <20230329040834.11973-1-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/MAINTAINERS b/MAINTAINERS index 83c0373872..e365a7a47e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -1894,7 +1894,7 @@ F: hw/pci/pcie_doe.c ACPI/SMBIOS M: Michael S. Tsirkin M: Igor Mammedov -R: Ani Sinha +R: Ani Sinha S: Supported F: include/hw/acpi/* F: include/hw/firmware/smbios.h From 1af507756bae775028c27d30e602e2b9c72cd074 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Wed, 29 Mar 2023 10:27:26 +0530 Subject: [PATCH 19/31] hw/acpi: limit warning on acpi table size to pc machines older than version 2.3 i440fx machine versions 2.3 and newer supports dynamic ram resizing. See commit a1666142db6233 ("acpi-build: make ROMs RAM blocks resizeable") . 
All currently supported q35 machine types (versions 2.4 and newer) support resizable RAM/ROM blocks. Therefore the warning generated when the ACPI table size exceeds a pre-defined value does not apply to those machine versions. Add a check limiting the warning message to only those machines that do not support expandable RAM blocks (that is, i440fx machines with version 2.2 and older). Signed-off-by: Ani Sinha Message-Id: <20230329045726.14028-1-anisinha@redhat.com> Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/acpi-build.c | 6 ++++-- hw/i386/pc.c | 1 + hw/i386/pc_piix.c | 1 + include/hw/i386/pc.h | 3 +++ 4 files changed, 9 insertions(+), 2 deletions(-) diff --git a/hw/i386/acpi-build.c b/hw/i386/acpi-build.c index 7f211e1f48..512162003b 100644 --- a/hw/i386/acpi-build.c +++ b/hw/i386/acpi-build.c @@ -2697,7 +2697,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) int legacy_table_size = ROUND_UP(tables_blob->len - aml_len + legacy_aml_len, ACPI_BUILD_ALIGN_SIZE); - if (tables_blob->len > legacy_table_size) { + if ((tables_blob->len > legacy_table_size) && + !pcmc->resizable_acpi_blob) { /* Should happen only with PCI bridges and -M pc-i440fx-2.0. */ warn_report("ACPI table size %u exceeds %d bytes," " migration may not work", @@ -2708,7 +2709,8 @@ void acpi_build(AcpiBuildTables *tables, MachineState *machine) g_array_set_size(tables_blob, legacy_table_size); } else { /* Make sure we have a buffer in case we need to resize the tables. */ - if (tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) { + if ((tables_blob->len > ACPI_BUILD_TABLE_SIZE / 2) && + !pcmc->resizable_acpi_blob) { /* As of QEMU 2.1, this fires with 160 VCPUs and 255 memory slots.
*/ warn_report("ACPI table size %u exceeds %d bytes," " migration may not work", diff --git a/hw/i386/pc.c b/hw/i386/pc.c index 615e1d3d06..d761c8c775 100644 --- a/hw/i386/pc.c +++ b/hw/i386/pc.c @@ -1946,6 +1946,7 @@ static void pc_machine_class_init(ObjectClass *oc, void *data) pcmc->acpi_data_size = 0x20000 + 0x8000; pcmc->pvh_enabled = true; pcmc->kvmclock_create_always = true; + pcmc->resizable_acpi_blob = true; assert(!mc->get_hotplug_handler); mc->get_hotplug_handler = pc_get_hotplug_handler; mc->hotplug_allowed = pc_hotplug_allowed; diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 21591dad8d..66a849d279 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -756,6 +756,7 @@ static void pc_i440fx_2_2_machine_options(MachineClass *m) compat_props_add(m->compat_props, hw_compat_2_2, hw_compat_2_2_len); compat_props_add(m->compat_props, pc_compat_2_2, pc_compat_2_2_len); pcmc->rsdp_in_ram = false; + pcmc->resizable_acpi_blob = false; } DEFINE_I440FX_MACHINE(v2_2, "pc-i440fx-2.2", pc_compat_2_2_fn, diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index eb668e9034..84935fc958 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -127,6 +127,9 @@ struct PCMachineClass { /* create kvmclock device even when KVM PV features are not exposed */ bool kvmclock_create_always; + + /* resizable acpi blob compat */ + bool resizable_acpi_blob; }; #define TYPE_PC_MACHINE "generic-pc-machine" From 094e29a27184430d6d82ad4bea1d3311fc48297e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 30 Mar 2023 15:11:09 +0200 Subject: [PATCH 20/31] tests: bios-tables-test: replace memset with initializer Coverity complains that memset() writes over a const field. Use an initializer instead, so that the const field is left to zero. Tests that have to write the const field already use an initializer for the whole struct, here I am choosing the smallest possible patch (which is not that small already). Cc: Michael S. 
Tsirkin Signed-off-by: Paolo Bonzini Message-Id: <20230330131109.47856-1-pbonzini@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Thomas Huth --- tests/qtest/bios-tables-test.c | 123 ++++++++++++--------------------- 1 file changed, 43 insertions(+), 80 deletions(-) diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index 8733589437..464f87382e 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -438,10 +438,9 @@ static void test_acpi_asl(test_data *data) { int i; AcpiSdtTable *sdt, *exp_sdt; - test_data exp_data; + test_data exp_data = {}; gboolean exp_err, err, all_tables_match = true; - memset(&exp_data, 0, sizeof(exp_data)); exp_data.tables = load_expected_aml(data); dump_aml_files(data, false); for (i = 0; i < data->tables->len; ++i) { @@ -853,12 +852,11 @@ static uint8_t base_required_struct_types[] = { static void test_acpi_piix4_tcg(void) { - test_data data; + test_data data = {}; /* Supplying -machine accel argument overrides the default (qtest). * This is to make guest actually run. 
*/ - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -868,9 +866,8 @@ static void test_acpi_piix4_tcg(void) static void test_acpi_piix4_tcg_bridge(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".bridge"; data.required_struct_types = base_required_struct_types; @@ -906,9 +903,8 @@ static void test_acpi_piix4_tcg_bridge(void) static void test_acpi_piix4_no_root_hotplug(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".roothp"; data.required_struct_types = base_required_struct_types; @@ -923,9 +919,8 @@ static void test_acpi_piix4_no_root_hotplug(void) static void test_acpi_piix4_no_bridge_hotplug(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".hpbridge"; data.required_struct_types = base_required_struct_types; @@ -940,9 +935,8 @@ static void test_acpi_piix4_no_bridge_hotplug(void) static void test_acpi_piix4_no_acpi_pci_hotplug(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".hpbrroot"; data.required_struct_types = base_required_struct_types; @@ -962,9 +956,8 @@ static void test_acpi_piix4_no_acpi_pci_hotplug(void) static void test_acpi_q35_tcg(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -994,9 +987,8 @@ static void test_acpi_q35_tcg_core_count2(void) static void test_acpi_q35_tcg_bridge(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".bridge"; 
data.required_struct_types = base_required_struct_types; @@ -1009,9 +1001,8 @@ static void test_acpi_q35_tcg_bridge(void) static void test_acpi_q35_tcg_no_acpi_hotplug(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".noacpihp"; data.required_struct_types = base_required_struct_types; @@ -1105,9 +1096,8 @@ static void test_acpi_q35_tcg_mmio64(void) static void test_acpi_piix4_tcg_cphp(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".cphp"; test_acpi_one("-smp 2,cores=3,sockets=2,maxcpus=6" @@ -1121,9 +1111,8 @@ static void test_acpi_piix4_tcg_cphp(void) static void test_acpi_q35_tcg_cphp(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".cphp"; test_acpi_one(" -smp 2,cores=3,sockets=2,maxcpus=6" @@ -1141,9 +1130,8 @@ static uint8_t ipmi_required_struct_types[] = { static void test_acpi_q35_tcg_ipmi(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".ipmibt"; data.required_struct_types = ipmi_required_struct_types; @@ -1156,9 +1144,8 @@ static void test_acpi_q35_tcg_ipmi(void) static void test_acpi_q35_tcg_smbus_ipmi(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".ipmismbus"; data.required_struct_types = ipmi_required_struct_types; @@ -1171,12 +1158,11 @@ static void test_acpi_q35_tcg_smbus_ipmi(void) static void test_acpi_piix4_tcg_ipmi(void) { - test_data data; + test_data data = {}; /* Supplying -machine accel argument overrides the default (qtest). * This is to make guest actually run. 
*/ - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".ipmikcs"; data.required_struct_types = ipmi_required_struct_types; @@ -1189,9 +1175,8 @@ static void test_acpi_piix4_tcg_ipmi(void) static void test_acpi_q35_tcg_memhp(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".memhp"; test_acpi_one(" -m 128,slots=3,maxmem=1G" @@ -1205,9 +1190,8 @@ static void test_acpi_q35_tcg_memhp(void) static void test_acpi_piix4_tcg_memhp(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".memhp"; test_acpi_one(" -m 128,slots=3,maxmem=1G" @@ -1221,9 +1205,8 @@ static void test_acpi_piix4_tcg_memhp(void) static void test_acpi_piix4_tcg_nosmm(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".nosmm"; test_acpi_one("-machine smm=off", &data); @@ -1232,9 +1215,8 @@ static void test_acpi_piix4_tcg_nosmm(void) static void test_acpi_piix4_tcg_smm_compat(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".smm-compat"; test_acpi_one("-global PIIX4_PM.smm-compat=on", &data); @@ -1243,9 +1225,8 @@ static void test_acpi_piix4_tcg_smm_compat(void) static void test_acpi_piix4_tcg_smm_compat_nosmm(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".smm-compat-nosmm"; test_acpi_one("-global PIIX4_PM.smm-compat=on -machine smm=off", &data); @@ -1254,9 +1235,8 @@ static void test_acpi_piix4_tcg_smm_compat_nosmm(void) static void test_acpi_piix4_tcg_nohpet(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.machine_param = ",hpet=off"; data.variant = ".nohpet"; @@ -1266,9 +1246,8 @@ static void test_acpi_piix4_tcg_nohpet(void) static void 
test_acpi_q35_tcg_numamem(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".numamem"; test_acpi_one(" -object memory-backend-ram,id=ram0,size=128M" @@ -1278,9 +1257,8 @@ static void test_acpi_q35_tcg_numamem(void) static void test_acpi_q35_kvm_xapic(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".xapic"; test_acpi_one(" -object memory-backend-ram,id=ram0,size=128M" @@ -1291,9 +1269,8 @@ static void test_acpi_q35_kvm_xapic(void) static void test_acpi_q35_tcg_nosmm(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".nosmm"; test_acpi_one("-machine smm=off", &data); @@ -1302,9 +1279,8 @@ static void test_acpi_q35_tcg_nosmm(void) static void test_acpi_q35_tcg_smm_compat(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".smm-compat"; test_acpi_one("-global ICH9-LPC.smm-compat=on", &data); @@ -1313,9 +1289,8 @@ static void test_acpi_q35_tcg_smm_compat(void) static void test_acpi_q35_tcg_smm_compat_nosmm(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".smm-compat-nosmm"; test_acpi_one("-global ICH9-LPC.smm-compat=on -machine smm=off", &data); @@ -1324,9 +1299,8 @@ static void test_acpi_q35_tcg_smm_compat_nosmm(void) static void test_acpi_q35_tcg_nohpet(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.machine_param = ",hpet=off"; data.variant = ".nohpet"; @@ -1336,9 +1310,8 @@ static void test_acpi_q35_tcg_nohpet(void) static void test_acpi_q35_kvm_dmar(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".dmar"; test_acpi_one("-machine 
kernel-irqchip=split -accel kvm" @@ -1348,9 +1321,8 @@ static void test_acpi_q35_kvm_dmar(void) static void test_acpi_q35_tcg_ivrs(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".ivrs"; data.tcg_only = true, @@ -1360,9 +1332,8 @@ static void test_acpi_q35_tcg_ivrs(void) static void test_acpi_piix4_tcg_numamem(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.variant = ".numamem"; test_acpi_one(" -object memory-backend-ram,id=ram0,size=128M" @@ -1379,7 +1350,7 @@ static void test_acpi_tcg_tpm(const char *machine, const char *tpm_if, machine, tpm_if); char *tmp_path = g_dir_make_tmp(tmp_dir_name, NULL); TPMTestState test; - test_data data; + test_data data = {}; GThread *thread; const char *suffix = tpm_version == TPM_VERSION_2_0 ? "tpm2" : "tpm12"; char *args, *variant = g_strdup_printf(".%s.%s", tpm_if, suffix); @@ -1399,7 +1370,6 @@ static void test_acpi_tcg_tpm(const char *machine, const char *tpm_if, thread = g_thread_new(NULL, tpm_emu_ctrl_thread, &test); tpm_emu_test_wait_cond(&test); - memset(&data, 0, sizeof(data)); data.machine = machine; data.variant = variant; @@ -1434,9 +1404,8 @@ static void test_acpi_q35_tcg_tpm12_tis(void) static void test_acpi_tcg_dimm_pxm(const char *machine) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = machine; data.variant = ".dimmpxm"; test_acpi_one(" -machine nvdimm=on,nvdimm-persistence=cpu" @@ -1504,7 +1473,6 @@ static void test_acpi_virt_tcg_memhp(void) static void test_acpi_microvm_prepare(test_data *data) { - memset(data, 0, sizeof(*data)); data->machine = "microvm"; data->required_struct_types = NULL; /* no smbios */ data->required_struct_types_len = 0; @@ -1513,7 +1481,7 @@ static void test_acpi_microvm_prepare(test_data *data) static void test_acpi_microvm_tcg(void) { - test_data data; + test_data data = {}; 
test_acpi_microvm_prepare(&data); test_acpi_one(" -machine microvm,acpi=on,ioapic2=off,rtc=off", @@ -1523,7 +1491,7 @@ static void test_acpi_microvm_tcg(void) static void test_acpi_microvm_usb_tcg(void) { - test_data data; + test_data data = {}; test_acpi_microvm_prepare(&data); data.variant = ".usb"; @@ -1534,7 +1502,7 @@ static void test_acpi_microvm_usb_tcg(void) static void test_acpi_microvm_rtc_tcg(void) { - test_data data; + test_data data = {}; test_acpi_microvm_prepare(&data); data.variant = ".rtc"; @@ -1545,7 +1513,7 @@ static void test_acpi_microvm_rtc_tcg(void) static void test_acpi_microvm_pcie_tcg(void) { - test_data data; + test_data data = {}; test_acpi_microvm_prepare(&data); data.variant = ".pcie"; @@ -1557,7 +1525,7 @@ static void test_acpi_microvm_pcie_tcg(void) static void test_acpi_microvm_ioapic2_tcg(void) { - test_data data; + test_data data = {}; test_acpi_microvm_prepare(&data); data.variant = ".ioapic2"; @@ -1622,9 +1590,8 @@ static void test_acpi_virt_tcg_pxb(void) static void test_acpi_tcg_acpi_hmat(const char *machine) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = machine; data.variant = ".acpihmat"; test_acpi_one(" -machine hmat=on" @@ -1721,9 +1688,8 @@ static void test_acpi_virt_tcg_acpi_hmat(void) static void test_acpi_q35_tcg_acpi_hmat_noinitiator(void) { - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.variant = ".acpihmat-noinitiator"; test_acpi_one(" -machine hmat=on" @@ -1772,9 +1738,8 @@ static void test_acpi_erst(const char *machine) { gchar *tmp_path = g_dir_make_tmp("qemu-test-erst.XXXXXX", NULL); gchar *params; - test_data data; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = machine; data.variant = ".acpierst"; params = g_strdup_printf( @@ -1802,7 +1767,7 @@ static void test_acpi_microvm_acpi_erst(void) { gchar *tmp_path = g_dir_make_tmp("qemu-test-erst.XXXXXX", NULL); gchar *params; - test_data 
data; + test_data data = {}; test_acpi_microvm_prepare(&data); data.variant = ".pcie"; @@ -2003,10 +1968,9 @@ static void test_oem_fields(test_data *data) static void test_acpi_piix4_oem_fields(void) { - test_data data; char *args; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_PC; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -2022,10 +1986,9 @@ static void test_acpi_piix4_oem_fields(void) static void test_acpi_q35_oem_fields(void) { - test_data data; char *args; + test_data data = {}; - memset(&data, 0, sizeof(data)); data.machine = MACHINE_Q35; data.required_struct_types = base_required_struct_types; data.required_struct_types_len = ARRAY_SIZE(base_required_struct_types); @@ -2041,7 +2004,7 @@ static void test_acpi_q35_oem_fields(void) static void test_acpi_microvm_oem_fields(void) { - test_data data; + test_data data = {}; char *args; test_acpi_microvm_prepare(&data); From 2b6fc0b859a1b8a5bc2a48c56e8cb595748b7c3f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 31 Mar 2023 17:04:10 +0200 Subject: [PATCH 21/31] =?UTF-8?q?MAINTAINERS:=20Add=20Eugenio=20P=C3=A9rez?= =?UTF-8?q?=20as=20vhost-shadow-virtqueue=20reviewer?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'd like to be notified on SVQ patches and review them. Signed-off-by: Eugenio Pérez Message-Id: <20230331150410.2627214-1-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- MAINTAINERS | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index e365a7a47e..5e2d4b2c2c 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2075,6 +2075,10 @@ F: backends/vhost-user.c F: include/sysemu/vhost-user-backend.h F: subprojects/libvhost-user/ +vhost-shadow-virtqueue +R: Eugenio Pérez +F: hw/virtio/vhost-shadow-virtqueue.* + virtio M: Michael S. 
Tsirkin S: Supported From 8a9ede6f511c5a028e1c1fc949a97ff30c36bebe Mon Sep 17 00:00:00 2001 From: Stefan Weil Date: Sun, 9 Apr 2023 22:18:28 +0200 Subject: [PATCH 22/31] docs/cxl: Fix sentence Signed-off-by: Stefan Weil Message-Id: <20230409201828.1159568-1-sw@weilnetz.de> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/system/devices/cxl.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/system/devices/cxl.rst b/docs/system/devices/cxl.rst index f25783a4ec..4c38223069 100644 --- a/docs/system/devices/cxl.rst +++ b/docs/system/devices/cxl.rst @@ -111,7 +111,7 @@ Interfaces provided include: CXL Root Ports (CXL RP) ~~~~~~~~~~~~~~~~~~~~~~~ -A CXL Root Port servers te same purpose as a PCIe Root Port. +A CXL Root Port serves the same purpose as a PCIe Root Port. There are a number of CXL specific Designated Vendor Specific Extended Capabilities (DVSEC) in PCIe Configuration Space and associated component register access via PCI bars. From ec1a78cee97001b0ed25b5866e92dae058eb5877 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 12 Apr 2023 15:35:10 +0800 Subject: [PATCH 23/31] intel_iommu: refine iotlb hash calculation Commit 1b2b12376c8 ("intel-iommu: PASID support") takes PASID into account when calculating iotlb hash like: static guint vtd_iotlb_hash(gconstpointer v) { const struct vtd_iotlb_key *key = v; return key->gfn | ((key->sid) << VTD_IOTLB_SID_SHIFT) | (key->level) << VTD_IOTLB_LVL_SHIFT | (key->pasid) << VTD_IOTLB_PASID_SHIFT; } This turns out to be problematic since: - the shift will lose bits if not converting to uint64_t - level should be off by one in order to fit into 2 bits - VTD_IOTLB_PASID_SHIFT is 30 but PASID is 20 bits which will waste some bits - the hash result is uint64_t so we will lose bits when converting to guint So this patch fixes them by - converting the keys into uint64_t before doing the shift - off level by one to make it fit into two bits - change the sid, lvl and pasid shift to 
26, 42 and 44 in order to take the full width of uint64_t - perform an XOR to the top 32bit with the bottom 32bit for the final result to fit guint Fixes: Coverity CID 1508100 Fixes: 1b2b12376c8 ("intel-iommu: PASID support") Signed-off-by: Jason Wang Message-Id: <20230412073510.7158-1-jasowang@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Peter Xu --- hw/i386/intel_iommu.c | 9 +++++---- hw/i386/intel_iommu_internal.h | 6 +++--- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/hw/i386/intel_iommu.c b/hw/i386/intel_iommu.c index a62896759c..94d52f4205 100644 --- a/hw/i386/intel_iommu.c +++ b/hw/i386/intel_iommu.c @@ -64,8 +64,8 @@ struct vtd_as_key { struct vtd_iotlb_key { uint64_t gfn; uint32_t pasid; - uint32_t level; uint16_t sid; + uint8_t level; }; static void vtd_address_space_refresh_all(IntelIOMMUState *s); @@ -221,10 +221,11 @@ static gboolean vtd_iotlb_equal(gconstpointer v1, gconstpointer v2) static guint vtd_iotlb_hash(gconstpointer v) { const struct vtd_iotlb_key *key = v; + uint64_t hash64 = key->gfn | ((uint64_t)(key->sid) << VTD_IOTLB_SID_SHIFT) | + (uint64_t)(key->level - 1) << VTD_IOTLB_LVL_SHIFT | + (uint64_t)(key->pasid) << VTD_IOTLB_PASID_SHIFT; - return key->gfn | ((key->sid) << VTD_IOTLB_SID_SHIFT) | - (key->level) << VTD_IOTLB_LVL_SHIFT | - (key->pasid) << VTD_IOTLB_PASID_SHIFT; + return (guint)((hash64 >> 32) ^ (hash64 & 0xffffffffU)); } static gboolean vtd_as_equal(gconstpointer v1, gconstpointer v2) diff --git a/hw/i386/intel_iommu_internal.h b/hw/i386/intel_iommu_internal.h index f090e61e11..2e61eec2f5 100644 --- a/hw/i386/intel_iommu_internal.h +++ b/hw/i386/intel_iommu_internal.h @@ -114,9 +114,9 @@ VTD_INTERRUPT_ADDR_FIRST + 1) /* The shift of source_id in the key of IOTLB hash table */ -#define VTD_IOTLB_SID_SHIFT 20 -#define VTD_IOTLB_LVL_SHIFT 28 -#define VTD_IOTLB_PASID_SHIFT 30 +#define VTD_IOTLB_SID_SHIFT 26 +#define VTD_IOTLB_LVL_SHIFT 42 +#define VTD_IOTLB_PASID_SHIFT 44 
#define VTD_IOTLB_MAX_SIZE 1024 /* Max size of the hash table */ /* IOTLB_REG */ From 2a3f8b333b6efbc0ccaacb318fb9c9d53f25bcfd Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Fri, 14 Apr 2023 18:04:41 +0900 Subject: [PATCH 24/31] docs: Remove obsolete descriptions of SR-IOV support The documentation used to say there is no device implemented with SR-IOV, but igb and nvme support SR-IOV today. Signed-off-by: Akihiko Odaki Message-Id: <20230414090441.23156-1-akihiko.odaki@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/pcie_sriov.txt | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt index 11158dbf88..7eff7f2703 100644 --- a/docs/pcie_sriov.txt +++ b/docs/pcie_sriov.txt @@ -9,10 +9,7 @@ virtual functions (VFs) for the main purpose of eliminating software overhead in I/O from virtual machines. QEMU now implements the basic common functionality to enable an emulated device -to support SR/IOV. Yet no fully implemented devices exists in QEMU, but a -proof-of-concept hack of the Intel igb can be found here: - -git://github.com/knuto/qemu.git sriov_patches_v5 +to support SR/IOV. Implementation ============== From 91208dd297f2686b778210ffda49acbfe36bd3b6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Tue, 18 Apr 2023 09:24:54 +0530 Subject: [PATCH 25/31] virtio: i2c: Check notifier helpers for VIRTIO_CONFIG_IRQ_IDX Since the driver doesn't support interrupts, we must return early when index is set to VIRTIO_CONFIG_IRQ_IDX. Fixes: 544f0278afca ("virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX") Signed-off-by: Viresh Kumar Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost-user-i2c.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hw/virtio/vhost-user-i2c.c b/hw/virtio/vhost-user-i2c.c index 60eaf0d95b..4eef3f0633 100644 --- a/hw/virtio/vhost-user-i2c.c +++ b/hw/virtio/vhost-user-i2c.c @@ -128,6 +128,14 @@ static void vu_i2c_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) { VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + /* + * We don't support interrupts, return early if index is set to + * VIRTIO_CONFIG_IRQ_IDX. + */ + if (idx == VIRTIO_CONFIG_IRQ_IDX) { + return; + } + vhost_virtqueue_mask(&i2c->vhost_dev, vdev, idx, mask); } @@ -135,6 +143,14 @@ static bool vu_i2c_guest_notifier_pending(VirtIODevice *vdev, int idx) { VHostUserI2C *i2c = VHOST_USER_I2C(vdev); + /* + * We don't support interrupts, return early if index is set to + * VIRTIO_CONFIG_IRQ_IDX. + */ + if (idx == VIRTIO_CONFIG_IRQ_IDX) { + return false; + } + return vhost_virtqueue_pending(&i2c->vhost_dev, idx); } From 0f689cf5ada4d5df5ab95c7f7aa9fc221afa855d Mon Sep 17 00:00:00 2001 From: Igor Mammedov Date: Tue, 18 Apr 2023 11:04:49 +0200 Subject: [PATCH 26/31] acpi: pcihp: allow repeating hot-unplug requests with Q35 using ACPI PCI hotplug by default, user's request to unplug device is ignored when it's issued before guest OS has been booted. And any additional attempt to request device hot-unplug afterwards results in following error: "Device XYZ is already in the process of unplug" arguably it can be considered as a regression introduced by [2], before which it was possible to issue unplug request multiple times. Accept new unplug requests after timeout (1ms). This brings ACPI PCI hotplug on par with native PCIe unplug behavior [1] and allows user to repeat unplug requests at proper times. Set expire timeout to arbitrary 1msec so user won't be able to flood guest with SCI interrupts by calling device_del in tight loop.
PS: ACPI spec doesn't mandate what OSPM can do with GPEx.status bits set before it's booted => it's implementation dependent. Status bits may be retained (I tested with one Windows version) or cleared (Linux since 2.6 kernel times) during guest's ACPI subsystem initialization. Clearing status bits (though not wrong per se) hides the unplug event from guest, and it's up to user to repeat device_del later when guest is able to handle unplug requests. 1) 18416c62e3 ("pcie: expire pending delete") 2) Fixes: cce8944cc9ef ("qdev-monitor: Forbid repeated device_del") Signed-off-by: Igor Mammedov Acked-by: Gerd Hoffmann CC: mst@redhat.com CC: anisinha@redhat.com CC: jusual@redhat.com CC: kraxel@redhat.com Message-Id: <20230418090449.2155757-1-imammedo@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Ani Sinha --- hw/acpi/pcihp.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/hw/acpi/pcihp.c b/hw/acpi/pcihp.c index dcfb779a7a..cdd6f775a1 100644 --- a/hw/acpi/pcihp.c +++ b/hw/acpi/pcihp.c @@ -357,6 +357,16 @@ void acpi_pcihp_device_unplug_request_cb(HotplugHandler *hotplug_dev, * acpi_pcihp_eject_slot() when the operation is completed. */ pdev->qdev.pending_deleted_event = true; + /* if unplug was requested before OSPM is initialized, + * linux kernel will clear GPE0.sts[] bits during boot, which effectively + * hides unplug event. And then followup qmp_device_del() calls remain + * blocked by above flag permanently. + * Unblock qmp_device_del() by setting expire limit, so user can + * repeat unplug request later when OSPM has been booted.
+ */ + pdev->qdev.pending_deleted_expires_ms = + qemu_clock_get_ms(QEMU_CLOCK_VIRTUAL); /* 1 msec */ + s->acpi_pcihp_pci_status[bsel].down |= (1U << slot); acpi_send_event(DEVICE(hotplug_dev), ACPI_PCI_HOTPLUG_STATUS); } From 0c0e21d1c1e2dde4f7437bcd5c53127013cc25f7 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 Apr 2023 17:03:32 +0100 Subject: [PATCH 27/31] docs/specs/pci-ids: Convert from txt to rST Convert the pci-ids document from plain text to reStructuredText. I opted to use definition-lists here because rST tables are super-clunky, and actually formatting these as tables didn't seem necessary. Signed-off-by: Peter Maydell Message-Id: <20230420160334.1048224-2-peter.maydell@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/specs/index.rst | 1 + docs/specs/pci-ids.rst | 98 ++++++++++++++++++++++++++++++++++++++++++ docs/specs/pci-ids.txt | 70 ------------------------------ 3 files changed, 99 insertions(+), 70 deletions(-) create mode 100644 docs/specs/pci-ids.rst delete mode 100644 docs/specs/pci-ids.txt diff --git a/docs/specs/index.rst b/docs/specs/index.rst index a58d9311cb..8aa0fcb77a 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -8,6 +8,7 @@ guest hardware that is specific to QEMU. .. toctree:: :maxdepth: 2 + pci-ids ppc-xive ppc-spapr-xive ppc-spapr-numa diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst new file mode 100644 index 0000000000..e1cf022006 --- /dev/null +++ b/docs/specs/pci-ids.rst @@ -0,0 +1,98 @@ +================ +PCI IDs for QEMU +================ + +Red Hat, Inc. donates a part of its device ID range to QEMU, to be used for +virtual devices. The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36. + +Contact Gerd Hoffmann to get a device ID assigned +for your devices. + +1af4 vendor ID +-------------- + +The 1000 -> 10ff device ID range is used as follows for virtio-pci devices. 
+Note that this allocation is separate from the virtio device IDs, which are +maintained as part of the virtio specification. + +1af4:1000 + network device (legacy) +1af4:1001 + block device (legacy) +1af4:1002 + balloon device (legacy) +1af4:1003 + console device (legacy) +1af4:1004 + SCSI host bus adapter device (legacy) +1af4:1005 + entropy generator device (legacy) +1af4:1009 + 9p filesystem device (legacy) +1af4:1012 + vsock device (bug compatibility) + +1af4:1040 to 1af4:10ef + ID range for modern virtio devices. The PCI device + ID is calculated from the virtio device ID by adding the + 0x1040 offset. The virtio IDs are defined in the virtio + specification. The Linux kernel has a header file with + defines for all virtio IDs (``linux/virtio_ids.h``); QEMU has a + copy in ``include/standard-headers/``. + +1af4:10f0 to 1af4:10ff + Available for experimental usage without registration. Must get + official ID when the code leaves the test lab (i.e. when seeking + upstream merge or shipping a distro/product) to avoid conflicts. + +1af4:1100 + Used as PCI Subsystem ID for existing hardware devices emulated + by QEMU. + +1af4:1110 + ivshmem device (shared memory, ``docs/specs/ivshmem-spec.txt``) + +All other device IDs are reserved.
+ +1b36 vendor ID +-------------- + +The 0000 -> 00ff device ID range is used as follows for QEMU-specific +PCI devices (other than virtio): + +1b36:0001 + PCI-PCI bridge +1b36:0002 + PCI serial port (16550A) adapter (``docs/specs/pci-serial.txt``) +1b36:0003 + PCI Dual-port 16550A adapter (``docs/specs/pci-serial.txt``) +1b36:0004 + PCI Quad-port 16550A adapter (``docs/specs/pci-serial.txt``) +1b36:0005 + PCI test device (``docs/specs/pci-testdev.txt``) +1b36:0006 + PCI Rocker Ethernet switch device +1b36:0007 + PCI SD Card Host Controller Interface (SDHCI) +1b36:0008 + PCIe host bridge +1b36:0009 + PCI Expander Bridge (-device pxb) +1b36:000a + PCI-PCI bridge (multiseat) +1b36:000b + PCIe Expander Bridge (-device pxb-pcie) +1b36:000d + PCI xhci usb host adapter +1b36:000f + mdpy (mdev sample device), ``linux/samples/vfio-mdev/mdpy.c`` +1b36:0010 + PCIe NVMe device (``-device nvme``) +1b36:0011 + PCI PVPanic device (``-device pvpanic-pci``) +1b36:0012 + PCI ACPI ERST device (``-device acpi-erst``) + +All these devices are documented in :doc:`index`. + +The 0100 device ID is used for the QXL video card device. diff --git a/docs/specs/pci-ids.txt b/docs/specs/pci-ids.txt deleted file mode 100644 index e463c4cb3a..0000000000 --- a/docs/specs/pci-ids.txt +++ /dev/null @@ -1,70 +0,0 @@ - -PCI IDs for qemu -================ - -Red Hat, Inc. donates a part of its device ID range to qemu, to be used for -virtual devices. The vendor IDs are 1af4 (formerly Qumranet ID) and 1b36. - -Contact Gerd Hoffmann to get a device ID assigned -for your devices. - -1af4 vendor ID --------------- - -The 1000 -> 10ff device ID range is used as follows for virtio-pci devices. -Note that this allocation separate from the virtio device IDs, which are -maintained as part of the virtio specification. 
- -1af4:1000 network device (legacy) -1af4:1001 block device (legacy) -1af4:1002 balloon device (legacy) -1af4:1003 console device (legacy) -1af4:1004 SCSI host bus adapter device (legacy) -1af4:1005 entropy generator device (legacy) -1af4:1009 9p filesystem device (legacy) -1af4:1012 vsock device (bug compatibility) - -1af4:1040 Start of ID range for modern virtio devices. The PCI device - to ID is calculated from the virtio device ID by adding the -1af4:10ef 0x1040 offset. The virtio IDs are defined in the virtio - specification. The Linux kernel has a header file with - defines for all virtio IDs (linux/virtio_ids.h), qemu has a - copy in include/standard-headers/. - -1af4:10f0 Available for experimental usage without registration. Must get - to official ID when the code leaves the test lab (i.e. when seeking -1af4:10ff upstream merge or shipping a distro/product) to avoid conflicts. - -1af4:1100 Used as PCI Subsystem ID for existing hardware devices emulated - by qemu. - -1af4:1110 ivshmem device (shared memory, docs/specs/ivshmem-spec.txt) - -All other device IDs are reserved. 
- -1b36 vendor ID --------------- - -The 0000 -> 00ff device ID range is used as follows for QEMU-specific -PCI devices (other than virtio): - -1b36:0001 PCI-PCI bridge -1b36:0002 PCI serial port (16550A) adapter (docs/specs/pci-serial.txt) -1b36:0003 PCI Dual-port 16550A adapter (docs/specs/pci-serial.txt) -1b36:0004 PCI Quad-port 16550A adapter (docs/specs/pci-serial.txt) -1b36:0005 PCI test device (docs/specs/pci-testdev.txt) -1b36:0006 PCI Rocker Ethernet switch device -1b36:0007 PCI SD Card Host Controller Interface (SDHCI) -1b36:0008 PCIe host bridge -1b36:0009 PCI Expander Bridge (-device pxb) -1b36:000a PCI-PCI bridge (multiseat) -1b36:000b PCIe Expander Bridge (-device pxb-pcie) -1b36:000d PCI xhci usb host adapter -1b36:000f mdpy (mdev sample device), linux/samples/vfio-mdev/mdpy.c -1b36:0010 PCIe NVMe device (-device nvme) -1b36:0011 PCI PVPanic device (-device pvpanic-pci) -1b36:0012 PCI ACPI ERST device (-device acpi-erst) - -All these devices are documented in docs/specs. - -The 0100 device ID is used for the QXL video card device. From 3669b594d88930f9e5d7c83e038d7d9ca5a75378 Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 Apr 2023 17:03:33 +0100 Subject: [PATCH 28/31] docs/specs: Convert pci-serial.txt to rst MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Convert pci-serial.txt to reStructuredText. This includes some wordsmithing, and the correction of the docs to note that the Windows inf file includes 2x and 4x support (as it has done since commit dc9528fdf9f61 in 2014). Signed-off-by: Peter Maydell Message-Id: <20230420160334.1048224-3-peter.maydell@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin Reviewed-by: Philippe Mathieu-Daudé --- docs/specs/index.rst | 1 + docs/specs/pci-ids.rst | 6 +++--- docs/specs/pci-serial.rst | 37 +++++++++++++++++++++++++++++++++++++ docs/specs/pci-serial.txt | 34 ---------------------------------- hw/char/serial-pci-multi.c | 2 +- hw/char/serial-pci.c | 2 +- 6 files changed, 43 insertions(+), 39 deletions(-) create mode 100644 docs/specs/pci-serial.rst delete mode 100644 docs/specs/pci-serial.txt diff --git a/docs/specs/index.rst b/docs/specs/index.rst index 8aa0fcb77a..dfa136073c 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -9,6 +9,7 @@ guest hardware that is specific to QEMU. :maxdepth: 2 pci-ids + pci-serial ppc-xive ppc-spapr-xive ppc-spapr-numa diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst index e1cf022006..7bc7168fb6 100644 --- a/docs/specs/pci-ids.rst +++ b/docs/specs/pci-ids.rst @@ -63,11 +63,11 @@ PCI devices (other than virtio): 1b36:0001 PCI-PCI bridge 1b36:0002 - PCI serial port (16550A) adapter (``docs/specs/pci-serial.txt``) + PCI serial port (16550A) adapter (:doc:`pci-serial`) 1b36:0003 - PCI Dual-port 16550A adapter (``docs/specs/pci-serial.txt``) + PCI Dual-port 16550A adapter (:doc:`pci-serial`) 1b36:0004 - PCI Quad-port 16550A adapter (``docs/specs/pci-serial.txt``) + PCI Quad-port 16550A adapter (:doc:`pci-serial`) 1b36:0005 PCI test device (``docs/specs/pci-testdev.txt``) 1b36:0006 diff --git a/docs/specs/pci-serial.rst b/docs/specs/pci-serial.rst new file mode 100644 index 0000000000..8d916a3669 --- /dev/null +++ b/docs/specs/pci-serial.rst @@ -0,0 +1,37 @@ +======================= +QEMU PCI serial devices +======================= + +QEMU implements some PCI serial devices which are simple PCI +wrappers around one or more 16550 UARTs. + +There is one single-port variant and two multiport-variants. Linux +guests work out-of-the box with all cards. There is a Windows inf file +(``docs/qemupciserial.inf``) to set up the cards in Windows guests. 
+ + +Single-port card +---------------- + +Name: + ``pci-serial`` +PCI ID: + 1b36:0002 +PCI Region 0: + IO bar, 8 bytes long, with the 16550 UART mapped to it. +Interrupt: + Wired to pin A. + + +Multiport cards +--------------- + +Name: + ``pci-serial-2x``, ``pci-serial-4x`` +PCI ID: + 1b36:0003 (``-2x``) and 1b36:0004 (``-4x``) +PCI Region 0: + IO bar, with two or four 16550 UARTs mapped after each other. + The first is at offset 0, the second at offset 8, and so on. +Interrupt: + Wired to pin A. diff --git a/docs/specs/pci-serial.txt b/docs/specs/pci-serial.txt deleted file mode 100644 index 66c761f2b4..0000000000 --- a/docs/specs/pci-serial.txt +++ /dev/null @@ -1,34 +0,0 @@ - -QEMU pci serial devices -======================= - -There is one single-port variant and two muliport-variants. Linux -guests out-of-the box with all cards. There is a Windows inf file -(docs/qemupciserial.inf) to setup the single-port card in Windows -guests. - - -single-port card ----------------- - -Name: pci-serial -PCI ID: 1b36:0002 - -PCI Region 0: - IO bar, 8 bytes long, with the 16550 uart mapped to it. - Interrupt is wired to pin A. - - -multiport cards ---------------- - -Name: pci-serial-2x -PCI ID: 1b36:0003 - -Name: pci-serial-4x -PCI ID: 1b36:0004 - -PCI Region 0: - IO bar, with two/four 16550 uart mapped after each other. - The first is at offset 0, second at offset 8, ... - Interrupt is wired to pin A. diff --git a/hw/char/serial-pci-multi.c b/hw/char/serial-pci-multi.c index f18b8dcce5..5d65c534cb 100644 --- a/hw/char/serial-pci-multi.c +++ b/hw/char/serial-pci-multi.c @@ -25,7 +25,7 @@ * THE SOFTWARE. */ -/* see docs/specs/pci-serial.txt */ +/* see docs/specs/pci-serial.rst */ #include "qemu/osdep.h" #include "qapi/error.h" diff --git a/hw/char/serial-pci.c b/hw/char/serial-pci.c index 801b769aba..087da3059a 100644 --- a/hw/char/serial-pci.c +++ b/hw/char/serial-pci.c @@ -23,7 +23,7 @@ * THE SOFTWARE. 
*/ -/* see docs/specs/pci-serial.txt */ +/* see docs/specs/pci-serial.rst */ #include "qemu/osdep.h" #include "qapi/error.h" From 4d58309388003837f8eba72c5643722060a5656a Mon Sep 17 00:00:00 2001 From: Peter Maydell Date: Thu, 20 Apr 2023 17:03:34 +0100 Subject: [PATCH 29/31] docs/specs: Convert pci-testdev.txt to rst Convert pci-testdev.txt to reStructuredText. Includes some minor wordsmithing. Signed-off-by: Peter Maydell Message-Id: <20230420160334.1048224-4-peter.maydell@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/specs/index.rst | 1 + docs/specs/pci-ids.rst | 2 +- docs/specs/pci-testdev.rst | 39 ++++++++++++++++++++++++++++++++++++++ docs/specs/pci-testdev.txt | 31 ------------------------------ 4 files changed, 41 insertions(+), 32 deletions(-) create mode 100644 docs/specs/pci-testdev.rst delete mode 100644 docs/specs/pci-testdev.txt diff --git a/docs/specs/index.rst b/docs/specs/index.rst index dfa136073c..e58be38c41 100644 --- a/docs/specs/index.rst +++ b/docs/specs/index.rst @@ -10,6 +10,7 @@ guest hardware that is specific to QEMU. pci-ids pci-serial + pci-testdev ppc-xive ppc-spapr-xive ppc-spapr-numa diff --git a/docs/specs/pci-ids.rst b/docs/specs/pci-ids.rst index 7bc7168fb6..e302bea484 100644 --- a/docs/specs/pci-ids.rst +++ b/docs/specs/pci-ids.rst @@ -69,7 +69,7 @@ PCI devices (other than virtio): 1b36:0004 PCI Quad-port 16550A adapter (:doc:`pci-serial`) 1b36:0005 - PCI test device (``docs/specs/pci-testdev.txt``) + PCI test device (:doc:`pci-testdev`) 1b36:0006 PCI Rocker Ethernet switch device 1b36:0007 diff --git a/docs/specs/pci-testdev.rst b/docs/specs/pci-testdev.rst new file mode 100644 index 0000000000..4b6d36543b --- /dev/null +++ b/docs/specs/pci-testdev.rst @@ -0,0 +1,39 @@ +==================== +QEMU PCI test device +==================== + +``pci-testdev`` is a device used for testing low level IO. + +The device implements up to three BARs: BAR0, BAR1 and BAR2. 
+Each of BAR 0+1 can be memory or IO. Guests must detect +BAR types and act accordingly. + +BAR 0+1 size is up to 4K bytes each. +BAR 0+1 starts with the following header: + +.. code-block:: c + + typedef struct PCITestDevHdr { + uint8_t test; /* write-only, starts a given test number */ + uint8_t width_type; /* + * read-only, type and width of access for a given test. + * 1,2,4 for byte,word or long write. + * any other value if test not supported on this BAR + */ + uint8_t pad0[2]; + uint32_t offset; /* read-only, offset in this BAR for a given test */ + uint32_t data; /* read-only, data to use for a given test */ + uint32_t count; /* for debugging. number of writes detected. */ + uint8_t name[]; /* for debugging. 0-terminated ASCII string. */ + } PCITestDevHdr; + +All registers are little endian. + +The device is expected to always implement tests 0 to N on each BAR, and to add new +tests with higher numbers. In this way a guest can scan test numbers until it +detects an access type that it does not support on this BAR, then stop. + +BAR2 is a 64bit memory BAR, without backing storage. It is disabled +by default and can be enabled using the ``membar=`` property. This +can be used to test whether guests handle PCI BARs of a specific +(possibly quite large) size correctly. diff --git a/docs/specs/pci-testdev.txt b/docs/specs/pci-testdev.txt deleted file mode 100644 index 4280a1e73c..0000000000 --- a/docs/specs/pci-testdev.txt +++ /dev/null @@ -1,31 +0,0 @@ -pci-test is a device used for testing low level IO - -device implements up to three BARs: BAR0, BAR1 and BAR2. -Each of BAR 0+1 can be memory or IO. Guests must detect -BAR types and act accordingly. - -BAR 0+1 size is up to 4K bytes each. -BAR 0+1 starts with the following header: - -typedef struct PCITestDevHdr { - uint8_t test; <- write-only, starts a given test number - uint8_t width_type; <- read-only, type and width of access for a given test. - 1,2,4 for byte,word or long write. 
- any other value if test not supported on this BAR - uint8_t pad0[2]; - uint32_t offset; <- read-only, offset in this BAR for a given test - uint32_t data; <- read-only, data to use for a given test - uint32_t count; <- for debugging. number of writes detected. - uint8_t name[]; <- for debugging. 0-terminated ASCII string. -} PCITestDevHdr; - -All registers are little endian. - -device is expected to always implement tests 0 to N on each BAR, and to add new -tests with higher numbers. In this way a guest can scan test numbers until it -detects an access type that it does not support on this BAR, then stop. - -BAR2 is a 64bit memory bar, without backing storage. It is disabled -by default and can be enabled using the membar= property. This -can be used to test whether guests handle pci bars of a specific -(possibly quite large) size correctly. From 9136f661c7277777a2f85a7e98438f4fe6472fdc Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 20 Apr 2023 15:27:49 +0100 Subject: [PATCH 30/31] hw/pci-bridge: pci_expander_bridge fix type in pxb_cxl_dev_reset() Reproduce issue with configure --enable-qom-cast-debug ... qemu-system-x86_64 -display none -machine q35,cxl=on -device pxb-cxl,bus=pcie.0 hw/pci-bridge/pci_expander_bridge.c:54:PXB_DEV: Object 0x5570e0b1ada0 is not an instance of type pxb Aborted The type conversion results in the right state structure, but PXB_DEV is not a parent of PXB_CXL_DEV hence the error. Rather than directly cleaning up the inheritance, this is the minimal fix which will be followed by the cleanup. Fixes: 154070eaf6 ("hw/pxb-cxl: Support passthrough HDM Decoders unless overridden") Reported-by: Peter Maydell Signed-off-by: Jonathan Cameron Message-Id: <20230420142750.6950-2-Jonathan.Cameron@huawei.com> Reviewed-by: Thomas Huth Cc: qemu-stable@nongnu.org Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/pci-bridge/pci_expander_bridge.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index ead33f0c05..a78327b5f2 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -311,7 +311,7 @@ static void pxb_cxl_dev_reset(DeviceState *dev) * The CXL specification allows for host bridges with no HDM decoders * if they only have a single root port. */ - if (!PXB_DEV(dev)->hdm_for_passthrough) { + if (!PXB_CXL_DEV(dev)->hdm_for_passthrough) { dsp_count = pcie_count_ds_ports(hb->bus); } /* Initial reset will have 0 dsp so wait until > 0 */ From c28db9e0002df2abf88283b41dce0be17e8b0888 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Thu, 20 Apr 2023 15:27:50 +0100 Subject: [PATCH 31/31] hw/pci-bridge: Make PCIe and CXL PXB Devices inherit from TYPE_PXB_DEV Previously, PXB_CXL_DEVICE, PXB_PCIE_DEVICE and PXB_DEVICE all have PCI_DEVICE as their direct parent but share a common state struct PXBDev. convert_to_pxb() is used to get the PXBDev instance from whichever of these types it is called on. This patch switches to an explicit hierarchy based on shared functionality. To allow use of OBJECT_DECLARE_SIMPLE_TYPE() whilst minimizing code changes, all types are renamed to have the postfix _DEV rather than _DEVICE. The new hierarchy has PXB_CXL_DEV with parent PXB_PCIE_DEV which in turn has parent PXB_DEV which continues to have parent PCI_DEVICE. This allows simple use of PXB_DEV() etc rather than a custom function + removal of duplicated properties and moving the CXL specific elements out of struct PXBDev. Signed-off-by: Jonathan Cameron Message-Id: <20230420142750.6950-3-Jonathan.Cameron@huawei.com> Reviewed-by: Thomas Huth Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S.
Tsirkin --- hw/acpi/cxl.c | 11 +++--- hw/cxl/cxl-host.c | 4 +- hw/pci-bridge/pci_expander_bridge.c | 59 ++++++++++------------------- include/hw/cxl/cxl.h | 4 +- include/hw/pci/pci_bridge.h | 30 ++++++++++----- 5 files changed, 50 insertions(+), 58 deletions(-) diff --git a/hw/acpi/cxl.c b/hw/acpi/cxl.c index 2bf8c07993..92b46bc932 100644 --- a/hw/acpi/cxl.c +++ b/hw/acpi/cxl.c @@ -30,9 +30,10 @@ #include "qapi/error.h" #include "qemu/uuid.h" -static void cedt_build_chbs(GArray *table_data, PXBDev *cxl) +static void cedt_build_chbs(GArray *table_data, PXBCXLDev *cxl) { - SysBusDevice *sbd = SYS_BUS_DEVICE(cxl->cxl.cxl_host_bridge); + PXBDev *pxb = PXB_DEV(cxl); + SysBusDevice *sbd = SYS_BUS_DEVICE(cxl->cxl_host_bridge); struct MemoryRegion *mr = sbd->mmio[0].memory; /* Type */ @@ -45,7 +46,7 @@ static void cedt_build_chbs(GArray *table_data, PXBDev *cxl) build_append_int_noprefix(table_data, 32, 2); /* UID - currently equal to bus number */ - build_append_int_noprefix(table_data, cxl->bus_nr, 4); + build_append_int_noprefix(table_data, pxb->bus_nr, 4); /* Version */ build_append_int_noprefix(table_data, 1, 4); @@ -112,7 +113,7 @@ static void cedt_build_cfmws(GArray *table_data, CXLState *cxls) /* Host Bridge List (list of UIDs - currently bus_nr) */ for (i = 0; i < fw->num_targets; i++) { g_assert(fw->target_hbs[i]); - build_append_int_noprefix(table_data, fw->target_hbs[i]->bus_nr, 4); + build_append_int_noprefix(table_data, PXB_DEV(fw->target_hbs[i])->bus_nr, 4); } } } @@ -121,7 +122,7 @@ static int cxl_foreach_pxb_hb(Object *obj, void *opaque) { Aml *cedt = opaque; - if (object_dynamic_cast(obj, TYPE_PXB_CXL_DEVICE)) { + if (object_dynamic_cast(obj, TYPE_PXB_CXL_DEV)) { cedt_build_chbs(cedt->buf, PXB_CXL_DEV(obj)); } diff --git a/hw/cxl/cxl-host.c b/hw/cxl/cxl-host.c index 6e923ceeaf..034c7805b3 100644 --- a/hw/cxl/cxl-host.c +++ b/hw/cxl/cxl-host.c @@ -84,7 +84,7 @@ void cxl_fmws_link_targets(CXLState *cxl_state, Error **errp) bool ambig; o = 
object_resolve_path_type(fw->targets[i], - TYPE_PXB_CXL_DEVICE, + TYPE_PXB_CXL_DEV, &ambig); if (!o) { error_setg(errp, "Could not resolve CXLFM target %s", @@ -141,7 +141,7 @@ static PCIDevice *cxl_cfmws_find_device(CXLFixedWindow *fw, hwaddr addr) addr += fw->base; rb_index = (addr / cxl_decode_ig(fw->enc_int_gran)) % fw->num_targets; - hb = PCI_HOST_BRIDGE(fw->target_hbs[rb_index]->cxl.cxl_host_bridge); + hb = PCI_HOST_BRIDGE(fw->target_hbs[rb_index]->cxl_host_bridge); if (!hb || !hb->bus || !pci_bus_is_cxl(hb->bus)) { return NULL; } diff --git a/hw/pci-bridge/pci_expander_bridge.c b/hw/pci-bridge/pci_expander_bridge.c index a78327b5f2..613857b601 100644 --- a/hw/pci-bridge/pci_expander_bridge.c +++ b/hw/pci-bridge/pci_expander_bridge.c @@ -50,24 +50,8 @@ struct PXBBus { char bus_path[8]; }; -#define TYPE_PXB_DEVICE "pxb" -DECLARE_INSTANCE_CHECKER(PXBDev, PXB_DEV, - TYPE_PXB_DEVICE) - -#define TYPE_PXB_PCIE_DEVICE "pxb-pcie" -DECLARE_INSTANCE_CHECKER(PXBDev, PXB_PCIE_DEV, - TYPE_PXB_PCIE_DEVICE) - -static PXBDev *convert_to_pxb(PCIDevice *dev) -{ - /* A CXL PXB's parent bus is PCIe, so the normal check won't work */ - if (object_dynamic_cast(OBJECT(dev), TYPE_PXB_CXL_DEVICE)) { - return PXB_CXL_DEV(dev); - } - - return pci_bus_is_express(pci_get_bus(dev)) - ? 
PXB_PCIE_DEV(dev) : PXB_DEV(dev); -} +#define TYPE_PXB_PCIE_DEV "pxb-pcie" +OBJECT_DECLARE_SIMPLE_TYPE(PXBPCIEDev, PXB_PCIE_DEV) static GList *pxb_dev_list; @@ -89,14 +73,14 @@ bool cxl_get_hb_passthrough(PCIHostState *hb) static int pxb_bus_num(PCIBus *bus) { - PXBDev *pxb = convert_to_pxb(bus->parent_dev); + PXBDev *pxb = PXB_DEV(bus->parent_dev); return pxb->bus_nr; } static uint16_t pxb_bus_numa_node(PCIBus *bus) { - PXBDev *pxb = convert_to_pxb(bus->parent_dev); + PXBDev *pxb = PXB_DEV(bus->parent_dev); return pxb->numa_node; } @@ -154,7 +138,7 @@ static char *pxb_host_ofw_unit_address(const SysBusDevice *dev) pxb_host = PCI_HOST_BRIDGE(dev); pxb_bus = pxb_host->bus; - pxb_dev = convert_to_pxb(pxb_bus->parent_dev); + pxb_dev = PXB_DEV(pxb_bus->parent_dev); position = g_list_index(pxb_dev_list, pxb_dev); assert(position >= 0); @@ -212,8 +196,8 @@ static void pxb_cxl_realize(DeviceState *dev, Error **errp) */ void pxb_cxl_hook_up_registers(CXLState *cxl_state, PCIBus *bus, Error **errp) { - PXBDev *pxb = PXB_CXL_DEV(pci_bridge_get_device(bus)); - CXLHost *cxl = pxb->cxl.cxl_host_bridge; + PXBCXLDev *pxb = PXB_CXL_DEV(pci_bridge_get_device(bus)); + CXLHost *cxl = pxb->cxl_host_bridge; CXLComponentState *cxl_cstate = &cxl->cxl_cstate; struct MemoryRegion *mr = &cxl_cstate->crb.component_registers; hwaddr offset; @@ -299,7 +283,7 @@ static int pxb_map_irq_fn(PCIDevice *pci_dev, int pin) static void pxb_cxl_dev_reset(DeviceState *dev) { - CXLHost *cxl = PXB_CXL_DEV(dev)->cxl.cxl_host_bridge; + CXLHost *cxl = PXB_CXL_DEV(dev)->cxl_host_bridge; CXLComponentState *cxl_cstate = &cxl->cxl_cstate; PCIHostState *hb = PCI_HOST_BRIDGE(cxl); uint32_t *reg_state = cxl_cstate->crb.cache_mem_registers; @@ -337,7 +321,7 @@ static gint pxb_compare(gconstpointer a, gconstpointer b) static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type, Error **errp) { - PXBDev *pxb = convert_to_pxb(dev); + PXBDev *pxb = PXB_DEV(dev); DeviceState *ds, *bds = NULL; PCIBus *bus; const 
char *dev_name = NULL; @@ -365,7 +349,7 @@ static void pxb_dev_realize_common(PCIDevice *dev, enum BusType type, } else if (type == CXL) { bus = pci_root_bus_new(ds, dev_name, NULL, NULL, 0, TYPE_PXB_CXL_BUS); bus->flags |= PCI_BUS_CXL; - PXB_CXL_DEV(dev)->cxl.cxl_host_bridge = PXB_CXL_HOST(ds); + PXB_CXL_DEV(dev)->cxl_host_bridge = PXB_CXL_HOST(ds); } else { bus = pci_root_bus_new(ds, "pxb-internal", NULL, NULL, 0, TYPE_PXB_BUS); bds = qdev_new("pci-bridge"); @@ -418,7 +402,7 @@ static void pxb_dev_realize(PCIDevice *dev, Error **errp) static void pxb_dev_exitfn(PCIDevice *pci_dev) { - PXBDev *pxb = convert_to_pxb(pci_dev); + PXBDev *pxb = PXB_DEV(pci_dev); pxb_dev_list = g_list_remove(pxb_dev_list, pxb); } @@ -449,7 +433,7 @@ static void pxb_dev_class_init(ObjectClass *klass, void *data) } static const TypeInfo pxb_dev_info = { - .name = TYPE_PXB_DEVICE, + .name = TYPE_PXB_DEV, .parent = TYPE_PCI_DEVICE, .instance_size = sizeof(PXBDev), .class_init = pxb_dev_class_init, @@ -481,15 +465,14 @@ static void pxb_pcie_dev_class_init(ObjectClass *klass, void *data) k->class_id = PCI_CLASS_BRIDGE_HOST; dc->desc = "PCI Express Expander Bridge"; - device_class_set_props(dc, pxb_dev_properties); dc->hotpluggable = false; set_bit(DEVICE_CATEGORY_BRIDGE, dc->categories); } static const TypeInfo pxb_pcie_dev_info = { - .name = TYPE_PXB_PCIE_DEVICE, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PXBDev), + .name = TYPE_PXB_PCIE_DEV, + .parent = TYPE_PXB_DEV, + .instance_size = sizeof(PXBPCIEDev), .class_init = pxb_pcie_dev_class_init, .interfaces = (InterfaceInfo[]) { { INTERFACE_CONVENTIONAL_PCI_DEVICE }, @@ -510,11 +493,7 @@ static void pxb_cxl_dev_realize(PCIDevice *dev, Error **errp) } static Property pxb_cxl_dev_properties[] = { - /* Note: 0 is not a legal PXB bus number. 
*/ - DEFINE_PROP_UINT8("bus_nr", PXBDev, bus_nr, 0), - DEFINE_PROP_UINT16("numa_node", PXBDev, numa_node, NUMA_NODE_UNASSIGNED), - DEFINE_PROP_BOOL("bypass_iommu", PXBDev, bypass_iommu, false), - DEFINE_PROP_BOOL("hdm_for_passthrough", PXBDev, hdm_for_passthrough, false), + DEFINE_PROP_BOOL("hdm_for_passthrough", PXBCXLDev, hdm_for_passthrough, false), DEFINE_PROP_END_OF_LIST(), }; @@ -540,9 +519,9 @@ static void pxb_cxl_dev_class_init(ObjectClass *klass, void *data) } static const TypeInfo pxb_cxl_dev_info = { - .name = TYPE_PXB_CXL_DEVICE, - .parent = TYPE_PCI_DEVICE, - .instance_size = sizeof(PXBDev), + .name = TYPE_PXB_CXL_DEV, + .parent = TYPE_PXB_PCIE_DEV, + .instance_size = sizeof(PXBCXLDev), .class_init = pxb_cxl_dev_class_init, .interfaces = (InterfaceInfo[]){ diff --git a/include/hw/cxl/cxl.h b/include/hw/cxl/cxl.h index b2cffbb364..c453983e83 100644 --- a/include/hw/cxl/cxl.h +++ b/include/hw/cxl/cxl.h @@ -23,12 +23,12 @@ #define CXL_WINDOW_MAX 10 -typedef struct PXBDev PXBDev; +typedef struct PXBCXLDev PXBCXLDev; typedef struct CXLFixedWindow { uint64_t size; char **targets; - PXBDev *target_hbs[8]; + PXBCXLDev *target_hbs[8]; uint8_t num_targets; uint8_t enc_int_ways; uint8_t enc_int_gran; diff --git a/include/hw/pci/pci_bridge.h b/include/hw/pci/pci_bridge.h index 1677176b2a..01670e9e65 100644 --- a/include/hw/pci/pci_bridge.h +++ b/include/hw/pci/pci_bridge.h @@ -84,7 +84,7 @@ struct PCIBridge { #define PCI_BRIDGE_DEV_PROP_SHPC "shpc" typedef struct CXLHost CXLHost; -struct PXBDev { +typedef struct PXBDev { /*< private >*/ PCIDevice parent_obj; /*< public >*/ @@ -92,15 +92,27 @@ struct PXBDev { uint8_t bus_nr; uint16_t numa_node; bool bypass_iommu; - bool hdm_for_passthrough; - struct cxl_dev { - CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */ - } cxl; -}; +} PXBDev; -#define TYPE_PXB_CXL_DEVICE "pxb-cxl" -DECLARE_INSTANCE_CHECKER(PXBDev, PXB_CXL_DEV, - TYPE_PXB_CXL_DEVICE) +typedef struct PXBPCIEDev { + /*< private >*/ + PXBDev parent_obj; +} 
PXBPCIEDev; + +#define TYPE_PXB_DEV "pxb" +OBJECT_DECLARE_SIMPLE_TYPE(PXBDev, PXB_DEV) + +typedef struct PXBCXLDev { + /*< private >*/ + PXBPCIEDev parent_obj; + /*< public >*/ + + bool hdm_for_passthrough; + CXLHost *cxl_host_bridge; /* Pointer to a CXLHost */ +} PXBCXLDev; + +#define TYPE_PXB_CXL_DEV "pxb-cxl" +OBJECT_DECLARE_SIMPLE_TYPE(PXBCXLDev, PXB_CXL_DEV) int pci_bridge_ssvid_init(PCIDevice *dev, uint8_t offset, uint16_t svid, uint16_t ssid,