From 2b5de4d7dff457d34b57fcf3f985a41ddc0db062 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 26 May 2023 17:37:36 +0200 Subject: [PATCH 01/66] vdpa: Remove status in reset tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is always 0 and it is not useful to route call through file descriptor. Reviewed-by: Stefano Garzarella Acked-by: Jason Wang Signed-off-by: Eugenio Pérez Message-Id: <20230526153736.472443-1-eperezma@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 2 +- hw/virtio/vhost-vdpa.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 8f8d05cf9b..6265231683 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -44,7 +44,7 @@ vhost_vdpa_set_mem_table(void *dev, uint32_t nregions, uint32_t padding) "dev: % vhost_vdpa_dump_regions(void *dev, int i, uint64_t guest_phys_addr, uint64_t memory_size, uint64_t userspace_addr, uint64_t flags_padding) "dev: %p %d: guest_phys_addr: 0x%"PRIx64" memory_size: 0x%"PRIx64" userspace_addr: 0x%"PRIx64" flags_padding: 0x%"PRIx64 vhost_vdpa_set_features(void *dev, uint64_t features) "dev: %p features: 0x%"PRIx64 vhost_vdpa_get_device_id(void *dev, uint32_t device_id) "dev: %p device_id %"PRIu32 -vhost_vdpa_reset_device(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 +vhost_vdpa_reset_device(void *dev) "dev: %p" vhost_vdpa_get_vq_index(void *dev, int idx, int vq_idx) "dev: %p idx: %d vq idx: %d" vhost_vdpa_set_vring_ready(void *dev) "dev: %p" vhost_vdpa_dump_config(void *dev, const char *line) "dev: %p %s" diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 3c575a9a6e..6246e5b9bf 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -859,7 +859,7 @@ static int vhost_vdpa_reset_device(struct vhost_dev *dev) uint8_t status = 0; ret = vhost_vdpa_call(dev, VHOST_VDPA_SET_STATUS, &status); - trace_vhost_vdpa_reset_device(dev, status); + trace_vhost_vdpa_reset_device(dev); v->suspended = false; return ret; } From ee071f67f7a103c66f85f68ffe083712929122e3 Mon Sep 17 00:00:00 2001 From: Viktor Prutyanov Date: Mon, 26 Jun 2023 12:12:57 +0300 Subject: [PATCH 02/66] vhost: register and change IOMMU flag depending on Device-TLB state The guest can disable or never enable Device-TLB. In these cases, it can't be used even if enabled in QEMU. So, check Device-TLB state before registering IOMMU notifier and select unmap flag depending on that. Also, implement a way to change IOMMU notifier flag if Device-TLB state is changed. Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2001312 Signed-off-by: Viktor Prutyanov Acked-by: Jason Wang Message-Id: <20230626091258.24453-2-viktor@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-stub.c | 4 ++++ hw/virtio/vhost.c | 38 ++++++++++++++++++++++++++------------ include/hw/virtio/vhost.h | 1 + 3 files changed, 31 insertions(+), 12 deletions(-) diff --git a/hw/virtio/vhost-stub.c b/hw/virtio/vhost-stub.c index c175148fce..aa858ef3fb 100644 --- a/hw/virtio/vhost-stub.c +++ b/hw/virtio/vhost-stub.c @@ -15,3 +15,7 @@ bool vhost_user_init(VhostUserState *user, CharBackend *chr, Error **errp) void vhost_user_cleanup(VhostUserState *user) { } + +void vhost_toggle_device_iotlb(VirtIODevice *vdev) +{ +} diff --git a/hw/virtio/vhost.c b/hw/virtio/vhost.c index 82394331bf..abf0d03c8d 100644 --- a/hw/virtio/vhost.c +++ b/hw/virtio/vhost.c @@ -780,7 +780,6 @@ static void vhost_iommu_region_add(MemoryListener *listener, Int128 end; int iommu_idx; IOMMUMemoryRegion *iommu_mr; - int ret; if (!memory_region_is_iommu(section->mr)) { return; @@ -795,7 +794,9 @@ static void vhost_iommu_region_add(MemoryListener *listener, iommu_idx = memory_region_iommu_attrs_to_index(iommu_mr, MEMTXATTRS_UNSPECIFIED); iommu_notifier_init(&iommu->n, vhost_iommu_unmap_notify, - IOMMU_NOTIFIER_DEVIOTLB_UNMAP, + dev->vdev->device_iotlb_enabled ? + IOMMU_NOTIFIER_DEVIOTLB_UNMAP : + IOMMU_NOTIFIER_UNMAP, section->offset_within_region, int128_get64(end), iommu_idx); @@ -803,16 +804,8 @@ static void vhost_iommu_region_add(MemoryListener *listener, iommu->iommu_offset = section->offset_within_address_space - section->offset_within_region; iommu->hdev = dev; - ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL); - if (ret) { - /* - * Some vIOMMUs do not support dev-iotlb yet. If so, try to use the - * UNMAP legacy message - */ - iommu->n.notifier_flags = IOMMU_NOTIFIER_UNMAP; - memory_region_register_iommu_notifier(section->mr, &iommu->n, - &error_fatal); - } + memory_region_register_iommu_notifier(section->mr, &iommu->n, + &error_fatal); QLIST_INSERT_HEAD(&dev->iommu_list, iommu, iommu_next); /* TODO: can replay help performance here? */ } @@ -840,6 +833,27 @@ static void vhost_iommu_region_del(MemoryListener *listener, } } +void vhost_toggle_device_iotlb(VirtIODevice *vdev) +{ + VirtioDeviceClass *vdc = VIRTIO_DEVICE_GET_CLASS(vdev); + struct vhost_dev *dev; + struct vhost_iommu *iommu; + + if (vdev->vhost_started) { + dev = vdc->get_vhost(vdev); + } else { + return; + } + + QLIST_FOREACH(iommu, &dev->iommu_list, iommu_next) { + memory_region_unregister_iommu_notifier(iommu->mr, &iommu->n); + iommu->n.notifier_flags = vdev->device_iotlb_enabled ? + IOMMU_NOTIFIER_DEVIOTLB_UNMAP : IOMMU_NOTIFIER_UNMAP; + memory_region_register_iommu_notifier(iommu->mr, &iommu->n, + &error_fatal); + } +} + static int vhost_virtqueue_set_addr(struct vhost_dev *dev, struct vhost_virtqueue *vq, unsigned idx, bool enable_log) diff --git a/include/hw/virtio/vhost.h b/include/hw/virtio/vhost.h index f7f10c8fb7..6a173cb9fa 100644 --- a/include/hw/virtio/vhost.h +++ b/include/hw/virtio/vhost.h @@ -320,6 +320,7 @@ bool vhost_has_free_slot(void); int vhost_net_set_backend(struct vhost_dev *hdev, struct vhost_vring_file *file); +void vhost_toggle_device_iotlb(VirtIODevice *vdev); int vhost_device_iotlb_miss(struct vhost_dev *dev, uint64_t iova, int write); int vhost_virtqueue_start(struct vhost_dev *dev, struct VirtIODevice *vdev, From cd9b8346884353ba9ae6560b44b7cccdf00a6633 Mon Sep 17 00:00:00 2001 From: Viktor Prutyanov Date: Mon, 26 Jun 2023 12:12:58 +0300 Subject: [PATCH 03/66] virtio-net: pass Device-TLB enable/disable events to vhost If vhost is enabled for virtio-net, Device-TLB enable/disable events must be passed to vhost for proper IOMMU unmap flag selection. Signed-off-by: Viktor Prutyanov Acked-by: Jason Wang Message-Id: <20230626091258.24453-3-viktor@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/net/virtio-net.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index 04783f5b94..d51067123b 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3951,6 +3951,7 @@ static void virtio_net_class_init(ObjectClass *klass, void *data) vdc->vmsd = &vmstate_virtio_net_device; vdc->primary_unplug_pending = primary_unplug_pending; vdc->get_vhost = virtio_net_get_vhost; + vdc->toggle_device_iotlb = vhost_toggle_device_iotlb; } static const TypeInfo virtio_net_info = { From ee3729d9b088a1988cd6b0a7738fad644fbbc415 Mon Sep 17 00:00:00 2001 From: Erico Nunes Date: Mon, 26 Jun 2023 18:47:05 +0200 Subject: [PATCH 04/66] virtio-gpu: refactor generate_edid function to virtio_gpu_base MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This functionality can be shared with upcoming use in vhost-user-gpu, so move it to the shared file to avoid duplicating it. Signed-off-by: Erico Nunes Reviewed-by: Marc-André Lureau Message-Id: <20230626164708.1163239-2-ernunes@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/display/virtio-gpu-base.c | 17 +++++++++++++++++ hw/display/virtio-gpu.c | 20 +------------------- include/hw/virtio/virtio-gpu.h | 2 ++ 3 files changed, 20 insertions(+), 19 deletions(-) diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index a29f191aa8..7ab7d08d0a 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -17,6 +17,7 @@ #include "migration/blocker.h" #include "qapi/error.h" #include "qemu/error-report.h" +#include "hw/display/edid.h" #include "trace.h" void @@ -51,6 +52,22 @@ virtio_gpu_base_fill_display_info(VirtIOGPUBase *g, } } +void +virtio_gpu_base_generate_edid(VirtIOGPUBase *g, int scanout, + struct virtio_gpu_resp_edid *edid) +{ + qemu_edid_info info = { + .width_mm = g->req_state[scanout].width_mm, + .height_mm = g->req_state[scanout].height_mm, + .prefx = g->req_state[scanout].width, + .prefy = g->req_state[scanout].height, + .refresh_rate = g->req_state[scanout].refresh_rate, + }; + + edid->size = cpu_to_le32(sizeof(edid->edid)); + qemu_edid_generate(edid->edid, sizeof(edid->edid), &info); +} + static void virtio_gpu_invalidate_display(void *opaque) { } diff --git a/hw/display/virtio-gpu.c b/hw/display/virtio-gpu.c index 347e17d490..befa7d6d78 100644 --- a/hw/display/virtio-gpu.c +++ b/hw/display/virtio-gpu.c @@ -24,7 +24,6 @@ #include "hw/virtio/virtio-gpu-bswap.h" #include "hw/virtio/virtio-gpu-pixman.h" #include "hw/virtio/virtio-bus.h" -#include "hw/display/edid.h" #include "hw/qdev-properties.h" #include "qemu/log.h" #include "qemu/module.h" @@ -207,23 +206,6 @@ void virtio_gpu_get_display_info(VirtIOGPU *g, sizeof(display_info)); } -static void -virtio_gpu_generate_edid(VirtIOGPU *g, int scanout, - struct virtio_gpu_resp_edid *edid) -{ - VirtIOGPUBase *b = VIRTIO_GPU_BASE(g); - qemu_edid_info info = { - .width_mm = b->req_state[scanout].width_mm, - .height_mm = b->req_state[scanout].height_mm, - .prefx = b->req_state[scanout].width, - .prefy = b->req_state[scanout].height, - .refresh_rate = b->req_state[scanout].refresh_rate, - }; - - edid->size = cpu_to_le32(sizeof(edid->edid)); - qemu_edid_generate(edid->edid, sizeof(edid->edid), &info); -} - void virtio_gpu_get_edid(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd) { @@ -242,7 +224,7 @@ void virtio_gpu_get_edid(VirtIOGPU *g, trace_virtio_gpu_cmd_get_edid(get_edid.scanout); memset(&edid, 0, sizeof(edid)); edid.hdr.type = VIRTIO_GPU_RESP_OK_EDID; - virtio_gpu_generate_edid(g, get_edid.scanout, &edid); + virtio_gpu_base_generate_edid(VIRTIO_GPU_BASE(g), get_edid.scanout, &edid); virtio_gpu_ctrl_response(g, cmd, &edid.hdr, sizeof(edid)); } diff --git a/include/hw/virtio/virtio-gpu.h b/include/hw/virtio/virtio-gpu.h index 7a5f8056ea..7ea8ae2bee 100644 --- a/include/hw/virtio/virtio-gpu.h +++ b/include/hw/virtio/virtio-gpu.h @@ -242,6 +242,8 @@ void virtio_gpu_base_reset(VirtIOGPUBase *g); void virtio_gpu_base_fill_display_info(VirtIOGPUBase *g, struct virtio_gpu_resp_display_info *dpy_info); +void virtio_gpu_base_generate_edid(VirtIOGPUBase *g, int scanout, + struct virtio_gpu_resp_edid *edid); /* virtio-gpu.c */ void virtio_gpu_ctrl_response(VirtIOGPU *g, struct virtio_gpu_ctrl_command *cmd, From 50cbd5b4b37de3f474495c5fd6a66d5c0e8e49c2 Mon Sep 17 00:00:00 2001 From: Erico Nunes Date: Mon, 26 Jun 2023 18:47:06 +0200 Subject: [PATCH 05/66] docs: vhost-user-gpu: add protocol changes for EDID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VHOST_USER_GPU_GET_EDID is defined as a message from the backend to the frontend to retrieve the EDID data for a given scanout. The VHOST_USER_GPU_PROTOCOL_F_EDID protocol feature is defined as a way to check whether this new message is supported or not. Signed-off-by: Erico Nunes Reviewed-by: Marc-André Lureau Message-Id: <20230626164708.1163239-3-ernunes@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/interop/vhost-user-gpu.rst | 28 +++++++++++++++++++++++++--- 1 file changed, 25 insertions(+), 3 deletions(-) diff --git a/docs/interop/vhost-user-gpu.rst b/docs/interop/vhost-user-gpu.rst index 1640553729..b78806892d 100644 --- a/docs/interop/vhost-user-gpu.rst +++ b/docs/interop/vhost-user-gpu.rst @@ -124,6 +124,16 @@ VhostUserGpuDMABUFScanout :fourcc: ``i32``, the DMABUF fourcc +VhostUserGpuEdidRequest +^^^^^^^^^^^^^^^^^^^^^^^ + ++------------+ +| scanout-id | ++------------+ + +:scanout-id: ``u32``, the scanout to get edid from + + C structure ----------- @@ -141,6 +151,8 @@ In QEMU the vhost-user-gpu message is implemented with the following struct: VhostUserGpuScanout scanout; VhostUserGpuUpdate update; VhostUserGpuDMABUFScanout dmabuf_scanout; + VhostUserGpuEdidRequest edid_req; + struct virtio_gpu_resp_edid resp_edid; struct virtio_gpu_resp_display_info display_info; uint64_t u64; } payload; @@ -149,10 +161,11 @@ In QEMU the vhost-user-gpu message is implemented with the following struct: Protocol features ----------------- -None yet. +.. code:: c -As the protocol may need to evolve, new messages and communication -changes are negotiated thanks to preliminary + #define VHOST_USER_GPU_PROTOCOL_F_EDID 0 + +New messages and communication changes are negotiated thanks to the ``VHOST_USER_GPU_GET_PROTOCOL_FEATURES`` and ``VHOST_USER_GPU_SET_PROTOCOL_FEATURES`` requests. @@ -241,3 +254,12 @@ Message types Note: there is no data payload, since the scanout is shared thanks to DMABUF, that must have been set previously with ``VHOST_USER_GPU_DMABUF_SCANOUT``. + +``VHOST_USER_GPU_GET_EDID`` + :id: 11 + :request payload: ``struct VhostUserGpuEdidRequest`` + :reply payload: ``struct virtio_gpu_resp_edid`` (from virtio specification) + + Retrieve the EDID data for a given scanout. + This message requires the ``VHOST_USER_GPU_PROTOCOL_F_EDID`` protocol + feature to be supported. From c06444261e20f5c74962db08638b021e6ca2ad43 Mon Sep 17 00:00:00 2001 From: Erico Nunes Date: Mon, 26 Jun 2023 18:47:07 +0200 Subject: [PATCH 06/66] contrib/vhost-user-gpu: implement get_edid feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the virtio-gpu feature in contrib/vhost-user-gpu, which was unsupported until now. In this implementation, the feature is enabled inconditionally to avoid creating another optional config argument. Similarly to get_display_info, vhost-user-gpu sends a message back to the frontend to have access to all the display information. In the case of get_edid, it also needs to pass which scanout we should retrieve the edid for. The VHOST_USER_GPU_PROTOCOL_F_EDID protocol feature is required if the frontend sets the VIRTIO_GPU_F_EDID virtio-gpu feature. If the frontend sets the virtio-gpu feature but does not support the protocol feature, the backend will abort with an error. Signed-off-by: Erico Nunes Reviewed-by: Marc-André Lureau Message-Id: <20230626164708.1163239-4-ernunes@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- contrib/vhost-user-gpu/vhost-user-gpu.c | 73 +++++++++++++++++++++++-- contrib/vhost-user-gpu/virgl.c | 3 + contrib/vhost-user-gpu/vugpu.h | 11 ++++ 3 files changed, 81 insertions(+), 6 deletions(-) diff --git a/contrib/vhost-user-gpu/vhost-user-gpu.c b/contrib/vhost-user-gpu/vhost-user-gpu.c index bfb8d93cf8..2e7815a7a3 100644 --- a/contrib/vhost-user-gpu/vhost-user-gpu.c +++ b/contrib/vhost-user-gpu/vhost-user-gpu.c @@ -303,6 +303,53 @@ vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) cmd->state = VG_CMD_STATE_PENDING; } +static gboolean +get_edid_cb(gint fd, GIOCondition condition, gpointer user_data) +{ + struct virtio_gpu_resp_edid resp_edid; + VuGpu *vg = user_data; + struct virtio_gpu_ctrl_command *cmd = QTAILQ_LAST(&vg->fenceq); + + g_debug("get edid cb"); + assert(cmd->cmd_hdr.type == VIRTIO_GPU_CMD_GET_EDID); + if (!vg_recv_msg(vg, VHOST_USER_GPU_GET_EDID, + sizeof(resp_edid), &resp_edid)) { + return G_SOURCE_CONTINUE; + } + + QTAILQ_REMOVE(&vg->fenceq, cmd, next); + vg_ctrl_response(vg, cmd, &resp_edid.hdr, sizeof(resp_edid)); + + vg->wait_in = 0; + vg_handle_ctrl(&vg->dev.parent, 0); + + return G_SOURCE_REMOVE; +} + +void +vg_get_edid(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) +{ + struct virtio_gpu_cmd_get_edid get_edid; + + VUGPU_FILL_CMD(get_edid); + virtio_gpu_bswap_32(&get_edid, sizeof(get_edid)); + + VhostUserGpuMsg msg = { + .request = VHOST_USER_GPU_GET_EDID, + .size = sizeof(VhostUserGpuEdidRequest), + .payload.edid_req = { + .scanout_id = get_edid.scanout, + }, + }; + + assert(vg->wait_in == 0); + + vg_send_msg(vg, &msg, -1); + vg->wait_in = g_unix_fd_add(vg->sock_fd, G_IO_IN | G_IO_HUP, + get_edid_cb, vg); + cmd->state = VG_CMD_STATE_PENDING; +} + static void vg_resource_create_2d(VuGpu *g, struct virtio_gpu_ctrl_command *cmd) @@ -837,8 +884,9 @@ vg_process_cmd(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd) case VIRTIO_GPU_CMD_RESOURCE_DETACH_BACKING: vg_resource_detach_backing(vg, cmd); break; - /* case VIRTIO_GPU_CMD_GET_EDID: */ - /* break */ + case VIRTIO_GPU_CMD_GET_EDID: + vg_get_edid(vg, cmd); + break; default: g_warning("TODO handle ctrl %x\n", cmd->cmd_hdr.type); cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; @@ -1022,26 +1070,36 @@ vg_queue_set_started(VuDev *dev, int qidx, bool started) static gboolean protocol_features_cb(gint fd, GIOCondition condition, gpointer user_data) { + const uint64_t protocol_edid = (1 << VHOST_USER_GPU_PROTOCOL_F_EDID); VuGpu *g = user_data; - uint64_t u64; + uint64_t protocol_features; VhostUserGpuMsg msg = { .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES }; - if (!vg_recv_msg(g, msg.request, sizeof(u64), &u64)) { + if (!vg_recv_msg(g, msg.request, + sizeof(protocol_features), &protocol_features)) { return G_SOURCE_CONTINUE; } + protocol_features &= protocol_edid; + msg = (VhostUserGpuMsg) { .request = VHOST_USER_GPU_SET_PROTOCOL_FEATURES, .size = sizeof(uint64_t), - .payload.u64 = 0 + .payload.u64 = protocol_features, }; vg_send_msg(g, &msg, -1); g->wait_in = 0; vg_handle_ctrl(&g->dev.parent, 0); + if (g->edid_inited && !(protocol_features & protocol_edid)) { + g_printerr("EDID feature set by the frontend but it does not support " + "the EDID vhost-user-gpu protocol.\n"); + exit(EXIT_FAILURE); + } + return G_SOURCE_REMOVE; } @@ -1049,7 +1107,7 @@ static void set_gpu_protocol_features(VuGpu *g) { VhostUserGpuMsg msg = { - .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES + .request = VHOST_USER_GPU_GET_PROTOCOL_FEATURES, }; vg_send_msg(g, &msg, -1); @@ -1086,6 +1144,7 @@ vg_get_features(VuDev *dev) if (opt_virgl) { features |= 1 << VIRTIO_GPU_F_VIRGL; } + features |= 1 << VIRTIO_GPU_F_EDID; return features; } @@ -1103,6 +1162,8 @@ vg_set_features(VuDev *dev, uint64_t features) g->virgl_inited = true; } + g->edid_inited = !!(features & (1 << VIRTIO_GPU_F_EDID)); + g->virgl = virgl; } diff --git a/contrib/vhost-user-gpu/virgl.c b/contrib/vhost-user-gpu/virgl.c index 3e45e1bd33..211aa110a9 100644 --- a/contrib/vhost-user-gpu/virgl.c +++ b/contrib/vhost-user-gpu/virgl.c @@ -495,6 +495,9 @@ void vg_virgl_process_cmd(VuGpu *g, struct virtio_gpu_ctrl_command *cmd) case VIRTIO_GPU_CMD_GET_DISPLAY_INFO: vg_get_display_info(g, cmd); break; + case VIRTIO_GPU_CMD_GET_EDID: + vg_get_edid(g, cmd); + break; default: g_debug("TODO handle ctrl %x\n", cmd->cmd_hdr.type); cmd->error = VIRTIO_GPU_RESP_ERR_UNSPEC; diff --git a/contrib/vhost-user-gpu/vugpu.h b/contrib/vhost-user-gpu/vugpu.h index e2864bba68..f0f2069c47 100644 --- a/contrib/vhost-user-gpu/vugpu.h +++ b/contrib/vhost-user-gpu/vugpu.h @@ -36,6 +36,7 @@ typedef enum VhostUserGpuRequest { VHOST_USER_GPU_UPDATE, VHOST_USER_GPU_DMABUF_SCANOUT, VHOST_USER_GPU_DMABUF_UPDATE, + VHOST_USER_GPU_GET_EDID, } VhostUserGpuRequest; typedef struct VhostUserGpuDisplayInfoReply { @@ -83,6 +84,10 @@ typedef struct VhostUserGpuDMABUFScanout { int fd_drm_fourcc; } QEMU_PACKED VhostUserGpuDMABUFScanout; +typedef struct VhostUserGpuEdidRequest { + uint32_t scanout_id; +} QEMU_PACKED VhostUserGpuEdidRequest; + typedef struct VhostUserGpuMsg { uint32_t request; /* VhostUserGpuRequest */ uint32_t flags; @@ -93,6 +98,8 @@ typedef struct VhostUserGpuMsg { VhostUserGpuScanout scanout; VhostUserGpuUpdate update; VhostUserGpuDMABUFScanout dmabuf_scanout; + VhostUserGpuEdidRequest edid_req; + struct virtio_gpu_resp_edid resp_edid; struct virtio_gpu_resp_display_info display_info; uint64_t u64; } payload; @@ -104,6 +111,8 @@ static VhostUserGpuMsg m __attribute__ ((unused)); #define VHOST_USER_GPU_MSG_FLAG_REPLY 0x4 +#define VHOST_USER_GPU_PROTOCOL_F_EDID 0 + struct virtio_gpu_scanout { uint32_t width, height; int x, y; @@ -122,6 +131,7 @@ typedef struct VuGpu { bool virgl; bool virgl_inited; + bool edid_inited; uint32_t inflight; struct virtio_gpu_scanout scanout[VIRTIO_GPU_MAX_SCANOUTS]; @@ -171,6 +181,7 @@ int vg_create_mapping_iov(VuGpu *g, struct iovec **iov); void vg_cleanup_mapping_iov(VuGpu *g, struct iovec *iov, uint32_t count); void vg_get_display_info(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd); +void vg_get_edid(VuGpu *vg, struct virtio_gpu_ctrl_command *cmd); void vg_wait_ok(VuGpu *g); From 31f137e3d6c596883ecd572ff3c53eb03b0e87c7 Mon Sep 17 00:00:00 2001 From: Erico Nunes Date: Mon, 26 Jun 2023 18:47:08 +0200 Subject: [PATCH 07/66] vhost-user-gpu: implement get_edid frontend feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement the frontend side of the get_edid feature in the qemu vhost-user-gpu frontend device. Signed-off-by: Erico Nunes Reviewed-by: Marc-André Lureau Message-Id: <20230626164708.1163239-5-ernunes@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/display/vhost-user-gpu.c | 32 ++++++++++++++++++++++++++++++++ 1 file changed, 32 insertions(+) diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 15f9d99d09..543ea92721 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -31,6 +31,7 @@ typedef enum VhostUserGpuRequest { VHOST_USER_GPU_UPDATE, VHOST_USER_GPU_DMABUF_SCANOUT, VHOST_USER_GPU_DMABUF_UPDATE, + VHOST_USER_GPU_GET_EDID, } VhostUserGpuRequest; typedef struct VhostUserGpuDisplayInfoReply { @@ -78,6 +79,10 @@ typedef struct VhostUserGpuDMABUFScanout { int fd_drm_fourcc; } QEMU_PACKED VhostUserGpuDMABUFScanout; +typedef struct VhostUserGpuEdidRequest { + uint32_t scanout_id; +} QEMU_PACKED VhostUserGpuEdidRequest; + typedef struct VhostUserGpuMsg { uint32_t request; /* VhostUserGpuRequest */ uint32_t flags; @@ -88,6 +93,8 @@ typedef struct VhostUserGpuMsg { VhostUserGpuScanout scanout; VhostUserGpuUpdate update; VhostUserGpuDMABUFScanout dmabuf_scanout; + VhostUserGpuEdidRequest edid_req; + struct virtio_gpu_resp_edid resp_edid; struct virtio_gpu_resp_display_info display_info; uint64_t u64; } payload; @@ -99,6 +106,8 @@ static VhostUserGpuMsg m __attribute__ ((unused)); #define VHOST_USER_GPU_MSG_FLAG_REPLY 0x4 +#define VHOST_USER_GPU_PROTOCOL_F_EDID 0 + static void vhost_user_gpu_update_blocked(VhostUserGPU *g, bool blocked); static void @@ -161,6 +170,9 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg) .request = msg->request, .flags = VHOST_USER_GPU_MSG_FLAG_REPLY, .size = sizeof(uint64_t), + .payload = { + .u64 = (1 << VHOST_USER_GPU_PROTOCOL_F_EDID) + } }; vhost_user_gpu_send_msg(g, &reply); @@ -184,6 +196,26 @@ vhost_user_gpu_handle_display(VhostUserGPU *g, VhostUserGpuMsg *msg) vhost_user_gpu_send_msg(g, &reply); break; } + case VHOST_USER_GPU_GET_EDID: { + VhostUserGpuEdidRequest *m = &msg->payload.edid_req; + struct virtio_gpu_resp_edid resp = { {} }; + VhostUserGpuMsg reply = { + .request = msg->request, + .flags = VHOST_USER_GPU_MSG_FLAG_REPLY, + .size = sizeof(reply.payload.resp_edid), + }; + + if (m->scanout_id >= g->parent_obj.conf.max_outputs) { + error_report("invalid scanout: %d", m->scanout_id); + break; + } + + resp.hdr.type = VIRTIO_GPU_RESP_OK_EDID; + virtio_gpu_base_generate_edid(VIRTIO_GPU_BASE(g), m->scanout_id, &resp); + memcpy(&reply.payload.resp_edid, &resp, sizeof(resp)); + vhost_user_gpu_send_msg(g, &reply); + break; + } case VHOST_USER_GPU_SCANOUT: { VhostUserGpuScanout *m = &msg->payload.scanout; From a5dab090e1425929a1a5a3034768cb33dab69bf4 Mon Sep 17 00:00:00 2001 From: Milan Zamazal Date: Wed, 28 Jun 2023 12:05:22 +0200 Subject: [PATCH 08/66] hw/virtio: Add boilerplate for vhost-user-scmi device This creates the QEMU side of the vhost-user-scmi device which connects to the remote daemon. It is based on code of similar vhost-user devices. Signed-off-by: Milan Zamazal Message-Id: <20230628100524.342666-2-mzamazal@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 6 + hw/virtio/Kconfig | 5 + hw/virtio/meson.build | 1 + hw/virtio/vhost-user-scmi.c | 306 ++++++++++++++++++++++++++++ include/hw/virtio/vhost-user-scmi.h | 30 +++ 5 files changed, 348 insertions(+) create mode 100644 hw/virtio/vhost-user-scmi.c create mode 100644 include/hw/virtio/vhost-user-scmi.h diff --git a/MAINTAINERS b/MAINTAINERS index 1817cfc62f..504dddb7dd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2217,6 +2217,12 @@ F: hw/virtio/vhost-user-gpio* F: include/hw/virtio/vhost-user-gpio.h F: tests/qtest/libqos/virtio-gpio.* +vhost-user-scmi +R: mzamazal@redhat.com +S: Supported +F: hw/virtio/vhost-user-scmi* +F: include/hw/virtio/vhost-user-scmi.h + virtio-crypto M: Gonglei S: Supported diff --git a/hw/virtio/Kconfig b/hw/virtio/Kconfig index de7a35429a..a9ee09062f 100644 --- a/hw/virtio/Kconfig +++ b/hw/virtio/Kconfig @@ -96,3 +96,8 @@ config VHOST_VDPA_DEV bool default y depends on VIRTIO && VHOST_VDPA && LINUX + +config VHOST_USER_SCMI + bool + default y + depends on VIRTIO && VHOST_USER diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index f32b22f61b..4a4d7e1c27 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -35,6 +35,7 @@ specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_I2C', if_true: files('vhost-user specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user-rng.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) +specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vhost-user-scmi.c b/hw/virtio/vhost-user-scmi.c new file mode 100644 index 0000000000..d386fb2df9 --- /dev/null +++ b/hw/virtio/vhost-user-scmi.c @@ -0,0 +1,306 @@ +/* + * Vhost-user SCMI virtio device + * + * SPDX-FileCopyrightText: Red Hat, Inc. + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Implementation based on other vhost-user devices in QEMU. + */ + +#include "qemu/osdep.h" +#include "qapi/error.h" +#include "qemu/error-report.h" +#include "hw/virtio/virtio-bus.h" +#include "hw/virtio/vhost-user-scmi.h" +#include "standard-headers/linux/virtio_ids.h" +#include "standard-headers/linux/virtio_scmi.h" +#include "trace.h" + +/* + * In this version, we don't support VIRTIO_SCMI_F_SHARED_MEMORY. + * Note that VIRTIO_SCMI_F_SHARED_MEMORY is currently not supported in + * Linux VirtIO SCMI guest driver. + */ +static const int feature_bits[] = { + VIRTIO_F_VERSION_1, + VIRTIO_F_NOTIFY_ON_EMPTY, + VIRTIO_RING_F_INDIRECT_DESC, + VIRTIO_RING_F_EVENT_IDX, + VIRTIO_F_RING_RESET, + VIRTIO_SCMI_F_P2A_CHANNELS, + VHOST_INVALID_FEATURE_BIT +}; + +static int vu_scmi_start(VirtIODevice *vdev) +{ + VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + struct vhost_dev *vhost_dev = &scmi->vhost_dev; + int ret, i; + + if (!k->set_guest_notifiers) { + error_report("binding does not support guest notifiers"); + return -ENOSYS; + } + + ret = vhost_dev_enable_notifiers(vhost_dev, vdev); + if (ret < 0) { + error_report("Error enabling host notifiers: %d", ret); + return ret; + } + + ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, true); + if (ret < 0) { + error_report("Error binding guest notifier: %d", ret); + goto err_host_notifiers; + } + + vhost_ack_features(&scmi->vhost_dev, feature_bits, vdev->guest_features); + + ret = vhost_dev_start(&scmi->vhost_dev, vdev, true); + if (ret < 0) { + error_report("Error starting vhost-user-scmi: %d", ret); + goto err_guest_notifiers; + } + + /* + * guest_notifier_mask/pending not used yet, so just unmask + * everything here. virtio-pci will do the right thing by + * enabling/disabling irqfd. + */ + for (i = 0; i < scmi->vhost_dev.nvqs; i++) { + vhost_virtqueue_mask(&scmi->vhost_dev, vdev, i, false); + } + return 0; + +err_guest_notifiers: + k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false); +err_host_notifiers: + vhost_dev_disable_notifiers(vhost_dev, vdev); + + return ret; +} + +static void vu_scmi_stop(VirtIODevice *vdev) +{ + VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); + BusState *qbus = BUS(qdev_get_parent_bus(DEVICE(vdev))); + VirtioBusClass *k = VIRTIO_BUS_GET_CLASS(qbus); + struct vhost_dev *vhost_dev = &scmi->vhost_dev; + int ret; + + if (!k->set_guest_notifiers) { + return; + } + + vhost_dev_stop(vhost_dev, vdev, true); + + ret = k->set_guest_notifiers(qbus->parent, vhost_dev->nvqs, false); + if (ret < 0) { + error_report("vhost guest notifier cleanup failed: %d", ret); + return; + } + vhost_dev_disable_notifiers(vhost_dev, vdev); +} + +static void vu_scmi_set_status(VirtIODevice *vdev, uint8_t status) +{ + VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); + bool should_start = virtio_device_should_start(vdev, status); + + if (!scmi->connected) { + return; + } + if (vhost_dev_is_started(&scmi->vhost_dev) == should_start) { + return; + } + + if (should_start) { + vu_scmi_start(vdev); + } else { + vu_scmi_stop(vdev); + } +} + +static uint64_t vu_scmi_get_features(VirtIODevice *vdev, uint64_t features, + Error **errp) +{ + VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); + + return vhost_get_features(&scmi->vhost_dev, feature_bits, features); +} + +static void vu_scmi_handle_output(VirtIODevice *vdev, VirtQueue *vq) +{ + /* + * Not normally called; it's the daemon that handles the queue; + * however virtio's cleanup path can call this. + */ +} + +static void vu_scmi_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) +{ + VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); + + if (idx == VIRTIO_CONFIG_IRQ_IDX) { + return; + } + + vhost_virtqueue_mask(&scmi->vhost_dev, vdev, idx, mask); +} + +static bool vu_scmi_guest_notifier_pending(VirtIODevice *vdev, int idx) +{ + VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); + + return vhost_virtqueue_pending(&scmi->vhost_dev, idx); +} + +static void vu_scmi_connect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); + + if (scmi->connected) { + return; + } + scmi->connected = true; + + /* restore vhost state */ + if (virtio_device_started(vdev, vdev->status)) { + vu_scmi_start(vdev); + } +} + +static void vu_scmi_disconnect(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCMI *scmi = VHOST_USER_SCMI(vdev); + + if (!scmi->connected) { + return; + } + scmi->connected = false; + + if (vhost_dev_is_started(&scmi->vhost_dev)) { + vu_scmi_stop(vdev); + } +} + +static void vu_scmi_event(void *opaque, QEMUChrEvent event) +{ + DeviceState *dev = opaque; + + switch (event) { + case CHR_EVENT_OPENED: + vu_scmi_connect(dev); + break; + case CHR_EVENT_CLOSED: + vu_scmi_disconnect(dev); + break; + case CHR_EVENT_BREAK: + case CHR_EVENT_MUX_IN: + case CHR_EVENT_MUX_OUT: + /* Ignore */ + break; + } +} + +static void do_vhost_user_cleanup(VirtIODevice *vdev, VHostUserSCMI *scmi) +{ + virtio_delete_queue(scmi->cmd_vq); + virtio_delete_queue(scmi->event_vq); + g_free(scmi->vhost_dev.vqs); + virtio_cleanup(vdev); + vhost_user_cleanup(&scmi->vhost_user); +} + +static void vu_scmi_device_realize(DeviceState *dev, Error **errp) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCMI *scmi = VHOST_USER_SCMI(dev); + int ret; + + if (!scmi->chardev.chr) { + error_setg(errp, "vhost-user-scmi: chardev is mandatory"); + return; + } + + vdev->host_features |= (1ULL << VIRTIO_SCMI_F_P2A_CHANNELS); + + if (!vhost_user_init(&scmi->vhost_user, &scmi->chardev, errp)) { + return; + } + + virtio_init(vdev, VIRTIO_ID_SCMI, 0); + + scmi->cmd_vq = virtio_add_queue(vdev, 256, vu_scmi_handle_output); + scmi->event_vq = virtio_add_queue(vdev, 256, vu_scmi_handle_output); + scmi->vhost_dev.nvqs = 2; + scmi->vhost_dev.vqs = g_new0(struct vhost_virtqueue, scmi->vhost_dev.nvqs); + + ret = vhost_dev_init(&scmi->vhost_dev, &scmi->vhost_user, + VHOST_BACKEND_TYPE_USER, 0, errp); + if (ret < 0) { + error_setg_errno(errp, -ret, + "vhost-user-scmi: vhost_dev_init() failed"); + do_vhost_user_cleanup(vdev, scmi); + return; + } + + qemu_chr_fe_set_handlers(&scmi->chardev, NULL, NULL, vu_scmi_event, NULL, + dev, NULL, true); + + return; +} + +static void vu_scmi_device_unrealize(DeviceState *dev) +{ + VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VHostUserSCMI *scmi = VHOST_USER_SCMI(dev); + + vu_scmi_set_status(vdev, 0); + vhost_dev_cleanup(&scmi->vhost_dev); + do_vhost_user_cleanup(vdev, scmi); +} + +static const VMStateDescription vu_scmi_vmstate = { + .name = "vhost-user-scmi", + .unmigratable = 1, +}; + +static Property vu_scmi_properties[] = { + DEFINE_PROP_CHR("chardev", VHostUserSCMI, chardev), + DEFINE_PROP_END_OF_LIST(), +}; + +static void vu_scmi_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioDeviceClass *vdc = VIRTIO_DEVICE_CLASS(klass); + + device_class_set_props(dc, vu_scmi_properties); + dc->vmsd = &vu_scmi_vmstate; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + vdc->realize = vu_scmi_device_realize; + vdc->unrealize = vu_scmi_device_unrealize; + vdc->get_features = vu_scmi_get_features; + vdc->set_status = vu_scmi_set_status; + vdc->guest_notifier_mask = vu_scmi_guest_notifier_mask; + vdc->guest_notifier_pending = vu_scmi_guest_notifier_pending; +} + +static const TypeInfo vu_scmi_info = { + .name = TYPE_VHOST_USER_SCMI, + .parent = TYPE_VIRTIO_DEVICE, + .instance_size = sizeof(VHostUserSCMI), + .class_init = vu_scmi_class_init, +}; + +static void vu_scmi_register_types(void) +{ + type_register_static(&vu_scmi_info); +} + +type_init(vu_scmi_register_types) diff --git a/include/hw/virtio/vhost-user-scmi.h b/include/hw/virtio/vhost-user-scmi.h new file mode 100644 index 0000000000..6175a74ebd --- /dev/null +++ b/include/hw/virtio/vhost-user-scmi.h @@ -0,0 +1,30 @@ +/* + * Vhost-user SCMI virtio device + * + * Copyright (c) 2023 Red Hat, Inc. + * + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef _QEMU_VHOST_USER_SCMI_H +#define _QEMU_VHOST_USER_SCMI_H + +#include "hw/virtio/virtio.h" +#include "hw/virtio/vhost.h" +#include "hw/virtio/vhost-user.h" + +#define TYPE_VHOST_USER_SCMI "vhost-user-scmi" +OBJECT_DECLARE_SIMPLE_TYPE(VHostUserSCMI, VHOST_USER_SCMI); + +struct VHostUserSCMI { + VirtIODevice parent; + CharBackend chardev; + struct vhost_virtqueue *vhost_vqs; + struct vhost_dev vhost_dev; + VhostUserState vhost_user; + VirtQueue *cmd_vq; + VirtQueue *event_vq; + bool connected; +}; + +#endif /* _QEMU_VHOST_USER_SCMI_H */ From c46b20cf83ed5088626671447e214de165ca5297 Mon Sep 17 00:00:00 2001 From: Milan Zamazal Date: Wed, 28 Jun 2023 12:05:23 +0200 Subject: [PATCH 09/66] hw/virtio: Add vhost-user-scmi-pci boilerplate This allows is to instantiate a vhost-user-scmi device as part of a PCI bus. It is mostly boilerplate similar to the other vhost-user-*-pci boilerplates of similar devices. Signed-off-by: Milan Zamazal Message-Id: <20230628100524.342666-3-mzamazal@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/meson.build | 1 + hw/virtio/vhost-user-scmi-pci.c | 68 +++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) create mode 100644 hw/virtio/vhost-user-scmi-pci.c diff --git a/hw/virtio/meson.build b/hw/virtio/meson.build index 4a4d7e1c27..c4f4fe05fa 100644 --- a/hw/virtio/meson.build +++ b/hw/virtio/meson.build @@ -36,6 +36,7 @@ specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_RNG', if_true: files('vhost-user specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_GPIO', if_true: files('vhost-user-gpio.c')) specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_GPIO'], if_true: files('vhost-user-gpio-pci.c')) specific_virtio_ss.add(when: 'CONFIG_VHOST_USER_SCMI', if_true: files('vhost-user-scmi.c')) +specific_virtio_ss.add(when: ['CONFIG_VIRTIO_PCI', 'CONFIG_VHOST_USER_SCMI'], if_true: files('vhost-user-scmi-pci.c')) virtio_pci_ss = ss.source_set() virtio_pci_ss.add(when: 'CONFIG_VHOST_VSOCK', if_true: files('vhost-vsock-pci.c')) diff --git a/hw/virtio/vhost-user-scmi-pci.c b/hw/virtio/vhost-user-scmi-pci.c new file mode 100644 index 0000000000..7f53af7fce --- /dev/null +++ b/hw/virtio/vhost-user-scmi-pci.c @@ -0,0 +1,68 @@ +/* + * Vhost-user SCMI virtio device PCI glue + * + * SPDX-FileCopyrightText: Red Hat, Inc. + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#include "qemu/osdep.h" +#include "hw/qdev-properties.h" +#include "hw/virtio/vhost-user-scmi.h" +#include "hw/virtio/virtio-pci.h" + +struct VHostUserSCMIPCI { + VirtIOPCIProxy parent_obj; + VHostUserSCMI vdev; +}; + +typedef struct VHostUserSCMIPCI VHostUserSCMIPCI; + +#define TYPE_VHOST_USER_SCMI_PCI "vhost-user-scmi-pci-base" + +DECLARE_INSTANCE_CHECKER(VHostUserSCMIPCI, VHOST_USER_SCMI_PCI, + TYPE_VHOST_USER_SCMI_PCI) + +static void vhost_user_scmi_pci_realize(VirtIOPCIProxy *vpci_dev, Error **errp) +{ + VHostUserSCMIPCI *dev = VHOST_USER_SCMI_PCI(vpci_dev); + DeviceState *vdev = DEVICE(&dev->vdev); + + vpci_dev->nvectors = 1; + qdev_realize(vdev, BUS(&vpci_dev->bus), errp); +} + +static void vhost_user_scmi_pci_class_init(ObjectClass *klass, void *data) +{ + DeviceClass *dc = DEVICE_CLASS(klass); + VirtioPCIClass *k = VIRTIO_PCI_CLASS(klass); + PCIDeviceClass *pcidev_k = PCI_DEVICE_CLASS(klass); + k->realize = vhost_user_scmi_pci_realize; + set_bit(DEVICE_CATEGORY_INPUT, dc->categories); + pcidev_k->vendor_id = PCI_VENDOR_ID_REDHAT_QUMRANET; + pcidev_k->device_id = 0; /* Set by virtio-pci based on virtio id */ + pcidev_k->revision = 0x00; + pcidev_k->class_id = PCI_CLASS_COMMUNICATION_OTHER; +} + +static void vhost_user_scmi_pci_instance_init(Object *obj) +{ + VHostUserSCMIPCI *dev = VHOST_USER_SCMI_PCI(obj); + + virtio_instance_init_common(obj, &dev->vdev, sizeof(dev->vdev), + TYPE_VHOST_USER_SCMI); +} + +static const VirtioPCIDeviceTypeInfo vhost_user_scmi_pci_info = { + .base_name = TYPE_VHOST_USER_SCMI_PCI, + .non_transitional_name = "vhost-user-scmi-pci", + .instance_size = sizeof(VHostUserSCMIPCI), + .instance_init = vhost_user_scmi_pci_instance_init, + .class_init = vhost_user_scmi_pci_class_init, +}; + +static void vhost_user_scmi_pci_register(void) +{ + virtio_pci_types_register(&vhost_user_scmi_pci_info); +} + +type_init(vhost_user_scmi_pci_register); From b6f53ae005a1c05034769beebf799e861b82d48a Mon Sep 17 00:00:00 2001 From: Milan Zamazal Date: Wed, 28 Jun 2023 12:05:24 +0200 Subject: [PATCH 10/66] tests/qtest: enable tests for virtio-scmi We don't have a virtio-scmi implementation in QEMU and only support a vhost-user backend. This is very similar to virtio-gpio and we add the same set of tests, just passing some vhost-user messages over the control socket. Signed-off-by: Milan Zamazal Acked-by: Thomas Huth Message-Id: <20230628100524.342666-4-mzamazal@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- MAINTAINERS | 1 + tests/qtest/libqos/meson.build | 1 + tests/qtest/libqos/virtio-scmi.c | 174 +++++++++++++++++++++++++++++++ tests/qtest/libqos/virtio-scmi.h | 34 ++++++ tests/qtest/vhost-user-test.c | 44 ++++++++ 5 files changed, 254 insertions(+) create mode 100644 tests/qtest/libqos/virtio-scmi.c create mode 100644 tests/qtest/libqos/virtio-scmi.h diff --git a/MAINTAINERS b/MAINTAINERS index 504dddb7dd..8f642141d0 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -2222,6 +2222,7 @@ R: mzamazal@redhat.com S: Supported F: hw/virtio/vhost-user-scmi* F: include/hw/virtio/vhost-user-scmi.h +F: tests/qtest/libqos/virtio-scmi.* virtio-crypto M: Gonglei diff --git a/tests/qtest/libqos/meson.build b/tests/qtest/libqos/meson.build index cc209a8de5..90aae42a22 100644 --- a/tests/qtest/libqos/meson.build +++ b/tests/qtest/libqos/meson.build @@ -46,6 +46,7 @@ libqos_srcs = files( 'virtio-serial.c', 'virtio-iommu.c', 'virtio-gpio.c', + 'virtio-scmi.c', 'generic-pcihost.c', # qgraph machines: diff --git a/tests/qtest/libqos/virtio-scmi.c b/tests/qtest/libqos/virtio-scmi.c new file mode 100644 index 0000000000..ce8f4d5c06 --- /dev/null +++ b/tests/qtest/libqos/virtio-scmi.c @@ -0,0 +1,174 @@ +/* + * virtio-scmi nodes for testing + * + * SPDX-FileCopyrightText: Linaro Ltd + * SPDX-FileCopyrightText: Red Hat, Inc. + * SPDX-License-Identifier: GPL-2.0-or-later + * + * Based on virtio-gpio.c, doing basically the same thing. + */ + +#include "qemu/osdep.h" +#include "standard-headers/linux/virtio_config.h" +#include "../libqtest.h" +#include "qemu/module.h" +#include "qgraph.h" +#include "virtio-scmi.h" + +static QGuestAllocator *alloc; + +static void virtio_scmi_cleanup(QVhostUserSCMI *scmi) +{ + QVirtioDevice *vdev = scmi->vdev; + int i; + + for (i = 0; i < 2; i++) { + qvirtqueue_cleanup(vdev->bus, scmi->queues[i], alloc); + } + g_free(scmi->queues); +} + +/* + * This handles the VirtIO setup from the point of view of the driver + * frontend and therefore doesn't present any vhost specific features + * and in fact masks of the re-used bit. + */ +static void virtio_scmi_setup(QVhostUserSCMI *scmi) +{ + QVirtioDevice *vdev = scmi->vdev; + uint64_t features; + int i; + + features = qvirtio_get_features(vdev); + features &= ~QVIRTIO_F_BAD_FEATURE; + qvirtio_set_features(vdev, features); + + scmi->queues = g_new(QVirtQueue *, 2); + for (i = 0; i < 2; i++) { + scmi->queues[i] = qvirtqueue_setup(vdev, alloc, i); + } + qvirtio_set_driver_ok(vdev); +} + +static void *qvirtio_scmi_get_driver(QVhostUserSCMI *v_scmi, + const char *interface) +{ + if (!g_strcmp0(interface, "vhost-user-scmi")) { + return v_scmi; + } + if (!g_strcmp0(interface, "virtio")) { + return v_scmi->vdev; + } + + g_assert_not_reached(); +} + +static void *qvirtio_scmi_device_get_driver(void *object, + const char *interface) +{ + QVhostUserSCMIDevice *v_scmi = object; + return qvirtio_scmi_get_driver(&v_scmi->scmi, interface); +} + +/* virtio-scmi (mmio) */ +static void qvirtio_scmi_device_destructor(QOSGraphObject *obj) +{ + QVhostUserSCMIDevice *scmi_dev = (QVhostUserSCMIDevice *) obj; + virtio_scmi_cleanup(&scmi_dev->scmi); +} + +static void qvirtio_scmi_device_start_hw(QOSGraphObject *obj) +{ + QVhostUserSCMIDevice *scmi_dev = (QVhostUserSCMIDevice *) obj; + virtio_scmi_setup(&scmi_dev->scmi); +} + +static void *virtio_scmi_device_create(void *virtio_dev, + QGuestAllocator *t_alloc, + void *addr) +{ + QVhostUserSCMIDevice *virtio_device = g_new0(QVhostUserSCMIDevice, 1); + QVhostUserSCMI *interface = &virtio_device->scmi; + + interface->vdev = virtio_dev; + alloc = t_alloc; + + virtio_device->obj.get_driver = qvirtio_scmi_device_get_driver; + virtio_device->obj.start_hw = qvirtio_scmi_device_start_hw; + virtio_device->obj.destructor = qvirtio_scmi_device_destructor; + + return &virtio_device->obj; +} + +/* virtio-scmi-pci */ +static void qvirtio_scmi_pci_destructor(QOSGraphObject *obj) +{ + QVhostUserSCMIPCI *scmi_pci = (QVhostUserSCMIPCI *) obj; + QOSGraphObject *pci_vobj = &scmi_pci->pci_vdev.obj; + + virtio_scmi_cleanup(&scmi_pci->scmi); + qvirtio_pci_destructor(pci_vobj); +} + +static void qvirtio_scmi_pci_start_hw(QOSGraphObject *obj) +{ + QVhostUserSCMIPCI *scmi_pci = (QVhostUserSCMIPCI *) obj; + QOSGraphObject *pci_vobj = &scmi_pci->pci_vdev.obj; + + qvirtio_pci_start_hw(pci_vobj); + virtio_scmi_setup(&scmi_pci->scmi); +} + +static void *qvirtio_scmi_pci_get_driver(void *object, const char *interface) +{ + QVhostUserSCMIPCI *v_scmi = object; + + if (!g_strcmp0(interface, "pci-device")) { + return v_scmi->pci_vdev.pdev; + } + return qvirtio_scmi_get_driver(&v_scmi->scmi, interface); +} + +static void *virtio_scmi_pci_create(void *pci_bus, QGuestAllocator *t_alloc, + void *addr) +{ + QVhostUserSCMIPCI *virtio_spci = g_new0(QVhostUserSCMIPCI, 1); + QVhostUserSCMI *interface = &virtio_spci->scmi; + QOSGraphObject *obj = &virtio_spci->pci_vdev.obj; + + virtio_pci_init(&virtio_spci->pci_vdev, pci_bus, addr); + interface->vdev = &virtio_spci->pci_vdev.vdev; + alloc = t_alloc; + + obj->get_driver = qvirtio_scmi_pci_get_driver; + obj->start_hw = qvirtio_scmi_pci_start_hw; + obj->destructor = qvirtio_scmi_pci_destructor; + + return obj; +} + +static void virtio_scmi_register_nodes(void) +{ + QPCIAddress addr = { + .devfn = QPCI_DEVFN(4, 0), + }; + + QOSGraphEdgeOptions edge_opts = { }; + + /* vhost-user-scmi-device */ + edge_opts.extra_device_opts = "id=scmi,chardev=chr-vhost-user-test " + "-global virtio-mmio.force-legacy=false"; + qos_node_create_driver("vhost-user-scmi-device", + virtio_scmi_device_create); + qos_node_consumes("vhost-user-scmi-device", "virtio-bus", &edge_opts); + qos_node_produces("vhost-user-scmi-device", "vhost-user-scmi"); + + /* virtio-scmi-pci */ + edge_opts.extra_device_opts = "id=scmi,addr=04.0,chardev=chr-vhost-user-test"; + add_qpci_address(&edge_opts, &addr); + qos_node_create_driver("vhost-user-scmi-pci", virtio_scmi_pci_create); + qos_node_consumes("vhost-user-scmi-pci", "pci-bus", &edge_opts); + qos_node_produces("vhost-user-scmi-pci", "vhost-user-scmi"); +} + +libqos_init(virtio_scmi_register_nodes); diff --git a/tests/qtest/libqos/virtio-scmi.h b/tests/qtest/libqos/virtio-scmi.h new file mode 100644 index 0000000000..cb5670da6e --- /dev/null +++ b/tests/qtest/libqos/virtio-scmi.h @@ -0,0 +1,34 @@ +/* + * virtio-scmi structures + * + * SPDX-FileCopyrightText: Red Hat, Inc. + * SPDX-License-Identifier: GPL-2.0-or-later + */ + +#ifndef TESTS_LIBQOS_VIRTIO_SCMI_H +#define TESTS_LIBQOS_VIRTIO_SCMI_H + +#include "qgraph.h" +#include "virtio.h" +#include "virtio-pci.h" + +typedef struct QVhostUserSCMI QVhostUserSCMI; +typedef struct QVhostUserSCMIPCI QVhostUserSCMIPCI; +typedef struct QVhostUserSCMIDevice QVhostUserSCMIDevice; + +struct QVhostUserSCMI { + QVirtioDevice *vdev; + QVirtQueue **queues; +}; + +struct QVhostUserSCMIPCI { + QVirtioPCIDevice pci_vdev; + QVhostUserSCMI scmi; +}; + +struct QVhostUserSCMIDevice { + QOSGraphObject obj; + QVhostUserSCMI scmi; +}; + +#endif diff --git a/tests/qtest/vhost-user-test.c b/tests/qtest/vhost-user-test.c index dfb8003597..d4e437265f 100644 --- a/tests/qtest/vhost-user-test.c +++ b/tests/qtest/vhost-user-test.c @@ -33,6 +33,7 @@ #include "standard-headers/linux/virtio_ids.h" #include "standard-headers/linux/virtio_net.h" #include "standard-headers/linux/virtio_gpio.h" +#include "standard-headers/linux/virtio_scmi.h" #ifdef CONFIG_LINUX #include @@ -145,6 +146,7 @@ enum { enum { VHOST_USER_NET, VHOST_USER_GPIO, + VHOST_USER_SCMI, }; typedef struct TestServer { @@ -1157,3 +1159,45 @@ static void register_vhost_gpio_test(void) "vhost-user-gpio", test_read_guest_mem, &opts); } libqos_init(register_vhost_gpio_test); + +static uint64_t vu_scmi_get_features(TestServer *s) +{ + return 0x1ULL << VIRTIO_F_VERSION_1 | + 0x1ULL << VIRTIO_SCMI_F_P2A_CHANNELS | + 0x1ULL << VHOST_USER_F_PROTOCOL_FEATURES; +} + +static void vu_scmi_get_protocol_features(TestServer *s, CharBackend *chr, + VhostUserMsg *msg) +{ + msg->flags |= VHOST_USER_REPLY_MASK; + msg->size = sizeof(m.payload.u64); + msg->payload.u64 = 1ULL << VHOST_USER_PROTOCOL_F_MQ; + + qemu_chr_fe_write_all(chr, (uint8_t *)msg, VHOST_USER_HDR_SIZE + msg->size); +} + +static struct vhost_user_ops g_vu_scmi_ops = { + .type = VHOST_USER_SCMI, + + .append_opts = append_vhost_gpio_opts, + + .get_features = vu_scmi_get_features, + .set_features = vu_net_set_features, + .get_protocol_features = vu_scmi_get_protocol_features, +}; + +static void register_vhost_scmi_test(void) +{ + QOSGraphTestOptions opts = { + .before = vhost_user_test_setup, + .subprocess = true, + .arg = &g_vu_scmi_ops, + }; + + qemu_add_opts(&qemu_chardev_opts); + + qos_add_test("scmi/read-guest-mem/memfile", + "vhost-user-scmi", test_read_guest_mem, &opts); +} +libqos_init(register_vhost_scmi_test); From a1d027be95bc375238e5b9292c6aa661a8ddef4c Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Wed, 28 Jun 2023 21:54:34 +0800 Subject: [PATCH 11/66] machine: Add helpers to get cores/threads per socket The number of cores/threads per socket are needed for smbios, and are also useful for other modules. Provide the helpers to wrap the calculation of cores/threads per socket so that we can avoid calculation errors caused by other modules miss topology changes. Suggested-by: Igor Mammedov Signed-off-by: Zhao Liu Message-Id: <20230628135437.1145805-2-zhao1.liu@linux.intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/core/machine-smp.c | 10 ++++++++++ include/hw/boards.h | 2 ++ 2 files changed, 12 insertions(+) diff --git a/hw/core/machine-smp.c b/hw/core/machine-smp.c index 89fe0cda42..0f4d9b6f7a 100644 --- a/hw/core/machine-smp.c +++ b/hw/core/machine-smp.c @@ -197,3 +197,13 @@ void machine_parse_smp_config(MachineState *ms, return; } } + +unsigned int machine_topo_get_cores_per_socket(const MachineState *ms) +{ + return ms->smp.cores * ms->smp.clusters * ms->smp.dies; +} + +unsigned int machine_topo_get_threads_per_socket(const MachineState *ms) +{ + return ms->smp.threads * machine_topo_get_cores_per_socket(ms); +} diff --git a/include/hw/boards.h b/include/hw/boards.h index 6b267c21ce..12d9e9d17c 100644 --- a/include/hw/boards.h +++ b/include/hw/boards.h @@ -35,6 +35,8 @@ void machine_set_cpu_numa_node(MachineState *machine, Error **errp); void machine_parse_smp_config(MachineState *ms, const SMPConfiguration *config, Error **errp); +unsigned int machine_topo_get_cores_per_socket(const MachineState *ms); +unsigned int machine_topo_get_threads_per_socket(const MachineState *ms); /** * machine_class_allow_dynamic_sysbus_dev: Add type to list of valid devices From d79a284a44bb7d88b233fb6bb12ea3723f43469d Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Wed, 28 Jun 2023 21:54:35 +0800 Subject: [PATCH 12/66] hw/smbios: Fix smbios_smp_sockets caculation smp.sockets is the number of sockets which is configured by "-smp" ( otherwise, the default is 1). Trying to recalculate it here with another rules leads to errors, such as: 1. 003f230e37d7 ("machine: Tweak the order of topology members in struct CpuTopology") changes the meaning of smp.cores but doesn't fix original smp.cores uses. With the introduction of cluster, now smp.cores means the number of cores in one cluster. So smp.cores * smp.threads just means the threads in a cluster not in a socket. 2. On the other hand, we shouldn't use smp.cpus here because it indicates the initial number of online CPUs at the boot time, and is not mathematically related to smp.sockets. So stop reinventing the another wheel and use the topo values that has been calculated. Fixes: 003f230e37d7 ("machine: Tweak the order of topology members in struct CpuTopology") Signed-off-by: Zhao Liu Message-Id: <20230628135437.1145805-3-zhao1.liu@linux.intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/smbios/smbios.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index d2007e70fb..d67415d44d 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -1088,8 +1088,7 @@ void smbios_get_tables(MachineState *ms, smbios_build_type_2_table(); smbios_build_type_3_table(); - smbios_smp_sockets = DIV_ROUND_UP(ms->smp.cpus, - ms->smp.cores * ms->smp.threads); + smbios_smp_sockets = ms->smp.sockets; assert(smbios_smp_sockets >= 1); for (i = 0; i < smbios_smp_sockets; i++) { From 7298fd7de5551c4501f54381228458e3c21cab4b Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Wed, 28 Jun 2023 21:54:36 +0800 Subject: [PATCH 13/66] hw/smbios: Fix thread count in type4 >From SMBIOS 3.0 specification, thread count field means: Thread Count is the total number of threads detected by the BIOS for this processor socket. It is a processor-wide count, not a thread-per-core count. [1] So here we should use threads per socket other than threads per core. [1] SMBIOS 3.0.0, section 7.5.8, Processor Information - Thread Count Fixes: c97294ec1b9e ("SMBIOS: Build aggregate smbios tables and entry point") Signed-off-by: Zhao Liu Message-Id: <20230628135437.1145805-4-zhao1.liu@linux.intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/smbios/smbios.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index d67415d44d..3aae9328c0 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -713,6 +713,7 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) { char sock_str[128]; size_t tbl_len = SMBIOS_TYPE_4_LEN_V28; + unsigned threads_per_socket; if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_64) { tbl_len = SMBIOS_TYPE_4_LEN_V30; @@ -747,17 +748,19 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) SMBIOS_TABLE_SET_STR(4, asset_tag_number_str, type4.asset); SMBIOS_TABLE_SET_STR(4, part_number_str, type4.part); + threads_per_socket = machine_topo_get_threads_per_socket(ms); + t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores; t->core_enabled = t->core_count; - t->thread_count = (ms->smp.threads > 255) ? 0xFF : ms->smp.threads; + t->thread_count = (threads_per_socket > 255) ? 0xFF : threads_per_socket; t->processor_characteristics = cpu_to_le16(0x02); /* Unknown */ t->processor_family2 = cpu_to_le16(0x01); /* Other */ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); - t->thread_count2 = cpu_to_le16(ms->smp.threads); + t->thread_count2 = cpu_to_le16(threads_per_socket); } SMBIOS_BUILD_TABLE_POST; From 196ea60a734c346d7d75f1d89aa37703d4d854e7 Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Wed, 28 Jun 2023 21:54:37 +0800 Subject: [PATCH 14/66] hw/smbios: Fix core count in type4 >From SMBIOS 3.0 specification, core count field means: Core Count is the number of cores detected by the BIOS for this processor socket. [1] Before 003f230e37d7 ("machine: Tweak the order of topology members in struct CpuTopology"), MachineState.smp.cores means "the number of cores in one package", and it's correct to use smp.cores for core count. But 003f230e37d7 changes the smp.cores' meaning to "the number of cores in one die" and doesn't change the original smp.cores' use in smbios as well, which makes core count in type4 go wrong. Fix this issue with the correct "cores per socket" caculation. [1] SMBIOS 3.0.0, section 7.5.6, Processor Information - Core Count Fixes: 003f230e37d7 ("machine: Tweak the order of topology members in struct CpuTopology") Signed-off-by: Zhao Liu Message-Id: <20230628135437.1145805-5-zhao1.liu@linux.intel.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/smbios/smbios.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/hw/smbios/smbios.c b/hw/smbios/smbios.c index 3aae9328c0..10cd22f610 100644 --- a/hw/smbios/smbios.c +++ b/hw/smbios/smbios.c @@ -714,6 +714,7 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) char sock_str[128]; size_t tbl_len = SMBIOS_TYPE_4_LEN_V28; unsigned threads_per_socket; + unsigned cores_per_socket; if (smbios_ep_type == SMBIOS_ENTRY_POINT_TYPE_64) { tbl_len = SMBIOS_TYPE_4_LEN_V30; @@ -749,8 +750,9 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) SMBIOS_TABLE_SET_STR(4, part_number_str, type4.part); threads_per_socket = machine_topo_get_threads_per_socket(ms); + cores_per_socket = machine_topo_get_cores_per_socket(ms); - t->core_count = (ms->smp.cores > 255) ? 0xFF : ms->smp.cores; + t->core_count = (cores_per_socket > 255) ? 0xFF : cores_per_socket; t->core_enabled = t->core_count; t->thread_count = (threads_per_socket > 255) ? 0xFF : threads_per_socket; @@ -759,7 +761,7 @@ static void smbios_build_type_4_table(MachineState *ms, unsigned instance) t->processor_family2 = cpu_to_le16(0x01); /* Other */ if (tbl_len == SMBIOS_TYPE_4_LEN_V30) { - t->core_count2 = t->core_enabled2 = cpu_to_le16(ms->smp.cores); + t->core_count2 = t->core_enabled2 = cpu_to_le16(cores_per_socket); t->thread_count2 = cpu_to_le16(threads_per_socket); } From 0dcb4172f2cecdfae113239ff931718eefdeb2d2 Mon Sep 17 00:00:00 2001 From: Tom Lonergan Date: Wed, 28 Jun 2023 16:39:26 +0000 Subject: [PATCH 15/66] vhost-user: Change one_time to per_device request Some devices, like virtio-scsi, consist of one vhost_dev, while others, like virtio-net, contain multiple vhost_devs. The QEMU vhost-user code has a concept of one-time messages which is misleading. One-time messages are sent once per operation on the device, not once for the lifetime of the device. Therefore, as discussed in [1], vhost_user_one_time_request should be renamed to vhost_user_per_device_request and the relevant comments updated to match the real functionality. [1] https://lore.kernel.org/qemu-devel/20230127083027-mutt-send-email-mst@kernel.org/ Signed-off-by: Tom Lonergan Message-Id: <20230628163927.108171-2-tom.lonergan@nutanix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Raphael Norwitz --- hw/virtio/vhost-user.c | 18 ++++++++++++------ 1 file changed, 12 insertions(+), 6 deletions(-) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index c4e0cbd702..65d6299343 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -367,7 +367,7 @@ static int process_message_reply(struct vhost_dev *dev, return msg_reply.payload.u64 ? -EIO : 0; } -static bool vhost_user_one_time_request(VhostUserRequest request) +static bool vhost_user_per_device_request(VhostUserRequest request) { switch (request) { case VHOST_USER_SET_OWNER: @@ -392,11 +392,17 @@ static int vhost_user_write(struct vhost_dev *dev, VhostUserMsg *msg, int ret, size = VHOST_USER_HDR_SIZE + msg->hdr.size; /* - * For non-vring specific requests, like VHOST_USER_SET_MEM_TABLE, - * we just need send it once in the first time. For later such - * request, we just ignore it. + * Some devices, like virtio-scsi, are implemented as a single vhost_dev, + * while others, like virtio-net, contain multiple vhost_devs. For + * operations such as configuring device memory mappings or issuing device + * resets, which affect the whole device instead of individual VQs, + * vhost-user messages should only be sent once. + * + * Devices with multiple vhost_devs are given an associated dev->vq_index + * so per_device requests are only sent if vq_index is 0. */ - if (vhost_user_one_time_request(msg->hdr.request) && dev->vq_index != 0) { + if (vhost_user_per_device_request(msg->hdr.request) + && dev->vq_index != 0) { msg->hdr.flags &= ~VHOST_USER_NEED_REPLY_MASK; return 0; } @@ -1256,7 +1262,7 @@ static int vhost_user_get_u64(struct vhost_dev *dev, int request, uint64_t *u64) .hdr.flags = VHOST_USER_VERSION, }; - if (vhost_user_one_time_request(request) && dev->vq_index != 0) { + if (vhost_user_per_device_request(request) && dev->vq_index != 0) { return 0; } From 667e58aef1aa7a0294f635ef070c591efebf75c1 Mon Sep 17 00:00:00 2001 From: Tom Lonergan Date: Wed, 28 Jun 2023 16:39:27 +0000 Subject: [PATCH 16/66] vhost-user: Make RESET_DEVICE a per device message A device reset is issued per device, not per VQ. The legacy device reset message, VHOST_USER_RESET_OWNER, is already a per device message. Therefore, this change adds the proper message, VHOST_USER_RESET_DEVICE, to per device messages. Signed-off-by: Tom Lonergan Message-Id: <20230628163927.108171-3-tom.lonergan@nutanix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Raphael Norwitz --- hw/virtio/vhost-user.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/virtio/vhost-user.c b/hw/virtio/vhost-user.c index 65d6299343..8dcf049d42 100644 --- a/hw/virtio/vhost-user.c +++ b/hw/virtio/vhost-user.c @@ -375,6 +375,7 @@ static bool vhost_user_per_device_request(VhostUserRequest request) case VHOST_USER_SET_MEM_TABLE: case VHOST_USER_GET_QUEUE_NUM: case VHOST_USER_NET_SET_MTU: + case VHOST_USER_RESET_DEVICE: case VHOST_USER_ADD_MEM_REG: case VHOST_USER_REM_MEM_REG: return true; From 00f52e77d742c788ade7d713a7bf558481a89eec Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:04 +0200 Subject: [PATCH 17/66] hw/i386/pc_q35: Resolve redundant q35_host variable MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The variable is redundant to "phb" and is never used by its real type. Signed-off-by: Bernhard Beschow Reviewed-by: Thomas Huth Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230630073720.21297-2-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_q35.c | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 11a7084ea1..d9f3764184 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -120,8 +120,7 @@ static void pc_q35_init(MachineState *machine) PCMachineState *pcms = PC_MACHINE(machine); PCMachineClass *pcmc = PC_MACHINE_GET_CLASS(pcms); X86MachineState *x86ms = X86_MACHINE(machine); - Q35PCIHost *q35_host; - PCIHostState *phb; + Object *phb; PCIBus *host_bus; PCIDevice *lpc; DeviceState *lpc_dev; @@ -207,10 +206,10 @@ static void pc_q35_init(MachineState *machine) } /* create pci host bus */ - q35_host = Q35_HOST_DEVICE(qdev_new(TYPE_Q35_HOST_DEVICE)); + phb = OBJECT(qdev_new(TYPE_Q35_HOST_DEVICE)); if (pcmc->pci_enabled) { - pci_hole64_size = object_property_get_uint(OBJECT(q35_host), + pci_hole64_size = object_property_get_uint(phb, PCI_HOST_PROP_PCI_HOLE64_SIZE, &error_abort); } @@ -218,23 +217,23 @@ static void pc_q35_init(MachineState *machine) /* allocate ram and load rom/bios */ pc_memory_init(pcms, system_memory, rom_memory, pci_hole64_size); - object_property_add_child(OBJECT(machine), "q35", OBJECT(q35_host)); - object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_RAM_MEM, + object_property_add_child(OBJECT(machine), "q35", phb); + object_property_set_link(phb, MCH_HOST_PROP_RAM_MEM, OBJECT(machine->ram), NULL); - object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_PCI_MEM, + object_property_set_link(phb, MCH_HOST_PROP_PCI_MEM, OBJECT(pci_memory), NULL); - object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_SYSTEM_MEM, + object_property_set_link(phb, MCH_HOST_PROP_SYSTEM_MEM, OBJECT(system_memory), NULL); - object_property_set_link(OBJECT(q35_host), MCH_HOST_PROP_IO_MEM, + object_property_set_link(phb, MCH_HOST_PROP_IO_MEM, OBJECT(system_io), NULL); - object_property_set_int(OBJECT(q35_host), PCI_HOST_BELOW_4G_MEM_SIZE, + object_property_set_int(phb, PCI_HOST_BELOW_4G_MEM_SIZE, x86ms->below_4g_mem_size, NULL); - object_property_set_int(OBJECT(q35_host), PCI_HOST_ABOVE_4G_MEM_SIZE, + object_property_set_int(phb, PCI_HOST_ABOVE_4G_MEM_SIZE, x86ms->above_4g_mem_size, NULL); + /* pci */ - sysbus_realize_and_unref(SYS_BUS_DEVICE(q35_host), &error_fatal); - phb = PCI_HOST_BRIDGE(q35_host); - host_bus = phb->bus; + sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); + host_bus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pcie.0")); /* create ISA bus */ lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), true, TYPE_ICH9_LPC_DEVICE); From 8cf08065b1b19cea1958a43187755cec6546a5d0 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:05 +0200 Subject: [PATCH 18/66] hw/pci-host/q35: Fix double, contradicting .endianness assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes the following clangd warning (-Winitializer-overrides): q35.c:297:19: Initializer overrides prior initialization of this subobject q35.c:292:19: previous initialization is here Settle on little endian which is consistent with using pci_host_conf_le_ops. Fixes: bafc90bdc594 ("q35: implement TSEG") Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230630073720.21297-3-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-host/q35.c | 1 - 1 file changed, 1 deletion(-) diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index fd18920e7f..84137b9ad9 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -285,7 +285,6 @@ static void blackhole_write(void *opaque, hwaddr addr, uint64_t val, static const MemoryRegionOps blackhole_ops = { .read = blackhole_read, .write = blackhole_write, - .endianness = DEVICE_NATIVE_ENDIAN, .valid.min_access_size = 1, .valid.max_access_size = 4, .impl.min_access_size = 4, From 06a492bd2b75653970e30c3ae0ea79ab1ed4ae31 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:06 +0200 Subject: [PATCH 19/66] hw/pci-host/q35: Initialize PCMachineState::bus in board code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Q35 PCI host currently sets the PC machine's PCI bus attribute through global state, thereby assuming the machine to be a PC machine. The Q35 machine code already holds on to Q35's pci bus attribute, so can easily set its own property while preserving encapsulation. Signed-off-by: Bernhard Beschow Reviewed-by: Igor Mammedov Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230630073720.21297-4-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_q35.c | 4 +++- hw/pci-host/q35.c | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index d9f3764184..4edc0b35f4 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -230,10 +230,12 @@ static void pc_q35_init(MachineState *machine) x86ms->below_4g_mem_size, NULL); object_property_set_int(phb, PCI_HOST_ABOVE_4G_MEM_SIZE, x86ms->above_4g_mem_size, NULL); + sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); /* pci */ - sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); host_bus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pcie.0")); + pcms->bus = host_bus; + /* create ISA bus */ lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), true, TYPE_ICH9_LPC_DEVICE); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 84137b9ad9..0604464074 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -66,7 +66,6 @@ static void q35_host_realize(DeviceState *dev, Error **errp) s->mch.pci_address_space, s->mch.address_space_io, 0, TYPE_PCIE_BUS); - PC_MACHINE(qdev_get_machine())->bus = pci->bus; pci->bypass_iommu = PC_MACHINE(qdev_get_machine())->default_bus_bypass_iommu; qdev_realize(DEVICE(&s->mch), BUS(pci->bus), &error_fatal); From 3b20f4ca59b53f10240d94cdb3f482c116640437 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:07 +0200 Subject: [PATCH 20/66] hw/pci/pci_host: Introduce PCI_HOST_BYPASS_IOMMU macro Introduce a macro to avoid copy and pasting strings which can easily cause typos. Suggested-by: Michael S. Tsirkin Signed-off-by: Bernhard Beschow Reviewed-by: Igor Mammedov Message-Id: <20230630073720.21297-5-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci/pci_host.c | 2 +- include/hw/pci/pci_host.h | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/hw/pci/pci_host.c b/hw/pci/pci_host.c index dfd185bbb4..7af8afdcbe 100644 --- a/hw/pci/pci_host.c +++ b/hw/pci/pci_host.c @@ -232,7 +232,7 @@ const VMStateDescription vmstate_pcihost = { static Property pci_host_properties_common[] = { DEFINE_PROP_BOOL("x-config-reg-migration-enabled", PCIHostState, mig_enabled, true), - DEFINE_PROP_BOOL("bypass-iommu", PCIHostState, bypass_iommu, false), + DEFINE_PROP_BOOL(PCI_HOST_BYPASS_IOMMU, PCIHostState, bypass_iommu, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/pci/pci_host.h b/include/hw/pci/pci_host.h index c6f4eb4585..e52d8ec2cd 100644 --- a/include/hw/pci/pci_host.h +++ b/include/hw/pci/pci_host.h @@ -31,6 +31,8 @@ #include "hw/sysbus.h" #include "qom/object.h" +#define PCI_HOST_BYPASS_IOMMU "bypass-iommu" + #define TYPE_PCI_HOST_BRIDGE "pci-host-bridge" OBJECT_DECLARE_TYPE(PCIHostState, PCIHostBridgeClass, PCI_HOST_BRIDGE) From e36102cb07dddb3f3758398322535a50e7ed41f2 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:08 +0200 Subject: [PATCH 21/66] hw/pci-host/q35: Initialize PCI_HOST_BYPASS_IOMMU property from board code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The Q35 PCI host already has a PCI_HOST_BYPASS_IOMMU property. However, the host initializes this property itself by accessing global machine state, thereby assuming it to be a PC machine. Avoid this by having board code set this property. Signed-off-by: Bernhard Beschow Reviewed-by: Igor Mammedov Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230630073720.21297-6-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_q35.c | 2 ++ hw/pci-host/q35.c | 3 +-- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 4edc0b35f4..852250e8cb 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -230,6 +230,8 @@ static void pc_q35_init(MachineState *machine) x86ms->below_4g_mem_size, NULL); object_property_set_int(phb, PCI_HOST_ABOVE_4G_MEM_SIZE, x86ms->above_4g_mem_size, NULL); + object_property_set_bool(phb, PCI_HOST_BYPASS_IOMMU, + pcms->default_bus_bypass_iommu, NULL); sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); /* pci */ diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index 0604464074..d2830cee34 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -66,8 +66,7 @@ static void q35_host_realize(DeviceState *dev, Error **errp) s->mch.pci_address_space, s->mch.address_space_io, 0, TYPE_PCIE_BUS); - pci->bypass_iommu = - PC_MACHINE(qdev_get_machine())->default_bus_bypass_iommu; + qdev_realize(DEVICE(&s->mch), BUS(pci->bus), &error_fatal); } From 3d664a9a386dd5968962b038d056caf182ef29a1 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:09 +0200 Subject: [PATCH 22/66] hw/pci-host/q35: Make some property name macros reusable by i440fx Signed-off-by: Bernhard Beschow Message-Id: <20230630073720.21297-7-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_q35.c | 8 ++++---- hw/pci-host/q35.c | 8 ++++---- include/hw/i386/pc.h | 4 ++++ include/hw/pci-host/q35.h | 5 ----- 4 files changed, 12 insertions(+), 13 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 852250e8cb..02dd274276 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -218,13 +218,13 @@ static void pc_q35_init(MachineState *machine) pc_memory_init(pcms, system_memory, rom_memory, pci_hole64_size); object_property_add_child(OBJECT(machine), "q35", phb); - object_property_set_link(phb, MCH_HOST_PROP_RAM_MEM, + object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM, OBJECT(machine->ram), NULL); - object_property_set_link(phb, MCH_HOST_PROP_PCI_MEM, + object_property_set_link(phb, PCI_HOST_PROP_PCI_MEM, OBJECT(pci_memory), NULL); - object_property_set_link(phb, MCH_HOST_PROP_SYSTEM_MEM, + object_property_set_link(phb, PCI_HOST_PROP_SYSTEM_MEM, OBJECT(system_memory), NULL); - object_property_set_link(phb, MCH_HOST_PROP_IO_MEM, + object_property_set_link(phb, PCI_HOST_PROP_IO_MEM, OBJECT(system_io), NULL); object_property_set_int(phb, PCI_HOST_BELOW_4G_MEM_SIZE, x86ms->below_4g_mem_size, NULL); diff --git a/hw/pci-host/q35.c b/hw/pci-host/q35.c index d2830cee34..91c46df9ae 100644 --- a/hw/pci-host/q35.c +++ b/hw/pci-host/q35.c @@ -240,19 +240,19 @@ static void q35_host_initfn(Object *obj) object_property_add_uint64_ptr(obj, PCIE_HOST_MCFG_SIZE, &pehb->size, OBJ_PROP_FLAG_READ); - object_property_add_link(obj, MCH_HOST_PROP_RAM_MEM, TYPE_MEMORY_REGION, + object_property_add_link(obj, PCI_HOST_PROP_RAM_MEM, TYPE_MEMORY_REGION, (Object **) &s->mch.ram_memory, qdev_prop_allow_set_link_before_realize, 0); - object_property_add_link(obj, MCH_HOST_PROP_PCI_MEM, TYPE_MEMORY_REGION, + object_property_add_link(obj, PCI_HOST_PROP_PCI_MEM, TYPE_MEMORY_REGION, (Object **) &s->mch.pci_address_space, qdev_prop_allow_set_link_before_realize, 0); - object_property_add_link(obj, MCH_HOST_PROP_SYSTEM_MEM, TYPE_MEMORY_REGION, + object_property_add_link(obj, PCI_HOST_PROP_SYSTEM_MEM, TYPE_MEMORY_REGION, (Object **) &s->mch.system_memory, qdev_prop_allow_set_link_before_realize, 0); - object_property_add_link(obj, MCH_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION, + object_property_add_link(obj, PCI_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION, (Object **) &s->mch.address_space_io, qdev_prop_allow_set_link_before_realize, 0); } diff --git a/include/hw/i386/pc.h b/include/hw/i386/pc.h index 6eec0fc51d..c34c698cdd 100644 --- a/include/hw/i386/pc.h +++ b/include/hw/i386/pc.h @@ -146,6 +146,10 @@ void pc_acpi_smi_interrupt(void *opaque, int irq, int level); void pc_guest_info_init(PCMachineState *pcms); +#define PCI_HOST_PROP_RAM_MEM "ram-mem" +#define PCI_HOST_PROP_PCI_MEM "pci-mem" +#define PCI_HOST_PROP_SYSTEM_MEM "system-mem" +#define PCI_HOST_PROP_IO_MEM "io-mem" #define PCI_HOST_PROP_PCI_HOLE_START "pci-hole-start" #define PCI_HOST_PROP_PCI_HOLE_END "pci-hole-end" #define PCI_HOST_PROP_PCI_HOLE64_START "pci-hole64-start" diff --git a/include/hw/pci-host/q35.h b/include/hw/pci-host/q35.h index e89329c51e..1d98bbfe0d 100644 --- a/include/hw/pci-host/q35.h +++ b/include/hw/pci-host/q35.h @@ -74,11 +74,6 @@ struct Q35PCIHost { * gmch part */ -#define MCH_HOST_PROP_RAM_MEM "ram-mem" -#define MCH_HOST_PROP_PCI_MEM "pci-mem" -#define MCH_HOST_PROP_SYSTEM_MEM "system-mem" -#define MCH_HOST_PROP_IO_MEM "io-mem" - /* PCI configuration */ #define MCH_HOST_BRIDGE "MCH" From b90d7bff18c48500df86a35db56fecfb59d85088 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:10 +0200 Subject: [PATCH 23/66] hw/i386/pc_piix: Turn some local variables into initializers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Eliminates an else branch. Suggested-by: Igor Mammedov Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230630073720.21297-8-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index f9947fbc10..6a5b6dad2f 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -122,11 +122,11 @@ static void pc_init1(MachineState *machine, BusState *idebus[MAX_IDE_BUS]; ISADevice *rtc_state; MemoryRegion *ram_memory; - MemoryRegion *pci_memory; - MemoryRegion *rom_memory; + MemoryRegion *pci_memory = NULL; + MemoryRegion *rom_memory = system_memory; ram_addr_t lowmem; - uint64_t hole64_size; - DeviceState *i440fx_host; + uint64_t hole64_size = 0; + DeviceState *i440fx_host = NULL; /* * Calculate ram split, for memory below and above 4G. It's a bit @@ -205,11 +205,6 @@ static void pc_init1(MachineState *machine, hole64_size = object_property_get_uint(OBJECT(i440fx_host), PCI_HOST_PROP_PCI_HOLE64_SIZE, &error_abort); - } else { - pci_memory = NULL; - rom_memory = system_memory; - i440fx_host = NULL; - hole64_size = 0; } pc_guest_info_init(pcms); From f00f5e4b00e2cb414927b560cd6a82fad4dfc6e2 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:11 +0200 Subject: [PATCH 24/66] hw/pci-host/i440fx: Add "i440fx" child property in board code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The parent-child relation is usually established near a child's qdev_new(). For i440fx this allows for reusing the machine parameter, thus avoiding qdev_get_machine() which relies on a global variable. Suggested-by: Philippe Mathieu-Daudé Signed-off-by: Bernhard Beschow Message-Id: <20230630073720.21297-9-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 2 ++ hw/pci-host/i440fx.c | 1 - 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 6a5b6dad2f..26e8473a4d 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -202,6 +202,8 @@ static void pc_init1(MachineState *machine, memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); rom_memory = pci_memory; i440fx_host = qdev_new(host_type); + object_property_add_child(OBJECT(machine), "i440fx", + OBJECT(i440fx_host)); hole64_size = object_property_get_uint(OBJECT(i440fx_host), PCI_HOST_PROP_PCI_HOLE64_SIZE, &error_abort); diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 61e7b97ff4..d95d9229d3 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -259,7 +259,6 @@ PCIBus *i440fx_init(const char *pci_type, b = pci_root_bus_new(dev, NULL, pci_address_space, address_space_io, 0, TYPE_PCI_BUS); s->bus = b; - object_property_add_child(qdev_get_machine(), "i440fx", OBJECT(dev)); sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); d = pci_create_simple(b, 0, pci_type); From cda39f134ba197e2c8c9660dd3fb8fddf4c8647b Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:12 +0200 Subject: [PATCH 25/66] hw/pci-host/i440fx: Replace magic values by existing constants MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Bernhard Beschow Reviewed-by: Igor Mammedov Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230630073720.21297-10-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-host/i440fx.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index d95d9229d3..b7c24a4e1d 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -276,8 +276,8 @@ PCIBus *i440fx_init(const char *pci_type, /* if *disabled* show SMRAM to all CPUs */ memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region", - f->pci_address_space, 0xa0000, 0x20000); - memory_region_add_subregion_overlap(f->system_memory, 0xa0000, + f->pci_address_space, SMRAM_C_BASE, SMRAM_C_SIZE); + memory_region_add_subregion_overlap(f->system_memory, SMRAM_C_BASE, &f->smram_region, 1); memory_region_set_enabled(&f->smram_region, true); @@ -285,9 +285,9 @@ PCIBus *i440fx_init(const char *pci_type, memory_region_init(&f->smram, OBJECT(d), "smram", 4 * GiB); memory_region_set_enabled(&f->smram, true); memory_region_init_alias(&f->low_smram, OBJECT(d), "smram-low", - f->ram_memory, 0xa0000, 0x20000); + f->ram_memory, SMRAM_C_BASE, SMRAM_C_SIZE); memory_region_set_enabled(&f->low_smram, true); - memory_region_add_subregion(&f->smram, 0xa0000, &f->low_smram); + memory_region_add_subregion(&f->smram, SMRAM_C_BASE, &f->low_smram); object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&f->smram)); From a707466dd62712e4a76e30217a8880e6ab8c7b10 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:13 +0200 Subject: [PATCH 26/66] hw/pci-host/i440fx: Have common names for some local variables MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit `PCIHostState` is often referred to as `phb`, own device state usually as `s`. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230630073720.21297-11-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-host/i440fx.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index b7c24a4e1d..0b76fe71af 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -205,28 +205,28 @@ static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v, static void i440fx_pcihost_initfn(Object *obj) { - PCIHostState *s = PCI_HOST_BRIDGE(obj); + PCIHostState *phb = PCI_HOST_BRIDGE(obj); - memory_region_init_io(&s->conf_mem, obj, &pci_host_conf_le_ops, s, + memory_region_init_io(&phb->conf_mem, obj, &pci_host_conf_le_ops, phb, "pci-conf-idx", 4); - memory_region_init_io(&s->data_mem, obj, &pci_host_data_le_ops, s, + memory_region_init_io(&phb->data_mem, obj, &pci_host_data_le_ops, phb, "pci-conf-data", 4); } static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) { - PCIHostState *s = PCI_HOST_BRIDGE(dev); + PCIHostState *phb = PCI_HOST_BRIDGE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - memory_region_add_subregion(s->bus->address_space_io, 0xcf8, &s->conf_mem); + memory_region_add_subregion(phb->bus->address_space_io, 0xcf8, &phb->conf_mem); sysbus_init_ioports(sbd, 0xcf8, 4); - memory_region_add_subregion(s->bus->address_space_io, 0xcfc, &s->data_mem); + memory_region_add_subregion(phb->bus->address_space_io, 0xcfc, &phb->data_mem); sysbus_init_ioports(sbd, 0xcfc, 4); /* register i440fx 0xcf8 port as coalesced pio */ - memory_region_set_flush_coalesced(&s->data_mem); - memory_region_add_coalescing(&s->conf_mem, 0, 4); + memory_region_set_flush_coalesced(&phb->data_mem); + memory_region_add_coalescing(&phb->conf_mem, 0, 4); } static void i440fx_realize(PCIDevice *dev, Error **errp) @@ -248,17 +248,16 @@ PCIBus *i440fx_init(const char *pci_type, MemoryRegion *pci_address_space, MemoryRegion *ram_memory) { + I440FXState *s = I440FX_PCI_HOST_BRIDGE(dev); + PCIHostState *phb = PCI_HOST_BRIDGE(dev); PCIBus *b; PCIDevice *d; - PCIHostState *s; PCII440FXState *f; unsigned i; - I440FXState *i440fx; - s = PCI_HOST_BRIDGE(dev); b = pci_root_bus_new(dev, NULL, pci_address_space, address_space_io, 0, TYPE_PCI_BUS); - s->bus = b; + phb->bus = b; sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); d = pci_create_simple(b, 0, pci_type); @@ -267,8 +266,7 @@ PCIBus *i440fx_init(const char *pci_type, f->pci_address_space = pci_address_space; f->ram_memory = ram_memory; - i440fx = I440FX_PCI_HOST_BRIDGE(dev); - range_set_bounds(&i440fx->pci_hole, below_4g_mem_size, + range_set_bounds(&s->pci_hole, below_4g_mem_size, IO_APIC_DEFAULT_ADDRESS - 1); /* setup pci memory mapping */ From 44df0552a020efd6191714da44edede5ded37ec8 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:14 +0200 Subject: [PATCH 27/66] hw/pci-host/i440fx: Move i440fx_realize() into PCII440FXState section MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i440fx_realize() realizes the PCI device inside the host bridge (PCII440FXState), but is implemented between i440fx_pcihost_realize() and i440fx_init() which deal with the host bridge itself (I440FXState). Since we want to append i440fx_init() to i440fx_pcihost_realize() later let's move i440fx_realize() out of the way. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230630073720.21297-12-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-host/i440fx.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 0b76fe71af..e84fcd50b6 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -65,6 +65,15 @@ struct I440FXState { */ #define I440FX_COREBOOT_RAM_SIZE 0x57 +static void i440fx_realize(PCIDevice *dev, Error **errp) +{ + dev->config[I440FX_SMRAM] = 0x02; + + if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) { + warn_report("i440fx doesn't support emulated iommu"); + } +} + static void i440fx_update_memory_mappings(PCII440FXState *d) { int i; @@ -229,15 +238,6 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) memory_region_add_coalescing(&phb->conf_mem, 0, 4); } -static void i440fx_realize(PCIDevice *dev, Error **errp) -{ - dev->config[I440FX_SMRAM] = 0x02; - - if (object_property_get_bool(qdev_get_machine(), "iommu", NULL)) { - warn_report("i440fx doesn't support emulated iommu"); - } -} - PCIBus *i440fx_init(const char *pci_type, DeviceState *dev, MemoryRegion *address_space_mem, From 09f85b7b93a05f1551509b245be99529a9e278f9 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:15 +0200 Subject: [PATCH 28/66] hw/pci-host/i440fx: Make MemoryRegion pointers accessible as properties MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The goal is to eliminate i440fx_init() which is a legacy init function. This neccessitates the memory regions to be properties, like in Q35, which will be assigned in board code. Since i440fx needs different PCI devices in Xen mode, and since i440fx shall be self-contained, the PCI device will be created during realization of the host. Thus the pointers need to be moved to the host structure to be usable as properties. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230630073720.21297-13-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-host/i440fx.c | 42 +++++++++++++++++++++++++----------- include/hw/pci-host/i440fx.h | 3 --- 2 files changed, 30 insertions(+), 15 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index e84fcd50b6..b9530fc3a0 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -47,6 +47,10 @@ OBJECT_DECLARE_SIMPLE_TYPE(I440FXState, I440FX_PCI_HOST_BRIDGE) struct I440FXState { PCIHostState parent_obj; + + MemoryRegion *system_memory; + MemoryRegion *pci_address_space; + MemoryRegion *ram_memory; Range pci_hole; uint64_t pci_hole64_size; bool pci_hole64_fix; @@ -214,12 +218,25 @@ static void i440fx_pcihost_get_pci_hole64_end(Object *obj, Visitor *v, static void i440fx_pcihost_initfn(Object *obj) { + I440FXState *s = I440FX_PCI_HOST_BRIDGE(obj); PCIHostState *phb = PCI_HOST_BRIDGE(obj); memory_region_init_io(&phb->conf_mem, obj, &pci_host_conf_le_ops, phb, "pci-conf-idx", 4); memory_region_init_io(&phb->data_mem, obj, &pci_host_data_le_ops, phb, "pci-conf-data", 4); + + object_property_add_link(obj, PCI_HOST_PROP_RAM_MEM, TYPE_MEMORY_REGION, + (Object **) &s->ram_memory, + qdev_prop_allow_set_link_before_realize, 0); + + object_property_add_link(obj, PCI_HOST_PROP_PCI_MEM, TYPE_MEMORY_REGION, + (Object **) &s->pci_address_space, + qdev_prop_allow_set_link_before_realize, 0); + + object_property_add_link(obj, PCI_HOST_PROP_SYSTEM_MEM, TYPE_MEMORY_REGION, + (Object **) &s->system_memory, + qdev_prop_allow_set_link_before_realize, 0); } static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) @@ -255,27 +272,28 @@ PCIBus *i440fx_init(const char *pci_type, PCII440FXState *f; unsigned i; - b = pci_root_bus_new(dev, NULL, pci_address_space, + s->system_memory = address_space_mem; + s->pci_address_space = pci_address_space; + s->ram_memory = ram_memory; + + b = pci_root_bus_new(dev, NULL, s->pci_address_space, address_space_io, 0, TYPE_PCI_BUS); phb->bus = b; sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); d = pci_create_simple(b, 0, pci_type); f = I440FX_PCI_DEVICE(d); - f->system_memory = address_space_mem; - f->pci_address_space = pci_address_space; - f->ram_memory = ram_memory; range_set_bounds(&s->pci_hole, below_4g_mem_size, IO_APIC_DEFAULT_ADDRESS - 1); /* setup pci memory mapping */ - pc_pci_as_mapping_init(f->system_memory, f->pci_address_space); + pc_pci_as_mapping_init(s->system_memory, s->pci_address_space); /* if *disabled* show SMRAM to all CPUs */ memory_region_init_alias(&f->smram_region, OBJECT(d), "smram-region", - f->pci_address_space, SMRAM_C_BASE, SMRAM_C_SIZE); - memory_region_add_subregion_overlap(f->system_memory, SMRAM_C_BASE, + s->pci_address_space, SMRAM_C_BASE, SMRAM_C_SIZE); + memory_region_add_subregion_overlap(s->system_memory, SMRAM_C_BASE, &f->smram_region, 1); memory_region_set_enabled(&f->smram_region, true); @@ -283,17 +301,17 @@ PCIBus *i440fx_init(const char *pci_type, memory_region_init(&f->smram, OBJECT(d), "smram", 4 * GiB); memory_region_set_enabled(&f->smram, true); memory_region_init_alias(&f->low_smram, OBJECT(d), "smram-low", - f->ram_memory, SMRAM_C_BASE, SMRAM_C_SIZE); + s->ram_memory, SMRAM_C_BASE, SMRAM_C_SIZE); memory_region_set_enabled(&f->low_smram, true); memory_region_add_subregion(&f->smram, SMRAM_C_BASE, &f->low_smram); object_property_add_const_link(qdev_get_machine(), "smram", OBJECT(&f->smram)); - init_pam(&f->pam_regions[0], OBJECT(d), f->ram_memory, f->system_memory, - f->pci_address_space, PAM_BIOS_BASE, PAM_BIOS_SIZE); + init_pam(&f->pam_regions[0], OBJECT(d), s->ram_memory, s->system_memory, + s->pci_address_space, PAM_BIOS_BASE, PAM_BIOS_SIZE); for (i = 0; i < ARRAY_SIZE(f->pam_regions) - 1; ++i) { - init_pam(&f->pam_regions[i + 1], OBJECT(d), f->ram_memory, - f->system_memory, f->pci_address_space, + init_pam(&f->pam_regions[i + 1], OBJECT(d), s->ram_memory, + s->system_memory, s->pci_address_space, PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index bf57216c78..e3a550021e 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -25,9 +25,6 @@ struct PCII440FXState { PCIDevice parent_obj; /*< public >*/ - MemoryRegion *system_memory; - MemoryRegion *pci_address_space; - MemoryRegion *ram_memory; PAMMemoryRegion pam_regions[PAM_REGIONS_COUNT]; MemoryRegion smram_region; MemoryRegion smram, low_smram; From c84858fd907067dcfb7624083dd44879664cc019 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:16 +0200 Subject: [PATCH 29/66] hw/pci-host/i440fx: Add PCI_HOST_PROP_IO_MEM property Introduce the property in anticipation of QOM'ification; Q35 has the same property. Signed-off-by: Bernhard Beschow Message-Id: <20230630073720.21297-14-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-host/i440fx.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index b9530fc3a0..de14c75e95 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -27,7 +27,6 @@ #include "qemu/range.h" #include "hw/i386/pc.h" #include "hw/pci/pci.h" -#include "hw/pci/pci_bus.h" #include "hw/pci/pci_host.h" #include "hw/pci-host/i440fx.h" #include "hw/qdev-properties.h" @@ -49,6 +48,7 @@ struct I440FXState { PCIHostState parent_obj; MemoryRegion *system_memory; + MemoryRegion *io_memory; MemoryRegion *pci_address_space; MemoryRegion *ram_memory; Range pci_hole; @@ -237,17 +237,22 @@ static void i440fx_pcihost_initfn(Object *obj) object_property_add_link(obj, PCI_HOST_PROP_SYSTEM_MEM, TYPE_MEMORY_REGION, (Object **) &s->system_memory, qdev_prop_allow_set_link_before_realize, 0); + + object_property_add_link(obj, PCI_HOST_PROP_IO_MEM, TYPE_MEMORY_REGION, + (Object **) &s->io_memory, + qdev_prop_allow_set_link_before_realize, 0); } static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) { + I440FXState *s = I440FX_PCI_HOST_BRIDGE(dev); PCIHostState *phb = PCI_HOST_BRIDGE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); - memory_region_add_subregion(phb->bus->address_space_io, 0xcf8, &phb->conf_mem); + memory_region_add_subregion(s->io_memory, 0xcf8, &phb->conf_mem); sysbus_init_ioports(sbd, 0xcf8, 4); - memory_region_add_subregion(phb->bus->address_space_io, 0xcfc, &phb->data_mem); + memory_region_add_subregion(s->io_memory, 0xcfc, &phb->data_mem); sysbus_init_ioports(sbd, 0xcfc, 4); /* register i440fx 0xcf8 port as coalesced pio */ @@ -273,11 +278,12 @@ PCIBus *i440fx_init(const char *pci_type, unsigned i; s->system_memory = address_space_mem; + s->io_memory = address_space_io; s->pci_address_space = pci_address_space; s->ram_memory = ram_memory; b = pci_root_bus_new(dev, NULL, s->pci_address_space, - address_space_io, 0, TYPE_PCI_BUS); + s->io_memory, 0, TYPE_PCI_BUS); phb->bus = b; sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); From 82feef45f42db607b9439411d4aad7c21bdf3047 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:17 +0200 Subject: [PATCH 30/66] hw/pci-host/i440fx: Add PCI_HOST_{ABOVE, BELOW}_4G_MEM_SIZE properties Introduce the properties in anticipation of QOM'ification; Q35 has the same properties. Note that we want to avoid a "ram size" property in the QOM interface since it seems redundant to both properties introduced in this change. Thus the removal of the ram_size parameter. We assume the invariant of both properties to sum up to "ram size" which is already asserted in pc_memory_init(). Under Xen the invariant seems to hold as well, so we now also check it there. Signed-off-by: Bernhard Beschow Message-Id: <20230630073720.21297-15-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 5 ++++- hw/pci-host/i440fx.c | 12 ++++++++++-- include/hw/pci-host/i440fx.h | 1 - 3 files changed, 14 insertions(+), 4 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 26e8473a4d..c36783809f 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -224,6 +224,9 @@ static void pc_init1(MachineState *machine, if (!xen_enabled()) { pc_memory_init(pcms, system_memory, rom_memory, hole64_size); } else { + assert(machine->ram_size == x86ms->below_4g_mem_size + + x86ms->above_4g_mem_size); + pc_system_flash_cleanup_unused(pcms); if (machine->kernel_filename != NULL) { /* For xen HVM direct kernel boot, load linux here */ @@ -239,7 +242,7 @@ static void pc_init1(MachineState *machine, pci_bus = i440fx_init(pci_type, i440fx_host, - system_memory, system_io, machine->ram_size, + system_memory, system_io, x86ms->below_4g_mem_size, x86ms->above_4g_mem_size, pci_memory, ram_memory); diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index de14c75e95..8731740a1b 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -52,6 +52,8 @@ struct I440FXState { MemoryRegion *pci_address_space; MemoryRegion *ram_memory; Range pci_hole; + uint64_t below_4g_mem_size; + uint64_t above_4g_mem_size; uint64_t pci_hole64_size; bool pci_hole64_fix; uint32_t short_root_bus; @@ -264,7 +266,6 @@ PCIBus *i440fx_init(const char *pci_type, DeviceState *dev, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, - ram_addr_t ram_size, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *pci_address_space, @@ -281,6 +282,8 @@ PCIBus *i440fx_init(const char *pci_type, s->io_memory = address_space_io; s->pci_address_space = pci_address_space; s->ram_memory = ram_memory; + s->below_4g_mem_size = below_4g_mem_size; + s->above_4g_mem_size = above_4g_mem_size; b = pci_root_bus_new(dev, NULL, s->pci_address_space, s->io_memory, 0, TYPE_PCI_BUS); @@ -290,7 +293,7 @@ PCIBus *i440fx_init(const char *pci_type, d = pci_create_simple(b, 0, pci_type); f = I440FX_PCI_DEVICE(d); - range_set_bounds(&s->pci_hole, below_4g_mem_size, + range_set_bounds(&s->pci_hole, s->below_4g_mem_size, IO_APIC_DEFAULT_ADDRESS - 1); /* setup pci memory mapping */ @@ -321,6 +324,7 @@ PCIBus *i440fx_init(const char *pci_type, PAM_EXPAN_BASE + i * PAM_EXPAN_SIZE, PAM_EXPAN_SIZE); } + ram_addr_t ram_size = s->below_4g_mem_size + s->above_4g_mem_size; ram_size = ram_size / 8 / 1024 / 1024; if (ram_size > 255) { ram_size = 255; @@ -380,6 +384,10 @@ static Property i440fx_props[] = { DEFINE_PROP_SIZE(PCI_HOST_PROP_PCI_HOLE64_SIZE, I440FXState, pci_hole64_size, I440FX_PCI_HOST_HOLE64_SIZE_DEFAULT), DEFINE_PROP_UINT32("short_root_bus", I440FXState, short_root_bus, 0), + DEFINE_PROP_SIZE(PCI_HOST_BELOW_4G_MEM_SIZE, I440FXState, + below_4g_mem_size, 0), + DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, I440FXState, + above_4g_mem_size, 0), DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index e3a550021e..7e38456ebb 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -36,7 +36,6 @@ PCIBus *i440fx_init(const char *pci_type, DeviceState *dev, MemoryRegion *address_space_mem, MemoryRegion *address_space_io, - ram_addr_t ram_size, ram_addr_t below_4g_mem_size, ram_addr_t above_4g_mem_size, MemoryRegion *pci_memory, From ff0a8cc4bed6414ab09816b14e006a11140d0d73 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:18 +0200 Subject: [PATCH 31/66] hw/pci-host/i440fx: Add I440FX_HOST_PROP_PCI_TYPE property I440FX needs a different PCI device model if the "igd-passthru" property is enabled. The type name is currently passed as a parameter to i440fx_init(). This parameter will be replaced by a property assignment once i440fx_init() gets resolved. Signed-off-by: Bernhard Beschow Message-Id: <20230630073720.21297-16-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/pci-host/i440fx.c | 6 +++++- include/hw/pci-host/i440fx.h | 2 ++ 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index 8731740a1b..c458987405 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -57,6 +57,8 @@ struct I440FXState { uint64_t pci_hole64_size; bool pci_hole64_fix; uint32_t short_root_bus; + + char *pci_type; }; #define I440FX_PAM 0x59 @@ -284,13 +286,14 @@ PCIBus *i440fx_init(const char *pci_type, s->ram_memory = ram_memory; s->below_4g_mem_size = below_4g_mem_size; s->above_4g_mem_size = above_4g_mem_size; + s->pci_type = (char *)pci_type; b = pci_root_bus_new(dev, NULL, s->pci_address_space, s->io_memory, 0, TYPE_PCI_BUS); phb->bus = b; sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); - d = pci_create_simple(b, 0, pci_type); + d = pci_create_simple(b, 0, s->pci_type); f = I440FX_PCI_DEVICE(d); range_set_bounds(&s->pci_hole, s->below_4g_mem_size, @@ -389,6 +392,7 @@ static Property i440fx_props[] = { DEFINE_PROP_SIZE(PCI_HOST_ABOVE_4G_MEM_SIZE, I440FXState, above_4g_mem_size, 0), DEFINE_PROP_BOOL("x-pci-hole64-fix", I440FXState, pci_hole64_fix, true), + DEFINE_PROP_STRING(I440FX_HOST_PROP_PCI_TYPE, I440FXState, pci_type), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index 7e38456ebb..2d7bae5a45 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -15,6 +15,8 @@ #include "hw/pci-host/pam.h" #include "qom/object.h" +#define I440FX_HOST_PROP_PCI_TYPE "pci-type" + #define TYPE_I440FX_PCI_HOST_BRIDGE "i440FX-pcihost" #define TYPE_I440FX_PCI_DEVICE "i440FX" From ce5ac09a7548d90ac414f60bbe87c69a8487de10 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:19 +0200 Subject: [PATCH 32/66] hw/pci-host/i440fx: Resolve i440fx_init() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit i440fx_init() is a legacy init function. The previous patches worked towards TYPE_I440FX_PCI_HOST_BRIDGE to be instantiated the QOM way. Do this now by transforming the parameters passed to i440fx_init() into property assignments. Signed-off-by: Bernhard Beschow Message-Id: <20230630073720.21297-17-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Philippe Mathieu-Daudé --- hw/i386/pc_piix.c | 32 +++++++++++++++++++++----------- hw/pci-host/i440fx.c | 33 +++++---------------------------- include/hw/pci-host/i440fx.h | 10 ---------- 3 files changed, 26 insertions(+), 49 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index c36783809f..62148d7636 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -126,7 +126,7 @@ static void pc_init1(MachineState *machine, MemoryRegion *rom_memory = system_memory; ram_addr_t lowmem; uint64_t hole64_size = 0; - DeviceState *i440fx_host = NULL; + Object *phb = NULL; /* * Calculate ram split, for memory below and above 4G. It's a bit @@ -201,10 +201,9 @@ static void pc_init1(MachineState *machine, pci_memory = g_new(MemoryRegion, 1); memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); rom_memory = pci_memory; - i440fx_host = qdev_new(host_type); - object_property_add_child(OBJECT(machine), "i440fx", - OBJECT(i440fx_host)); - hole64_size = object_property_get_uint(OBJECT(i440fx_host), + phb = OBJECT(qdev_new(host_type)); + object_property_add_child(OBJECT(machine), "i440fx", phb); + hole64_size = object_property_get_uint(phb, PCI_HOST_PROP_PCI_HOLE64_SIZE, &error_abort); } @@ -240,12 +239,23 @@ static void pc_init1(MachineState *machine, PIIX3State *piix3; PCIDevice *pci_dev; - pci_bus = i440fx_init(pci_type, - i440fx_host, - system_memory, system_io, - x86ms->below_4g_mem_size, - x86ms->above_4g_mem_size, - pci_memory, ram_memory); + object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM, + OBJECT(ram_memory), &error_fatal); + object_property_set_link(phb, PCI_HOST_PROP_PCI_MEM, + OBJECT(pci_memory), &error_fatal); + object_property_set_link(phb, PCI_HOST_PROP_SYSTEM_MEM, + OBJECT(system_memory), &error_fatal); + object_property_set_link(phb, PCI_HOST_PROP_IO_MEM, + OBJECT(system_io), &error_fatal); + object_property_set_uint(phb, PCI_HOST_BELOW_4G_MEM_SIZE, + x86ms->below_4g_mem_size, &error_fatal); + object_property_set_uint(phb, PCI_HOST_ABOVE_4G_MEM_SIZE, + x86ms->above_4g_mem_size, &error_fatal); + object_property_set_str(phb, I440FX_HOST_PROP_PCI_TYPE, + pci_type, &error_fatal); + sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); + + pci_bus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pci.0")); pci_bus_map_irqs(pci_bus, xen_enabled() ? xen_pci_slot_get_pirq : pc_pci_slot_get_pirq); diff --git a/hw/pci-host/i440fx.c b/hw/pci-host/i440fx.c index c458987405..62d6287681 100644 --- a/hw/pci-host/i440fx.c +++ b/hw/pci-host/i440fx.c @@ -249,9 +249,14 @@ static void i440fx_pcihost_initfn(Object *obj) static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) { + ERRP_GUARD(); I440FXState *s = I440FX_PCI_HOST_BRIDGE(dev); PCIHostState *phb = PCI_HOST_BRIDGE(dev); SysBusDevice *sbd = SYS_BUS_DEVICE(dev); + PCIBus *b; + PCIDevice *d; + PCII440FXState *f; + unsigned i; memory_region_add_subregion(s->io_memory, 0xcf8, &phb->conf_mem); sysbus_init_ioports(sbd, 0xcf8, 4); @@ -262,36 +267,10 @@ static void i440fx_pcihost_realize(DeviceState *dev, Error **errp) /* register i440fx 0xcf8 port as coalesced pio */ memory_region_set_flush_coalesced(&phb->data_mem); memory_region_add_coalescing(&phb->conf_mem, 0, 4); -} - -PCIBus *i440fx_init(const char *pci_type, - DeviceState *dev, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, - ram_addr_t below_4g_mem_size, - ram_addr_t above_4g_mem_size, - MemoryRegion *pci_address_space, - MemoryRegion *ram_memory) -{ - I440FXState *s = I440FX_PCI_HOST_BRIDGE(dev); - PCIHostState *phb = PCI_HOST_BRIDGE(dev); - PCIBus *b; - PCIDevice *d; - PCII440FXState *f; - unsigned i; - - s->system_memory = address_space_mem; - s->io_memory = address_space_io; - s->pci_address_space = pci_address_space; - s->ram_memory = ram_memory; - s->below_4g_mem_size = below_4g_mem_size; - s->above_4g_mem_size = above_4g_mem_size; - s->pci_type = (char *)pci_type; b = pci_root_bus_new(dev, NULL, s->pci_address_space, s->io_memory, 0, TYPE_PCI_BUS); phb->bus = b; - sysbus_realize_and_unref(SYS_BUS_DEVICE(dev), &error_fatal); d = pci_create_simple(b, 0, s->pci_type); f = I440FX_PCI_DEVICE(d); @@ -335,8 +314,6 @@ PCIBus *i440fx_init(const char *pci_type, d->config[I440FX_COREBOOT_RAM_SIZE] = ram_size; i440fx_update_memory_mappings(f); - - return b; } static void i440fx_class_init(ObjectClass *klass, void *data) diff --git a/include/hw/pci-host/i440fx.h b/include/hw/pci-host/i440fx.h index 2d7bae5a45..c988f70890 100644 --- a/include/hw/pci-host/i440fx.h +++ b/include/hw/pci-host/i440fx.h @@ -34,14 +34,4 @@ struct PCII440FXState { #define TYPE_IGD_PASSTHROUGH_I440FX_PCI_DEVICE "igd-passthrough-i440FX" -PCIBus *i440fx_init(const char *pci_type, - DeviceState *dev, - MemoryRegion *address_space_mem, - MemoryRegion *address_space_io, - ram_addr_t below_4g_mem_size, - ram_addr_t above_4g_mem_size, - MemoryRegion *pci_memory, - MemoryRegion *ram_memory); - - #endif From c589f7cf0860534dfd21023dec2ec322688a5a03 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Fri, 30 Jun 2023 09:37:20 +0200 Subject: [PATCH 33/66] hw/i386/pc_piix: Move i440fx' realize near its qdev_new() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I440FX realization is currently mixed with PIIX3 creation. Furthermore, it is common practice to only set properties between a device's qdev_new() and qdev_realize(). Clean up to resolve both issues. Since I440FX spawns a PCI bus let's also move the pci_bus initialization there. Note that when running `qemu-system-x86_64 -M pc -S` before and after this patch, `info mtree` in the QEMU console doesn't show any differences except that the ordering is different. Signed-off-by: Bernhard Beschow Reviewed-by: Philippe Mathieu-Daudé Message-Id: <20230630073720.21297-18-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 51 ++++++++++++++++++++++++----------------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index 62148d7636..b18443d3df 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -114,7 +114,7 @@ static void pc_init1(MachineState *machine, X86MachineState *x86ms = X86_MACHINE(machine); MemoryRegion *system_memory = get_system_memory(); MemoryRegion *system_io = get_system_io(); - PCIBus *pci_bus; + PCIBus *pci_bus = NULL; ISABus *isa_bus; int piix3_devfn = -1; qemu_irq smi_irq; @@ -126,7 +126,6 @@ static void pc_init1(MachineState *machine, MemoryRegion *rom_memory = system_memory; ram_addr_t lowmem; uint64_t hole64_size = 0; - Object *phb = NULL; /* * Calculate ram split, for memory below and above 4G. It's a bit @@ -198,11 +197,36 @@ static void pc_init1(MachineState *machine, } if (pcmc->pci_enabled) { + Object *phb; + pci_memory = g_new(MemoryRegion, 1); memory_region_init(pci_memory, NULL, "pci", UINT64_MAX); rom_memory = pci_memory; + phb = OBJECT(qdev_new(host_type)); object_property_add_child(OBJECT(machine), "i440fx", phb); + object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM, + OBJECT(ram_memory), &error_fatal); + object_property_set_link(phb, PCI_HOST_PROP_PCI_MEM, + OBJECT(pci_memory), &error_fatal); + object_property_set_link(phb, PCI_HOST_PROP_SYSTEM_MEM, + OBJECT(system_memory), &error_fatal); + object_property_set_link(phb, PCI_HOST_PROP_IO_MEM, + OBJECT(system_io), &error_fatal); + object_property_set_uint(phb, PCI_HOST_BELOW_4G_MEM_SIZE, + x86ms->below_4g_mem_size, &error_fatal); + object_property_set_uint(phb, PCI_HOST_ABOVE_4G_MEM_SIZE, + x86ms->above_4g_mem_size, &error_fatal); + object_property_set_str(phb, I440FX_HOST_PROP_PCI_TYPE, pci_type, + &error_fatal); + sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); + + pci_bus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pci.0")); + pci_bus_map_irqs(pci_bus, + xen_enabled() ? xen_pci_slot_get_pirq + : pc_pci_slot_get_pirq); + pcms->bus = pci_bus; + hole64_size = object_property_get_uint(phb, PCI_HOST_PROP_PCI_HOLE64_SIZE, &error_abort); @@ -239,28 +263,6 @@ static void pc_init1(MachineState *machine, PIIX3State *piix3; PCIDevice *pci_dev; - object_property_set_link(phb, PCI_HOST_PROP_RAM_MEM, - OBJECT(ram_memory), &error_fatal); - object_property_set_link(phb, PCI_HOST_PROP_PCI_MEM, - OBJECT(pci_memory), &error_fatal); - object_property_set_link(phb, PCI_HOST_PROP_SYSTEM_MEM, - OBJECT(system_memory), &error_fatal); - object_property_set_link(phb, PCI_HOST_PROP_IO_MEM, - OBJECT(system_io), &error_fatal); - object_property_set_uint(phb, PCI_HOST_BELOW_4G_MEM_SIZE, - x86ms->below_4g_mem_size, &error_fatal); - object_property_set_uint(phb, PCI_HOST_ABOVE_4G_MEM_SIZE, - x86ms->above_4g_mem_size, &error_fatal); - object_property_set_str(phb, I440FX_HOST_PROP_PCI_TYPE, - pci_type, &error_fatal); - sysbus_realize_and_unref(SYS_BUS_DEVICE(phb), &error_fatal); - - pci_bus = PCI_BUS(qdev_get_child_bus(DEVICE(phb), "pci.0")); - pci_bus_map_irqs(pci_bus, - xen_enabled() ? xen_pci_slot_get_pirq - : pc_pci_slot_get_pirq); - pcms->bus = pci_bus; - pci_dev = pci_create_simple_multifunction(pci_bus, -1, true, TYPE_PIIX3_DEVICE); @@ -285,7 +287,6 @@ static void pc_init1(MachineState *machine, rtc_state = ISA_DEVICE(object_resolve_path_component(OBJECT(pci_dev), "rtc")); } else { - pci_bus = NULL; isa_bus = isa_bus_new(NULL, system_memory, system_io, &error_abort); From e052944a966ca4eb0131121646fa0ef047c25e52 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 4 Mar 2023 12:40:42 +0100 Subject: [PATCH 34/66] hw/pci/pci: Remove multifunction parameter from pci_create_simple_multifunction() There is also pci_create_simple() which creates non-multifunction PCI devices. Accordingly the parameter is always set to true when a multi function PCI device is to be created. The reason for the parameter's existence seems to be that it is used in the internal PCI code as well which is the only location where it gets set to false. This one usage can be replaced by trivial code. Remove this redundant, error-prone parameter. Signed-off-by: Bernhard Beschow Message-Id: <20230304114043.121024-5-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_piix.c | 3 +-- hw/i386/pc_q35.c | 4 ++-- hw/mips/boston.c | 3 +-- hw/mips/fuloong2e.c | 2 +- hw/mips/malta.c | 2 +- hw/pci/pci.c | 7 ++++--- hw/ppc/pegasos2.c | 2 +- include/hw/pci/pci.h | 1 - 8 files changed, 11 insertions(+), 13 deletions(-) diff --git a/hw/i386/pc_piix.c b/hw/i386/pc_piix.c index b18443d3df..ac72e8f5be 100644 --- a/hw/i386/pc_piix.c +++ b/hw/i386/pc_piix.c @@ -263,8 +263,7 @@ static void pc_init1(MachineState *machine, PIIX3State *piix3; PCIDevice *pci_dev; - pci_dev = pci_create_simple_multifunction(pci_bus, -1, true, - TYPE_PIIX3_DEVICE); + pci_dev = pci_create_simple_multifunction(pci_bus, -1, TYPE_PIIX3_DEVICE); if (xen_enabled()) { pci_device_set_intx_routing_notifier( diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index 02dd274276..fdab1f6a56 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -304,7 +304,7 @@ static void pc_q35_init(MachineState *machine) ahci = pci_create_simple_multifunction(host_bus, PCI_DEVFN(ICH9_SATA1_DEV, ICH9_SATA1_FUNC), - true, "ich9-ahci"); + "ich9-ahci"); idebus[0] = qdev_get_child_bus(&ahci->qdev, "ide.0"); idebus[1] = qdev_get_child_bus(&ahci->qdev, "ide.1"); g_assert(MAX_SATA_PORTS == ahci_get_num_ports(ahci)); @@ -326,7 +326,7 @@ static void pc_q35_init(MachineState *machine) smb = pci_create_simple_multifunction(host_bus, PCI_DEVFN(ICH9_SMB_DEV, ICH9_SMB_FUNC), - true, TYPE_ICH9_SMB_DEVICE); + TYPE_ICH9_SMB_DEVICE); pcms->smbus = I2C_BUS(qdev_get_child_bus(DEVICE(smb), "i2c")); smbus_eeprom_init(pcms->smbus, 8, NULL, 0); diff --git a/hw/mips/boston.c b/hw/mips/boston.c index 21ad844519..4e11ff6cd6 100644 --- a/hw/mips/boston.c +++ b/hw/mips/boston.c @@ -770,8 +770,7 @@ static void boston_mach_init(MachineState *machine) boston_lcd_event, NULL, s, NULL, true); ahci = pci_create_simple_multifunction(&PCI_BRIDGE(&pcie2->root)->sec_bus, - PCI_DEVFN(0, 0), - true, TYPE_ICH9_AHCI); + PCI_DEVFN(0, 0), TYPE_ICH9_AHCI); g_assert(ARRAY_SIZE(hd) == ahci_get_num_ports(ahci)); ide_drive_get(hd, ahci_get_num_ports(ahci)); ahci_ide_create_devs(ahci, hd); diff --git a/hw/mips/fuloong2e.c b/hw/mips/fuloong2e.c index cfc8ca6ae4..c827f615f3 100644 --- a/hw/mips/fuloong2e.c +++ b/hw/mips/fuloong2e.c @@ -297,7 +297,7 @@ static void mips_fuloong2e_init(MachineState *machine) /* South bridge -> IP5 */ pci_dev = pci_create_simple_multifunction(pci_bus, PCI_DEVFN(FULOONG2E_VIA_SLOT, 0), - true, TYPE_VT82C686B_ISA); + TYPE_VT82C686B_ISA); object_property_add_alias(OBJECT(machine), "rtc-time", object_resolve_path_component(OBJECT(pci_dev), "rtc"), diff --git a/hw/mips/malta.c b/hw/mips/malta.c index 47cb49f691..f9618fa5f5 100644 --- a/hw/mips/malta.c +++ b/hw/mips/malta.c @@ -1251,7 +1251,7 @@ void mips_malta_init(MachineState *machine) pci_bus_map_irqs(pci_bus, malta_pci_slot_get_pirq); /* Southbridge */ - piix4 = pci_create_simple_multifunction(pci_bus, PIIX4_PCI_DEVFN, true, + piix4 = pci_create_simple_multifunction(pci_bus, PIIX4_PCI_DEVFN, TYPE_PIIX4_PCI_DEVICE); isa_bus = ISA_BUS(qdev_get_child_bus(DEVICE(piix4), "isa.0")); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index e2eb4c3b4a..5152989c10 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2186,17 +2186,18 @@ bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp) } PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn, - bool multifunction, const char *name) { - PCIDevice *dev = pci_new_multifunction(devfn, multifunction, name); + PCIDevice *dev = pci_new_multifunction(devfn, true, name); pci_realize_and_unref(dev, bus, &error_fatal); return dev; } PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name) { - return pci_create_simple_multifunction(bus, devfn, false, name); + PCIDevice *dev = pci_new(devfn, name); + pci_realize_and_unref(dev, bus, &error_fatal); + return dev; } static uint8_t pci_find_space(PCIDevice *pdev, uint8_t size) diff --git a/hw/ppc/pegasos2.c b/hw/ppc/pegasos2.c index 9c9944188b..4447bbe8ec 100644 --- a/hw/ppc/pegasos2.c +++ b/hw/ppc/pegasos2.c @@ -180,7 +180,7 @@ static void pegasos2_init(MachineState *machine) /* VIA VT8231 South Bridge (multifunction PCI device) */ via = OBJECT(pci_create_simple_multifunction(pci_bus, PCI_DEVFN(12, 0), - true, TYPE_VT8231_ISA)); + TYPE_VT8231_ISA)); for (i = 0; i < PCI_NUM_PINS; i++) { pm->via_pirq[i] = qdev_get_gpio_in_named(DEVICE(via), "pirq", i); } diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index e6d0574a29..18c35b64db 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -583,7 +583,6 @@ PCIDevice *pci_new(int devfn, const char *name); bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp); PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn, - bool multifunction, const char *name); PCIDevice *pci_create_simple(PCIBus *bus, int devfn, const char *name); From c925f40a29906355b516bad4c6ef80d30cf971b6 Mon Sep 17 00:00:00 2001 From: Bernhard Beschow Date: Sat, 4 Mar 2023 12:40:43 +0100 Subject: [PATCH 35/66] hw/pci/pci: Remove multifunction parameter from pci_new_multifunction() There is also pci_new() which creates non-multifunction PCI devices. Accordingly the parameter is always set to true when a multi function PCI device is to be created. The reason for the parameter's existence seems to be that it is used in the internal PCI code as well which is the only location where it gets set to false. This one usage can be resolved by factoring out an internal helper function. Remove this redundant, error-prone parameter. Signed-off-by: Bernhard Beschow Message-Id: <20230304114043.121024-6-shentey@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/i386/pc_q35.c | 6 +++--- hw/pci-host/sabre.c | 6 ++---- hw/pci/pci.c | 13 +++++++++---- hw/sparc64/sun4u.c | 5 ++--- include/hw/pci/pci.h | 3 +-- 5 files changed, 17 insertions(+), 16 deletions(-) diff --git a/hw/i386/pc_q35.c b/hw/i386/pc_q35.c index fdab1f6a56..dc27a9e223 100644 --- a/hw/i386/pc_q35.c +++ b/hw/i386/pc_q35.c @@ -100,12 +100,12 @@ static int ehci_create_ich9_with_companions(PCIBus *bus, int slot) return -1; } - ehci = pci_new_multifunction(PCI_DEVFN(slot, 7), true, name); + ehci = pci_new_multifunction(PCI_DEVFN(slot, 7), name); pci_realize_and_unref(ehci, bus, &error_fatal); usbbus = QLIST_FIRST(&ehci->qdev.child_bus); for (i = 0; i < 3; i++) { - uhci = pci_new_multifunction(PCI_DEVFN(slot, comp[i].func), true, + uhci = pci_new_multifunction(PCI_DEVFN(slot, comp[i].func), comp[i].name); qdev_prop_set_string(&uhci->qdev, "masterbus", usbbus->name); qdev_prop_set_uint32(&uhci->qdev, "firstport", comp[i].port); @@ -239,7 +239,7 @@ static void pc_q35_init(MachineState *machine) pcms->bus = host_bus; /* create ISA bus */ - lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), true, + lpc = pci_new_multifunction(PCI_DEVFN(ICH9_LPC_DEV, ICH9_LPC_FUNC), TYPE_ICH9_LPC_DEVICE); qdev_prop_set_bit(DEVICE(lpc), "smm-enabled", x86_machine_is_smm_enabled(x86ms)); diff --git a/hw/pci-host/sabre.c b/hw/pci-host/sabre.c index 949ecc21f2..dcb2e230b6 100644 --- a/hw/pci-host/sabre.c +++ b/hw/pci-host/sabre.c @@ -387,14 +387,12 @@ static void sabre_realize(DeviceState *dev, Error **errp) pci_setup_iommu(phb->bus, sabre_pci_dma_iommu, s->iommu); /* APB secondary busses */ - pci_dev = pci_new_multifunction(PCI_DEVFN(1, 0), true, - TYPE_SIMBA_PCI_BRIDGE); + pci_dev = pci_new_multifunction(PCI_DEVFN(1, 0), TYPE_SIMBA_PCI_BRIDGE); s->bridgeB = PCI_BRIDGE(pci_dev); pci_bridge_map_irq(s->bridgeB, "pciB", pci_simbaB_map_irq); pci_realize_and_unref(pci_dev, phb->bus, &error_fatal); - pci_dev = pci_new_multifunction(PCI_DEVFN(1, 1), true, - TYPE_SIMBA_PCI_BRIDGE); + pci_dev = pci_new_multifunction(PCI_DEVFN(1, 1), TYPE_SIMBA_PCI_BRIDGE); s->bridgeA = PCI_BRIDGE(pci_dev); pci_bridge_map_irq(s->bridgeA, "pciA", pci_simbaA_map_irq); pci_realize_and_unref(pci_dev, phb->bus, &error_fatal); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 5152989c10..fb17138f7d 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -2164,8 +2164,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) pci_dev->msi_trigger = pci_msi_trigger; } -PCIDevice *pci_new_multifunction(int devfn, bool multifunction, - const char *name) +static PCIDevice *pci_new_internal(int devfn, bool multifunction, + const char *name) { DeviceState *dev; @@ -2175,9 +2175,14 @@ PCIDevice *pci_new_multifunction(int devfn, bool multifunction, return PCI_DEVICE(dev); } +PCIDevice *pci_new_multifunction(int devfn, const char *name) +{ + return pci_new_internal(devfn, true, name); +} + PCIDevice *pci_new(int devfn, const char *name) { - return pci_new_multifunction(devfn, false, name); + return pci_new_internal(devfn, false, name); } bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp) @@ -2188,7 +2193,7 @@ bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp) PCIDevice *pci_create_simple_multifunction(PCIBus *bus, int devfn, const char *name) { - PCIDevice *dev = pci_new_multifunction(devfn, true, name); + PCIDevice *dev = pci_new_multifunction(devfn, name); pci_realize_and_unref(dev, bus, &error_fatal); return dev; } diff --git a/hw/sparc64/sun4u.c b/hw/sparc64/sun4u.c index 29e9b6cc26..d908a38f73 100644 --- a/hw/sparc64/sun4u.c +++ b/hw/sparc64/sun4u.c @@ -612,7 +612,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem, pci_bus_set_slot_reserved_mask(pci_busA, 0xfffffff1); pci_bus_set_slot_reserved_mask(pci_busB, 0xfffffff0); - ebus = pci_new_multifunction(PCI_DEVFN(1, 0), true, TYPE_EBUS); + ebus = pci_new_multifunction(PCI_DEVFN(1, 0), TYPE_EBUS); qdev_prop_set_uint64(DEVICE(ebus), "console-serial-base", hwdef->console_serial_base); pci_realize_and_unref(ebus, pci_busA, &error_fatal); @@ -648,8 +648,7 @@ static void sun4uv_init(MemoryRegion *address_space_mem, if (!nd->model || strcmp(nd->model, mc->default_nic) == 0) { if (!onboard_nic) { - pci_dev = pci_new_multifunction(PCI_DEVFN(1, 1), - true, mc->default_nic); + pci_dev = pci_new_multifunction(PCI_DEVFN(1, 1), mc->default_nic); bus = pci_busA; memcpy(&macaddr, &nd->macaddr.a, sizeof(MACAddr)); onboard_nic = true; diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 18c35b64db..ab2bd65a3a 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -577,8 +577,7 @@ pci_set_quad_by_mask(uint8_t *config, uint64_t mask, uint64_t reg) pci_set_quad(config, (~mask & val) | (mask & rval)); } -PCIDevice *pci_new_multifunction(int devfn, bool multifunction, - const char *name); +PCIDevice *pci_new_multifunction(int devfn, const char *name); PCIDevice *pci_new(int devfn, const char *name); bool pci_realize_and_unref(PCIDevice *dev, PCIBus *bus, Error **errp); From 08f632848008544b7e1ab8a8aa50df02c39a26a1 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 11 Apr 2023 18:04:08 +0900 Subject: [PATCH 36/66] pcie: Release references of virtual functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pci_new() automatically retains a reference to a virtual function when registering it so we need to release the reference when unregistering. Fixes: 7c0fa8dff8 ("pcie: Add support for Single Root I/O Virtualization (SR/IOV)") Signed-off-by: Akihiko Odaki Message-Id: <20230411090408.48366-1-akihiko.odaki@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Cédric Le Goater --- hw/pci/pcie_sriov.c | 1 + 1 file changed, 1 insertion(+) diff --git a/hw/pci/pcie_sriov.c b/hw/pci/pcie_sriov.c index aa5a757b11..76a3b6917e 100644 --- a/hw/pci/pcie_sriov.c +++ b/hw/pci/pcie_sriov.c @@ -211,6 +211,7 @@ static void unregister_vfs(PCIDevice *dev) error_free(local_err); } object_unparent(OBJECT(vf)); + object_unref(OBJECT(vf)); } g_free(dev->exp.sriov_pf.vf); dev->exp.sriov_pf.vf = NULL; From b479bc3c9d5e473553137641fd31069c251f0d6e Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Tue, 4 Jul 2023 11:34:33 +0800 Subject: [PATCH 37/66] vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_mac() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to VirtIO standard, "The class, command and command-specific-data are set by the driver, and the device sets the ack byte. There is little it can do except issue a diagnostic if ack is not VIRTIO_NET_OK." Therefore, QEMU should stop sending the queued SVQ commands and cancel the device startup if the device's ack is not VIRTIO_NET_OK. Yet the problem is that, vhost_vdpa_net_load_mac() returns 1 based on `*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR. As a result, net->nc->info->load() also returns 1, this makes vhost_net_start_one() incorrectly assume the device state is successfully loaded by vhost_vdpa_net_load() and return 0, instead of goto `fail` label to cancel the device startup, as vhost_net_start_one() only cancels the device startup when net->nc->info->load() returns a negative value. This patch fixes this problem by returning -EIO when the device's ack is not VIRTIO_NET_OK. Fixes: f73c0c43ac ("vdpa: extract vhost_vdpa_net_load_mac from vhost_vdpa_net_load") Signed-off-by: Hawkins Jiawei Acked-by: Jason Wang Acked-by: Eugenio Pérez Message-Id: Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index c0e93ce568..345a301d62 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -652,8 +652,9 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) if (unlikely(dev_written < 0)) { return dev_written; } - - return *s->status != VIRTIO_NET_OK; + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } } return 0; From f45fd95ec9e8104f6af801c734375029dda0f542 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Tue, 4 Jul 2023 11:34:34 +0800 Subject: [PATCH 38/66] vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_mq() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to VirtIO standard, "The class, command and command-specific-data are set by the driver, and the device sets the ack byte. There is little it can do except issue a diagnostic if ack is not VIRTIO_NET_OK." Therefore, QEMU should stop sending the queued SVQ commands and cancel the device startup if the device's ack is not VIRTIO_NET_OK. Yet the problem is that, vhost_vdpa_net_load_mq() returns 1 based on `*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR. As a result, net->nc->info->load() also returns 1, this makes vhost_net_start_one() incorrectly assume the device state is successfully loaded by vhost_vdpa_net_load() and return 0, instead of goto `fail` label to cancel the device startup, as vhost_net_start_one() only cancels the device startup when net->nc->info->load() returns a negative value. This patch fixes this problem by returning -EIO when the device's ack is not VIRTIO_NET_OK. Fixes: f64c7cda69 ("vdpa: Add vhost_vdpa_net_load_mq") Signed-off-by: Hawkins Jiawei Acked-by: Jason Wang Acked-by: Eugenio Pérez Message-Id: Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 345a301d62..8477ed2579 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -677,8 +677,11 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, if (unlikely(dev_written < 0)) { return dev_written; } + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } - return *s->status != VIRTIO_NET_OK; + return 0; } static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, From 6f34807116ffef7c449a656dbe2091d4f4da89c8 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Tue, 4 Jul 2023 11:34:35 +0800 Subject: [PATCH 39/66] vdpa: Return -EIO if device ack is VIRTIO_NET_ERR in _load_offloads() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to VirtIO standard, "The class, command and command-specific-data are set by the driver, and the device sets the ack byte. There is little it can do except issue a diagnostic if ack is not VIRTIO_NET_OK." Therefore, QEMU should stop sending the queued SVQ commands and cancel the device startup if the device's ack is not VIRTIO_NET_OK. Yet the problem is that, vhost_vdpa_net_load_offloads() returns 1 based on `*s->status != VIRTIO_NET_OK` when the device's ack is VIRTIO_NET_ERR. As a result, net->nc->info->load() also returns 1, this makes vhost_net_start_one() incorrectly assume the device state is successfully loaded by vhost_vdpa_net_load() and return 0, instead of goto `fail` label to cancel the device startup, as vhost_net_start_one() only cancels the device startup when net->nc->info->load() returns a negative value. This patch fixes this problem by returning -EIO when the device's ack is not VIRTIO_NET_OK. Fixes: 0b58d3686a ("vdpa: Add vhost_vdpa_net_load_offloads()") Signed-off-by: Hawkins Jiawei Acked-by: Jason Wang Acked-by: Eugenio Pérez Message-Id: Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 8477ed2579..679ef4bed0 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -718,8 +718,11 @@ static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, if (unlikely(dev_written < 0)) { return dev_written; } + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } - return *s->status != VIRTIO_NET_OK; + return 0; } static int vhost_vdpa_net_load(NetClientState *nc) From 77812aa7b1fdf8f547c35a7f9a4eb1cbf3a073db Mon Sep 17 00:00:00 2001 From: Laurent Vivier Date: Tue, 4 Jul 2023 09:19:31 +0200 Subject: [PATCH 40/66] vhost-vdpa: mute unaligned memory error report With TPM CRM device, vhost-vdpa reports an error when it tries to register a listener for a non aligned memory region: qemu-system-x86_64: vhost_vdpa_listener_region_add received unaligned region qemu-system-x86_64: vhost_vdpa_listener_region_del received unaligned region This error can be confusing for the user whereas we only need to skip the region (as it's already done after the error_report()) Rather than introducing a special case for TPM CRB memory section to not display the message in this case, simply replace the error_report() by a trace function (with more information, like the memory region name). Signed-off-by: Laurent Vivier Message-Id: <20230704071931.575888-2-lvivier@redhat.com> Reviewed-by: David Hildenbrand Acked-by: Jason Wang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/trace-events | 2 ++ hw/virtio/vhost-vdpa.c | 8 ++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 6265231683..4e39ed8a95 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -34,7 +34,9 @@ vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_ vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 +vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" +vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 vhost_vdpa_listener_region_del(void *vdpa, uint64_t iova, uint64_t llend) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64 vhost_vdpa_add_status(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 vhost_vdpa_init(void *dev, void *vdpa) "dev: %p vdpa: %p" diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 6246e5b9bf..42f2a4bae9 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -323,7 +323,9 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != (section->offset_within_region & ~TARGET_PAGE_MASK))) { - error_report("%s received unaligned region", __func__); + trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, + section->offset_within_address_space & ~TARGET_PAGE_MASK, + section->offset_within_region & ~TARGET_PAGE_MASK); return; } @@ -405,7 +407,9 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, if (unlikely((section->offset_within_address_space & ~TARGET_PAGE_MASK) != (section->offset_within_region & ~TARGET_PAGE_MASK))) { - error_report("%s received unaligned region", __func__); + trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, + section->offset_within_address_space & ~TARGET_PAGE_MASK, + section->offset_within_region & ~TARGET_PAGE_MASK); return; } From 8d60105a5973320ffd72a48f208cfd2b041d6c0b Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Wed, 5 Jul 2023 17:29:19 +0530 Subject: [PATCH 41/66] tests/acpi: allow changes in DSDT.noacpihp table blob We are going to fix bio-tables-test in the next patch and hence need to make sure the acpi tests continue to pass. Signed-off-by: Ani Sinha Acked-by: Igor Mammedov Message-Id: <20230705115925.5339-2-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test-allowed-diff.h | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/qtest/bios-tables-test-allowed-diff.h b/tests/qtest/bios-tables-test-allowed-diff.h index dfb8523c8b..31df9c6187 100644 --- a/tests/qtest/bios-tables-test-allowed-diff.h +++ b/tests/qtest/bios-tables-test-allowed-diff.h @@ -1 +1,2 @@ /* List of comma-separated changed AML files to ignore */ +"tests/data/acpi/q35/DSDT.noacpihp", From 7b0ba7b1c8b362a83bfb30170dae6035a71192c2 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Wed, 5 Jul 2023 17:29:20 +0530 Subject: [PATCH 42/66] tests/acpi/bios-tables-test: use the correct slot on the pcie-root-port PCIE ports only have one slot, slot 0. Hence, non-zero slots are not available for PCIE devices on PCIE root ports. Fix test_acpi_q35_tcg_no_acpi_hotplug() so that the test does not use them. Signed-off-by: Ani Sinha Reviewed-by: Igor Mammedov Message-Id: <20230705115925.5339-3-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index ed1c69cf01..47ba20b957 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -1020,9 +1020,9 @@ static void test_acpi_q35_tcg_no_acpi_hotplug(void) " -device pci-testdev,bus=nohprp,acpi-index=501" " -device pcie-root-port,id=nohprpint,port=0x0,chassis=3,hotplug=off," "multifunction=on,addr=8.0" - " -device pci-testdev,bus=nohprpint,acpi-index=601,addr=8.1" + " -device pci-testdev,bus=nohprpint,acpi-index=601,addr=0.1" " -device pcie-root-port,id=hprp2,port=0x0,chassis=4,bus=nohprpint," - "addr=9.0" + "addr=0.2" " -device pci-testdev,bus=hprp2,acpi-index=602" , &data); free_test_data(&data); From bac4711b07ef92be45628ae7b399a09d850ee596 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Wed, 5 Jul 2023 17:29:21 +0530 Subject: [PATCH 43/66] tests/acpi/bios-tables-test: update acpi blob q35/DSDT.noacpihp Some fixes were committed in bios-tables-test in the previous commit. Update the acpi blob and clear bios-tables-test-allowed-diff.h so that the test continues to pass with the changes in the bios-tables-test. Following is the asl diff between the old and the newly updated blob: @@ -1,30 +1,30 @@ /* * Intel ACPI Component Architecture * AML/ASL+ Disassembler version 20210604 (64-bit version) * Copyright (c) 2000 - 2021 Intel Corporation * * Disassembling to symbolic ASL+ operators * - * Disassembly of tests/data/acpi/q35/DSDT.noacpihp, Wed Jun 21 18:26:52 2023 + * Disassembly of /tmp/aml-O8SU61, Wed Jun 21 18:26:52 2023 * * Original Table Header: * Signature "DSDT" - * Length 0x00002038 (8248) + * Length 0x00002031 (8241) * Revision 0x01 **** 32-bit table (V1), no 64-bit math support - * Checksum 0x4A + * Checksum 0x89 * OEM ID "BOCHS " * OEM Table ID "BXPC " * OEM Revision 0x00000001 (1) * Compiler ID "BXPC" * Compiler Version 0x00000001 (1) */ DefinitionBlock ("", "DSDT", 1, "BOCHS ", "BXPC ", 0x00000001) { Scope (\) { OperationRegion (DBG, SystemIO, 0x0402, One) Field (DBG, ByteAcc, NoLock, Preserve) { DBGB, 8 } @@ -3148,48 +3148,48 @@ { Name (_ADR, Zero) // _ADR: Address Method (_DSM, 4, Serialized) // _DSM: Device-Specific Method { Local0 = Package (0x01) { 0x01F5 } Return (EDSM (Arg0, Arg1, Arg2, Arg3, Local0)) } } } Device (S40) { Name (_ADR, 0x00080000) // _ADR: Address - Device (S41) + Device (S01) { - Name (_ADR, 0x00080001) // _ADR: Address + Name (_ADR, One) // _ADR: Address Method (_DSM, 4, Serialized) // _DSM: Device-Specific Method { Local0 = Package (0x01) { 0x0259 } Return (EDSM (Arg0, Arg1, Arg2, Arg3, Local0)) } } - Device (S48) + Device (S02) { - Name (_ADR, 0x00090000) // _ADR: Address + Name (_ADR, 0x02) // _ADR: Address Device (S00) { Name (_ADR, Zero) // _ADR: Address } } } Device (SF8) { Name (_ADR, 0x001F0000) // _ADR: Address OperationRegion (PIRQ, PCI_Config, 0x60, 0x0C) Scope (\_SB) { Field (PCI0.SF8.PIRQ, ByteAcc, NoLock, Preserve) { PRQA, 8, Signed-off-by: Ani Sinha Acked-by: Igor Mammedov Message-Id: <20230705115925.5339-4-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/data/acpi/q35/DSDT.noacpihp | Bin 8248 -> 8241 bytes tests/qtest/bios-tables-test-allowed-diff.h | 1 - 2 files changed, 1 deletion(-) diff --git a/tests/data/acpi/q35/DSDT.noacpihp b/tests/data/acpi/q35/DSDT.noacpihp index 6ab1f0e52543fcb7f84a7fd1327fe5aa42010565..8cab2f8eb9ae94e0165f3f17857ec7d080fb0e13 100644 GIT binary patch delta 109 zcmdntu+f3bCDi)r&-xoSoL DyqFtK delta 94 zcmdn!u)~4NCD Date: Wed, 5 Jul 2023 17:29:22 +0530 Subject: [PATCH 44/66] tests/qtest/hd-geo-test: fix incorrect pcie-root-port usage and simplify test The test attaches a SCSI controller to a non-zero slot and a pcie-to-pci bridge on slot 0 on the same pcie-root-port. Since a downstream device can be attached to a pcie-root-port only on slot 0, the above test configuration is not allowed. Additionally using pcie.0 as id for pcie-to-pci bridge is incorrect as that id is reserved only for the root bus. In the test scenario, there is no need to attach a pcie-root-port to the root complex. A SCSI controller can be attached to a pcie-to-pci bridge which can then be directly attached to the root bus (pcie.0). Fix the test and simplify it. CC: mst@redhat.com CC: imammedo@redhat.com CC: Michael Labiuk Acked-by: Thomas Huth Reviewed-by: Igor Mammedov Signed-off-by: Ani Sinha Message-Id: <20230705115925.5339-5-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/hd-geo-test.c | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) diff --git a/tests/qtest/hd-geo-test.c b/tests/qtest/hd-geo-test.c index 5aa258a2b3..d08bffad91 100644 --- a/tests/qtest/hd-geo-test.c +++ b/tests/qtest/hd-geo-test.c @@ -784,14 +784,12 @@ static void test_override_scsi(void) test_override(args, "pc", expected); } -static void setup_pci_bridge(TestArgs *args, const char *id, const char *rootid) +static void setup_pci_bridge(TestArgs *args, const char *id) { - char *root, *br; - root = g_strdup_printf("-device pcie-root-port,id=%s", rootid); - br = g_strdup_printf("-device pcie-pci-bridge,bus=%s,id=%s", rootid, id); + char *br; + br = g_strdup_printf("-device pcie-pci-bridge,bus=pcie.0,id=%s", id); - args->argc = append_arg(args->argc, args->argv, ARGV_SIZE, root); args->argc = append_arg(args->argc, args->argv, ARGV_SIZE, br); } @@ -811,8 +809,8 @@ static void test_override_scsi_q35(void) add_drive_with_mbr(args, empty_mbr, 1); add_drive_with_mbr(args, empty_mbr, 1); add_drive_with_mbr(args, empty_mbr, 1); - setup_pci_bridge(args, "pcie.0", "br"); - add_scsi_controller(args, "lsi53c895a", "br", 3); + setup_pci_bridge(args, "pcie-pci-br"); + add_scsi_controller(args, "lsi53c895a", "pcie-pci-br", 3); add_scsi_disk(args, 0, 0, 0, 0, 0, 10000, 120, 30); add_scsi_disk(args, 1, 0, 0, 1, 0, 9000, 120, 30); add_scsi_disk(args, 2, 0, 0, 2, 0, 1, 0, 0); @@ -868,9 +866,9 @@ static void test_override_virtio_blk_q35(void) }; add_drive_with_mbr(args, empty_mbr, 1); add_drive_with_mbr(args, empty_mbr, 1); - setup_pci_bridge(args, "pcie.0", "br"); - add_virtio_disk(args, 0, "br", 3, 10000, 120, 30); - add_virtio_disk(args, 1, "br", 4, 9000, 120, 30); + setup_pci_bridge(args, "pcie-pci-br"); + add_virtio_disk(args, 0, "pcie-pci-br", 3, 10000, 120, 30); + add_virtio_disk(args, 1, "pcie-pci-br", 4, 9000, 120, 30); test_override(args, "q35", expected); } From ca92eb5defcf9d1c2106341744a73a03cf26e824 Mon Sep 17 00:00:00 2001 From: Ani Sinha Date: Wed, 5 Jul 2023 17:29:23 +0530 Subject: [PATCH 45/66] hw/pci: warn when PCIe device is plugged into non-zero slot of downstream port PCIe downstream ports only have a single device 0, so PCI Express devices can only be plugged into slot 0 on a PCIe port. Add a warning to let users know when the invalid configuration is used. We may enforce this more strongly later once we get more clarity on whether we are introducing a bad regression for users currently using the wrong configuration. The change has been tested to not break or alter behaviors of ARI capable devices by instantiating seven vfs on an emulated igb device (the maximum number of vfs the igb device supports). The vfs are instantiated correctly and are seen to have non-zero device/slot numbers in the conventional PCI BDF representation. CC: jusual@redhat.com CC: imammedo@redhat.com CC: mst@redhat.com CC: akihiko.odaki@daynix.com Resolves: https://bugzilla.redhat.com/show_bug.cgi?id=2128929 Signed-off-by: Ani Sinha Reviewed-by: Julia Suvorova Message-Id: <20230705115925.5339-6-anisinha@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Akihiko Odaki --- hw/pci/pci.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index fb17138f7d..4b14f31859 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -65,6 +65,7 @@ bool pci_available = true; static char *pcibus_get_dev_path(DeviceState *dev); static char *pcibus_get_fw_dev_path(DeviceState *dev); static void pcibus_reset(BusState *qbus); +static bool pcie_has_upstream_port(PCIDevice *dev); static Property pci_props[] = { DEFINE_PROP_PCI_DEVFN("addr", PCIDevice, devfn, -1), @@ -2121,6 +2122,25 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } } + /* + * A PCIe Downstream Port that do not have ARI Forwarding enabled must + * associate only Device 0 with the device attached to the bus + * representing the Link from the Port (PCIe base spec rev 4.0 ver 0.3, + * sec 7.3.1). + * With ARI, PCI_SLOT() can return non-zero value as the traditional + * 5-bit Device Number and 3-bit Function Number fields in its associated + * Routing IDs, Requester IDs and Completer IDs are interpreted as a + * single 8-bit Function Number. Hence, ignore ARI capable devices. + */ + if (pci_is_express(pci_dev) && + !pcie_find_capability(pci_dev, PCI_EXT_CAP_ID_ARI) && + pcie_has_upstream_port(pci_dev) && + PCI_SLOT(pci_dev->devfn)) { + warn_report("PCI: slot %d is not valid for %s," + " parent device only allows plugging into slot 0.", + PCI_SLOT(pci_dev->devfn), pci_dev->name); + } + if (pci_dev->failover_pair_id) { if (!pci_bus_is_express(pci_get_bus(pci_dev))) { error_setg(errp, "failover primary device must be on " From 94df5b2180d61fb2ee2b04cc007981e58b6479a9 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Wed, 5 Jul 2023 18:51:17 +0200 Subject: [PATCH 46/66] virtio-iommu: Fix 64kB host page size VFIO device assignment When running on a 64kB page size host and protecting a VFIO device with the virtio-iommu, qemu crashes with this kind of message: qemu-kvm: virtio-iommu page mask 0xfffffffffffff000 is incompatible with mask 0x20010000 qemu: hardware error: vfio: DMA mapping failed, unable to continue This is due to the fact the IOMMU MR corresponding to the VFIO device is enabled very late on domain attach, after the machine init. The device reports a minimal 64kB page size but it is too late to be applied. virtio_iommu_set_page_size_mask() fails and this causes vfio_listener_region_add() to end up with hw_error(); To work around this issue, we transiently enable the IOMMU MR on machine init to collect the page size requirements and then restore the bypass state. Fixes: 90519b9053 ("virtio-iommu: Add bypass mode support to assigned device") Signed-off-by: Eric Auger Message-Id: <20230705165118.28194-2-eric.auger@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Jean-Philippe Brucker Tested-by: Jean-Philippe Brucker Reviewed-by: Zhenzhong Duan --- hw/virtio/trace-events | 1 + hw/virtio/virtio-iommu.c | 31 +++++++++++++++++++++++++++++-- include/hw/virtio/virtio-iommu.h | 2 ++ 3 files changed, 32 insertions(+), 2 deletions(-) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 4e39ed8a95..7109cf1a3b 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -133,6 +133,7 @@ virtio_iommu_set_page_size_mask(const char *name, uint64_t old, uint64_t new) "m virtio_iommu_notify_flag_add(const char *name) "add notifier to mr %s" virtio_iommu_notify_flag_del(const char *name) "del notifier from mr %s" virtio_iommu_switch_address_space(uint8_t bus, uint8_t slot, uint8_t fn, bool on) "Device %02x:%02x.%x switching address space (iommu enabled=%d)" +virtio_iommu_freeze_granule(uint64_t page_size_mask) "granule set to 0x%"PRIx64 # virtio-mem.c virtio_mem_send_response(uint16_t type) "type=%" PRIu16 diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 1bbad23f4a..8d0c5e3f32 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -25,6 +25,7 @@ #include "hw/virtio/virtio.h" #include "sysemu/kvm.h" #include "sysemu/reset.h" +#include "sysemu/sysemu.h" #include "qapi/error.h" #include "qemu/error-report.h" #include "trace.h" @@ -1106,12 +1107,12 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, } /* - * After the machine is finalized, we can't change the mask anymore. If by + * Once the granule is frozen we can't change the mask anymore. If by * chance the hotplugged device supports the same granule, we can still * accept it. Having a different masks is possible but the guest will use * sub-optimal block sizes, so warn about it. */ - if (phase_check(PHASE_MACHINE_READY)) { + if (s->granule_frozen) { int new_granule = ctz64(new_mask); int cur_granule = ctz64(cur_mask); @@ -1146,6 +1147,28 @@ static void virtio_iommu_system_reset(void *opaque) } +static void virtio_iommu_freeze_granule(Notifier *notifier, void *data) +{ + VirtIOIOMMU *s = container_of(notifier, VirtIOIOMMU, machine_done); + int granule; + + if (likely(s->config.bypass)) { + /* + * Transient IOMMU MR enable to collect page_size_mask requirements + * through memory_region_iommu_set_page_size_mask() called by + * VFIO region_add() callback + */ + s->config.bypass = false; + virtio_iommu_switch_address_space_all(s); + /* restore default */ + s->config.bypass = true; + virtio_iommu_switch_address_space_all(s); + } + s->granule_frozen = true; + granule = ctz64(s->config.page_size_mask); + trace_virtio_iommu_freeze_granule(BIT(granule)); +} + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -1189,6 +1212,9 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); } + s->machine_done.notify = virtio_iommu_freeze_granule; + qemu_add_machine_init_done_notifier(&s->machine_done); + qemu_register_reset(virtio_iommu_system_reset, s); } @@ -1198,6 +1224,7 @@ static void virtio_iommu_device_unrealize(DeviceState *dev) VirtIOIOMMU *s = VIRTIO_IOMMU(dev); qemu_unregister_reset(virtio_iommu_system_reset, s); + qemu_remove_machine_init_done_notifier(&s->machine_done); g_hash_table_destroy(s->as_by_busptr); if (s->domains) { diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h index 2ad5ee320b..a93fc5383e 100644 --- a/include/hw/virtio/virtio-iommu.h +++ b/include/hw/virtio/virtio-iommu.h @@ -61,6 +61,8 @@ struct VirtIOIOMMU { QemuRecMutex mutex; GTree *endpoints; bool boot_bypass; + Notifier machine_done; + bool granule_frozen; }; #endif From 587a7641d53055054d68d67d94c9408ef808f127 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Wed, 5 Jul 2023 18:51:18 +0200 Subject: [PATCH 47/66] virtio-iommu: Rework the traces in virtio_iommu_set_page_size_mask() The current error messages in virtio_iommu_set_page_size_mask() sound quite similar for different situations and miss the IOMMU memory region that causes the issue. Clarify them and rework the comment. Also remove the trace when the new page_size_mask is not applied as the current frozen granule is kept. This message is rather confusing for the end user and anyway the current granule would have been used by the driver. Signed-off-by: Eric Auger Reviewed-by: Zhenzhong Duan Message-Id: <20230705165118.28194-3-eric.auger@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Jean-Philippe Brucker Tested-by: Jean-Philippe Brucker --- hw/virtio/virtio-iommu.c | 19 +++++++------------ 1 file changed, 7 insertions(+), 12 deletions(-) diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 8d0c5e3f32..201127c488 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -1101,29 +1101,24 @@ static int virtio_iommu_set_page_size_mask(IOMMUMemoryRegion *mr, new_mask); if ((cur_mask & new_mask) == 0) { - error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 - " is incompatible with mask 0x%"PRIx64, cur_mask, new_mask); + error_setg(errp, "virtio-iommu %s reports a page size mask 0x%"PRIx64 + " incompatible with currently supported mask 0x%"PRIx64, + mr->parent_obj.name, new_mask, cur_mask); return -1; } /* * Once the granule is frozen we can't change the mask anymore. If by * chance the hotplugged device supports the same granule, we can still - * accept it. Having a different masks is possible but the guest will use - * sub-optimal block sizes, so warn about it. + * accept it. */ if (s->granule_frozen) { - int new_granule = ctz64(new_mask); int cur_granule = ctz64(cur_mask); - if (new_granule != cur_granule) { - error_setg(errp, "virtio-iommu page mask 0x%"PRIx64 - " is incompatible with mask 0x%"PRIx64, cur_mask, - new_mask); + if (!(BIT(cur_granule) & new_mask)) { + error_setg(errp, "virtio-iommu %s does not support frozen granule 0x%llx", + mr->parent_obj.name, BIT_ULL(cur_granule)); return -1; - } else if (new_mask != cur_mask) { - warn_report("virtio-iommu page mask 0x%"PRIx64 - " does not match 0x%"PRIx64, cur_mask, new_mask); } return 0; } From 625b370c45f4acd155ee625d61c0057d770a5b5e Mon Sep 17 00:00:00 2001 From: Leonardo Bras Date: Thu, 6 Jul 2023 01:55:47 -0300 Subject: [PATCH 48/66] pcie: Add hotplug detect state register to cmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When trying to migrate a machine type pc-q35-6.0 or lower, with this cmdline options, -device driver=pcie-root-port,port=18,chassis=19,id=pcie-root-port18,bus=pcie.0,addr=0x12 \ -device driver=nec-usb-xhci,p2=4,p3=4,id=nex-usb-xhci0,bus=pcie-root-port18,addr=0x12.0x1 the following bug happens after all ram pages were sent: qemu-kvm: get_pci_config_device: Bad config data: i=0x6e read: 0 device: 40 cmask: ff wmask: 0 w1cmask:19 qemu-kvm: Failed to load PCIDevice:config qemu-kvm: Failed to load pcie-root-port:parent_obj.parent_obj.parent_obj qemu-kvm: error while loading state for instance 0x0 of device '0000:00:12.0/pcie-root-port' qemu-kvm: load of migration failed: Invalid argument This happens on pc-q35-6.0 or lower because of: { "ICH9-LPC", ACPI_PM_PROP_ACPI_PCIHP_BRIDGE, "off" } In this scenario, hotplug_handler_plug() calls pcie_cap_slot_plug_cb(), which sets dev->config byte 0x6e with bit PCI_EXP_SLTSTA_PDS to signal PCI hotplug for the guest. After a while the guest will deal with this hotplug and qemu will clear the above bit. Then, during migration, get_pci_config_device() will compare the configs of both the freshly created device and the one that is being received via migration, which will differ due to the PCI_EXP_SLTSTA_PDS bit and cause the bug to reproduce. To avoid this fake incompatibility, there are tree fields in PCIDevice that can help: - wmask: Used to implement R/W bytes, and - w1cmask: Used to implement RW1C(Write 1 to Clear) bytes - cmask: Used to enable config checks on load. According to PCI Express® Base Specification Revision 5.0 Version 1.0, table 7-27 (Slot Status Register) bit 6, the "Presence Detect State" is listed as RO (read-only), so it only makes sense to make use of the cmask field. So, clear PCI_EXP_SLTSTA_PDS bit on cmask, so the fake incompatibility on get_pci_config_device() does not abort the migration. Buglink: https://bugzilla.redhat.com/show_bug.cgi?id=2215819 Signed-off-by: Leonardo Bras Message-Id: <20230706045546.593605-3-leobras@redhat.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Juan Quintela --- hw/pci/pcie.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index b7f107ed8d..763f65c528 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -666,6 +666,10 @@ void pcie_cap_slot_init(PCIDevice *dev, PCIESlot *s) pci_word_test_and_set_mask(dev->w1cmask + pos + PCI_EXP_SLTSTA, PCI_EXP_HP_EV_SUPPORTED); + /* Avoid migration abortion when this device hot-removed by guest */ + pci_word_test_and_clear_mask(dev->cmask + pos + PCI_EXP_SLTSTA, + PCI_EXP_SLTSTA_PDS); + dev->exp.hpev_notified = false; qbus_set_hotplug_handler(BUS(pci_bridge_get_sec_bus(PCI_BRIDGE(dev))), From 031b1abacbdb3f4e016b6b926f7e7876c05339bb Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Sat, 8 Jul 2023 00:44:42 +0800 Subject: [PATCH 49/66] vdpa: Fix possible use-after-free for VirtQueueElement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit QEMU uses vhost_handle_guest_kick() to forward guest's available buffers to the vdpa device in SVQ avail ring. In vhost_handle_guest_kick(), a `g_autofree` `elem` is used to iterate through the available VirtQueueElements. This `elem` is then passed to `svq->ops->avail_handler`, specifically to the vhost_vdpa_net_handle_ctrl_avail(). If this handler fails to process the CVQ command, vhost_handle_guest_kick() regains ownership of the `elem`, and either frees it or requeues it. Yet the problem is that, vhost_vdpa_net_handle_ctrl_avail() mistakenly frees the `elem`, even if it fails to forward the CVQ command to vdpa device. This can result in a use-after-free for the `elem` in vhost_handle_guest_kick(). This patch solves this problem by refactoring vhost_vdpa_net_handle_ctrl_avail() to only freeing the `elem` if it owns it. Fixes: bd907ae4b0 ("vdpa: manual forward CVQ buffers") Signed-off-by: Hawkins Jiawei Message-Id: Reviewed-by: Eugenio Pérez Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 679ef4bed0..5542b47a8f 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -831,7 +831,16 @@ out: error_report("Bad device CVQ written length"); } vhost_svq_push_elem(svq, elem, MIN(in_len, sizeof(status))); - g_free(elem); + /* + * `elem` belongs to vhost_vdpa_net_handle_ctrl_avail() only when + * the function successfully forwards the CVQ command, indicated + * by a non-negative value of `dev_written`. Otherwise, it still + * belongs to SVQ. + * This function should only free the `elem` when it owns. + */ + if (dev_written >= 0) { + g_free(elem); + } return dev_written < 0 ? dev_written : 0; } From e57fc3dec9508cfa81c1bedcaa6b7469995ac6a5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 10 Jul 2023 16:35:03 +0100 Subject: [PATCH 50/66] include: attempt to document device_class_set_props MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit I'm still not sure how I achieve by use case of the parent class defining the following properties: static Property vud_properties[] = { DEFINE_PROP_CHR("chardev", VHostUserDevice, chardev), DEFINE_PROP_UINT16("id", VHostUserDevice, id, 0), DEFINE_PROP_UINT32("num_vqs", VHostUserDevice, num_vqs, 1), DEFINE_PROP_END_OF_LIST(), }; But for the specialisation of the class I want the id to default to the actual device id, e.g.: static Property vu_rng_properties[] = { DEFINE_PROP_UINT16("id", VHostUserDevice, id, VIRTIO_ID_RNG), DEFINE_PROP_UINT32("num_vqs", VHostUserDevice, num_vqs, 1), DEFINE_PROP_END_OF_LIST(), }; And so far the API for doing that isn't super clear. Signed-off-by: Alex Bennée Message-Id: <20230710153522.3469097-2-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/qdev-core.h | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 06cadfc492..196ebf6d91 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -926,6 +926,15 @@ BusState *sysbus_get_default(void); char *qdev_get_fw_dev_path(DeviceState *dev); char *qdev_get_own_fw_dev_path_from_handler(BusState *bus, DeviceState *dev); +/** + * device_class_set_props(): add a set of properties to an device + * @dc: the parent DeviceClass all devices inherit + * @props: an array of properties, terminate by DEFINE_PROP_END_OF_LIST() + * + * This will add a set of properties to the object. It will fault if + * you attempt to add an existing property defined by a parent class. + * To modify an inherited property you need to use???? + */ void device_class_set_props(DeviceClass *dc, Property *props); /** From c378e88218c9679d7df8230c9f988b22a6cbdc1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 10 Jul 2023 16:35:04 +0100 Subject: [PATCH 51/66] include/hw: document the device_class_set_parent_* fns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These are useful functions for when you want proper inheritance of functionality across realize/unrealize calls. Signed-off-by: Alex Bennée Message-Id: <20230710153522.3469097-3-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/qdev-core.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/include/hw/qdev-core.h b/include/hw/qdev-core.h index 196ebf6d91..884c726a87 100644 --- a/include/hw/qdev-core.h +++ b/include/hw/qdev-core.h @@ -952,9 +952,36 @@ void device_class_set_props(DeviceClass *dc, Property *props); void device_class_set_parent_reset(DeviceClass *dc, DeviceReset dev_reset, DeviceReset *parent_reset); + +/** + * device_class_set_parent_realize() - set up for chaining realize fns + * @dc: The device class + * @dev_realize: the device realize function + * @parent_realize: somewhere to save the parents realize function + * + * This is intended to be used when the new realize function will + * eventually call its parent realization function during creation. + * This requires storing the function call somewhere (usually in the + * instance structure) so you can eventually call + * dc->parent_realize(dev, errp) + */ void device_class_set_parent_realize(DeviceClass *dc, DeviceRealize dev_realize, DeviceRealize *parent_realize); + + +/** + * device_class_set_parent_unrealize() - set up for chaining unrealize fns + * @dc: The device class + * @dev_unrealize: the device realize function + * @parent_unrealize: somewhere to save the parents unrealize function + * + * This is intended to be used when the new unrealize function will + * eventually call its parent unrealization function during the + * unrealize phase. This requires storing the function call somewhere + * (usually in the instance structure) so you can eventually call + * dc->parent_unrealize(dev); + */ void device_class_set_parent_unrealize(DeviceClass *dc, DeviceUnrealize dev_unrealize, DeviceUnrealize *parent_unrealize); From 7e8094f0dfd04d4772b65dc92dd087dc86783dc4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 10 Jul 2023 16:35:05 +0100 Subject: [PATCH 52/66] hw/virtio: fix typo in VIRTIO_CONFIG_IRQ_IDX comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes: 544f0278af (virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX) Signed-off-by: Alex Bennée Message-Id: <20230710153522.3469097-4-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/display/vhost-user-gpu.c | 4 ++-- hw/net/virtio-net.c | 4 ++-- hw/virtio/vhost-user-fs.c | 4 ++-- hw/virtio/vhost-user-gpio.c | 2 +- hw/virtio/vhost-vsock-common.c | 4 ++-- hw/virtio/virtio-crypto.c | 4 ++-- 6 files changed, 11 insertions(+), 11 deletions(-) diff --git a/hw/display/vhost-user-gpu.c b/hw/display/vhost-user-gpu.c index 543ea92721..e8ee03094e 100644 --- a/hw/display/vhost-user-gpu.c +++ b/hw/display/vhost-user-gpu.c @@ -521,7 +521,7 @@ vhost_user_gpu_guest_notifier_pending(VirtIODevice *vdev, int idx) /* * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return */ @@ -538,7 +538,7 @@ vhost_user_gpu_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) /* * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return */ diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c index d51067123b..7102ec4817 100644 --- a/hw/net/virtio-net.c +++ b/hw/net/virtio-net.c @@ -3362,7 +3362,7 @@ static bool virtio_net_guest_notifier_pending(VirtIODevice *vdev, int idx) } /* * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return false */ @@ -3394,7 +3394,7 @@ static void virtio_net_guest_notifier_mask(VirtIODevice *vdev, int idx, } /* *Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return */ diff --git a/hw/virtio/vhost-user-fs.c b/hw/virtio/vhost-user-fs.c index 83fc20e49e..49d699ffc2 100644 --- a/hw/virtio/vhost-user-fs.c +++ b/hw/virtio/vhost-user-fs.c @@ -161,7 +161,7 @@ static void vuf_guest_notifier_mask(VirtIODevice *vdev, int idx, /* * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return */ @@ -177,7 +177,7 @@ static bool vuf_guest_notifier_pending(VirtIODevice *vdev, int idx) /* * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return */ diff --git a/hw/virtio/vhost-user-gpio.c b/hw/virtio/vhost-user-gpio.c index d6927b610a..3b013f2d0f 100644 --- a/hw/virtio/vhost-user-gpio.c +++ b/hw/virtio/vhost-user-gpio.c @@ -194,7 +194,7 @@ static void vu_gpio_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) /* * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return */ diff --git a/hw/virtio/vhost-vsock-common.c b/hw/virtio/vhost-vsock-common.c index 321262f6b3..12ea87d7a7 100644 --- a/hw/virtio/vhost-vsock-common.c +++ b/hw/virtio/vhost-vsock-common.c @@ -129,7 +129,7 @@ static void vhost_vsock_common_guest_notifier_mask(VirtIODevice *vdev, int idx, /* * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return */ @@ -146,7 +146,7 @@ static bool vhost_vsock_common_guest_notifier_pending(VirtIODevice *vdev, /* * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return */ diff --git a/hw/virtio/virtio-crypto.c b/hw/virtio/virtio-crypto.c index a6d7e1e8ec..44faf5a522 100644 --- a/hw/virtio/virtio-crypto.c +++ b/hw/virtio/virtio-crypto.c @@ -1210,7 +1210,7 @@ static void virtio_crypto_guest_notifier_mask(VirtIODevice *vdev, int idx, /* * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return */ @@ -1229,7 +1229,7 @@ static bool virtio_crypto_guest_notifier_pending(VirtIODevice *vdev, int idx) /* * Add the check for configure interrupt, Use VIRTIO_CONFIG_IRQ_IDX -1 - * as the Marco of configure interrupt's IDX, If this driver does not + * as the macro of configure interrupt's IDX, If this driver does not * support, the function will return */ From a0cc7673ab731794caefed2033d68a2199d6bfaf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 10 Jul 2023 16:35:06 +0100 Subject: [PATCH 53/66] include/hw/virtio: document virtio_notify_config MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Alex Bennée Message-Id: <20230710153522.3469097-5-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/virtio/virtio.h | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 0492d26900..0671989383 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -276,6 +276,13 @@ extern const VMStateInfo virtio_vmstate_info; int virtio_load(VirtIODevice *vdev, QEMUFile *f, int version_id); +/** + * virtio_notify_config() - signal a change to device config + * @vdev: the virtio device + * + * Assuming the virtio device is up (VIRTIO_CONFIG_S_DRIVER_OK) this + * will trigger a guest interrupt and update the config version. + */ void virtio_notify_config(VirtIODevice *vdev); bool virtio_queue_get_notification(VirtQueue *vq); From 3b6256c2c57061c365cfad7857e12fd8d15ca3c8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 10 Jul 2023 16:35:07 +0100 Subject: [PATCH 54/66] include/hw/virtio: add kerneldoc for virtio_init MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Alex Bennée Message-Id: <20230710153522.3469097-6-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/virtio/virtio.h | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 0671989383..631490bda4 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -219,6 +219,12 @@ struct VirtioDeviceClass { void virtio_instance_init_common(Object *proxy_obj, void *data, size_t vdev_size, const char *vdev_name); +/** + * virtio_init() - initialise the common VirtIODevice structure + * @vdev: pointer to VirtIODevice + * @device_id: the VirtIO device ID (see virtio_ids.h) + * @config_size: size of the config space + */ void virtio_init(VirtIODevice *vdev, uint16_t device_id, size_t config_size); void virtio_cleanup(VirtIODevice *vdev); From 661dee7bd08dd93d15b898d43821bb46b2aa422c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Alex=20Benn=C3=A9e?= Date: Mon, 10 Jul 2023 16:35:08 +0100 Subject: [PATCH 55/66] include/hw/virtio: document some more usage of notifiers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Lets document some more of the core VirtIODevice structure. Signed-off-by: Alex Bennée Message-Id: <20230710153522.3469097-7-alex.bennee@linaro.org> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/virtio/virtio.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/include/hw/virtio/virtio.h b/include/hw/virtio/virtio.h index 631490bda4..c8f72850bc 100644 --- a/include/hw/virtio/virtio.h +++ b/include/hw/virtio/virtio.h @@ -150,10 +150,18 @@ struct VirtIODevice VMChangeStateEntry *vmstate; char *bus_name; uint8_t device_endian; + /** + * @user_guest_notifier_mask: gate usage of ->guest_notifier_mask() callback. + * This is used to suppress the masking of guest updates for + * vhost-user devices which are asynchronous by design. + */ bool use_guest_notifier_mask; AddressSpace *dma_as; QLIST_HEAD(, VirtQueue) *vector_queues; QTAILQ_ENTRY(VirtIODevice) next; + /** + * @config_notifier: the event notifier that handles config events + */ EventNotifier config_notifier; bool device_iotlb_enabled; }; From 445416e3010a2525c577d692921979a64d88a998 Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 11 Jul 2023 00:38:35 +0900 Subject: [PATCH 56/66] pcie: Use common ARI next function number Currently the only implementers of ARI is SR-IOV devices, and they behave similar. Share the ARI next function number. Signed-off-by: Akihiko Odaki Reviewed-by: Ani Sinha Message-Id: <20230710153838.33917-2-akihiko.odaki@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- docs/pcie_sriov.txt | 4 ++-- hw/net/igb.c | 2 +- hw/net/igbvf.c | 2 +- hw/nvme/ctrl.c | 2 +- hw/pci/pcie.c | 4 +++- include/hw/pci/pcie.h | 2 +- 6 files changed, 9 insertions(+), 7 deletions(-) diff --git a/docs/pcie_sriov.txt b/docs/pcie_sriov.txt index 7eff7f2703..a47aad0bfa 100644 --- a/docs/pcie_sriov.txt +++ b/docs/pcie_sriov.txt @@ -48,7 +48,7 @@ setting up a BAR for a VF. ... int ret = pcie_endpoint_cap_init(d, 0x70); ... - pcie_ari_init(d, 0x100, 1); + pcie_ari_init(d, 0x100); ... /* Add and initialize the SR/IOV capability */ @@ -78,7 +78,7 @@ setting up a BAR for a VF. ... int ret = pcie_endpoint_cap_init(d, 0x60); ... - pcie_ari_init(d, 0x100, 1); + pcie_ari_init(d, 0x100); ... memory_region_init(mr, ... ) pcie_sriov_vf_register_bar(d, bar_nr, mr); diff --git a/hw/net/igb.c b/hw/net/igb.c index 1c989d7677..8ff832acfc 100644 --- a/hw/net/igb.c +++ b/hw/net/igb.c @@ -431,7 +431,7 @@ static void igb_pci_realize(PCIDevice *pci_dev, Error **errp) hw_error("Failed to initialize AER capability"); } - pcie_ari_init(pci_dev, 0x150, 1); + pcie_ari_init(pci_dev, 0x150); pcie_sriov_pf_init(pci_dev, IGB_CAP_SRIOV_OFFSET, TYPE_IGBVF, IGB_82576_VF_DEV_ID, IGB_MAX_VF_FUNCTIONS, IGB_MAX_VF_FUNCTIONS, diff --git a/hw/net/igbvf.c b/hw/net/igbvf.c index 284ea61184..d55e1e8a6a 100644 --- a/hw/net/igbvf.c +++ b/hw/net/igbvf.c @@ -270,7 +270,7 @@ static void igbvf_pci_realize(PCIDevice *dev, Error **errp) hw_error("Failed to initialize AER capability"); } - pcie_ari_init(dev, 0x150, 1); + pcie_ari_init(dev, 0x150); } static void igbvf_pci_uninit(PCIDevice *dev) diff --git a/hw/nvme/ctrl.c b/hw/nvme/ctrl.c index 355668bdf8..8e8e870b9a 100644 --- a/hw/nvme/ctrl.c +++ b/hw/nvme/ctrl.c @@ -8120,7 +8120,7 @@ static bool nvme_init_pci(NvmeCtrl *n, PCIDevice *pci_dev, Error **errp) pcie_endpoint_cap_init(pci_dev, 0x80); pcie_cap_flr_init(pci_dev); if (n->params.sriov_max_vfs) { - pcie_ari_init(pci_dev, 0x100, 1); + pcie_ari_init(pci_dev, 0x100); } /* add one to max_ioqpairs to account for the admin queue pair */ diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 763f65c528..6075ff5556 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -1039,8 +1039,10 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev) */ /* ARI */ -void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn) +void pcie_ari_init(PCIDevice *dev, uint16_t offset) { + uint16_t nextfn = 1; + pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER, offset, PCI_ARI_SIZEOF); pci_set_long(dev->config + offset + PCI_ARI_CAP, (nextfn & 0xff) << 8); diff --git a/include/hw/pci/pcie.h b/include/hw/pci/pcie.h index 51ab57bc3c..11f5a91bbb 100644 --- a/include/hw/pci/pcie.h +++ b/include/hw/pci/pcie.h @@ -135,7 +135,7 @@ void pcie_sync_bridge_lnk(PCIDevice *dev); void pcie_acs_init(PCIDevice *dev, uint16_t offset); void pcie_acs_reset(PCIDevice *dev); -void pcie_ari_init(PCIDevice *dev, uint16_t offset, uint16_t nextfn); +void pcie_ari_init(PCIDevice *dev, uint16_t offset); void pcie_dev_ser_num_init(PCIDevice *dev, uint16_t offset, uint64_t ser_num); void pcie_ats_init(PCIDevice *dev, uint16_t offset, bool aligned); From 7c228c5f3301113ffd7cfee7f982e7ae04c8ffda Mon Sep 17 00:00:00 2001 From: Akihiko Odaki Date: Tue, 11 Jul 2023 00:38:36 +0900 Subject: [PATCH 57/66] pcie: Specify 0 for ARI next function numbers The current implementers of ARI are all SR-IOV devices. The ARI next function number field is undefined for VF according to PCI Express Base Specification Revision 5.0 Version 1.0 section 9.3.7.7. The PF still requires some defined value so end the linked list formed with the field by specifying 0 as required for any ARI implementation according to section 7.8.7.2. For migration, the field will keep having 1 as its value on the old QEMU machine versions. Fixes: 2503461691 ("pcie: Add some SR/IOV API documentation in docs/pcie_sriov.txt") Fixes: 44c2c09488 ("hw/nvme: Add support for SR-IOV") Fixes: 3a977deebe ("Intrdocue igb device emulation") Signed-off-by: Akihiko Odaki Reviewed-by: Ani Sinha Message-Id: <20230710153838.33917-3-akihiko.odaki@daynix.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/core/machine.c | 1 + hw/pci/pci.c | 2 ++ hw/pci/pcie.c | 2 +- include/hw/pci/pci.h | 2 ++ 4 files changed, 6 insertions(+), 1 deletion(-) diff --git a/hw/core/machine.c b/hw/core/machine.c index 46f8f9a2b0..f0d35c6401 100644 --- a/hw/core/machine.c +++ b/hw/core/machine.c @@ -41,6 +41,7 @@ GlobalProperty hw_compat_8_0[] = { { "migration", "multifd-flush-after-each-section", "on"}, + { TYPE_PCI_DEVICE, "x-pcie-ari-nextfn-1", "on" }, }; const size_t hw_compat_8_0_len = G_N_ELEMENTS(hw_compat_8_0); diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 4b14f31859..784c02a182 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -83,6 +83,8 @@ static Property pci_props[] = { DEFINE_PROP_UINT32("acpi-index", PCIDevice, acpi_index, 0), DEFINE_PROP_BIT("x-pcie-err-unc-mask", PCIDevice, cap_present, QEMU_PCIE_ERR_UNC_MASK_BITNR, true), + DEFINE_PROP_BIT("x-pcie-ari-nextfn-1", PCIDevice, cap_present, + QEMU_PCIE_ARI_NEXTFN_1_BITNR, false), DEFINE_PROP_END_OF_LIST() }; diff --git a/hw/pci/pcie.c b/hw/pci/pcie.c index 6075ff5556..6db0cf69cd 100644 --- a/hw/pci/pcie.c +++ b/hw/pci/pcie.c @@ -1041,7 +1041,7 @@ void pcie_sync_bridge_lnk(PCIDevice *bridge_dev) /* ARI */ void pcie_ari_init(PCIDevice *dev, uint16_t offset) { - uint16_t nextfn = 1; + uint16_t nextfn = dev->cap_present & QEMU_PCIE_ARI_NEXTFN_1 ? 1 : 0; pcie_add_capability(dev, PCI_EXT_CAP_ID_ARI, PCI_ARI_VER, offset, PCI_ARI_SIZEOF); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index ab2bd65a3a..abdc1ef103 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -209,6 +209,8 @@ enum { QEMU_PCIE_CAP_CXL = (1 << QEMU_PCIE_CXL_BITNR), #define QEMU_PCIE_ERR_UNC_MASK_BITNR 11 QEMU_PCIE_ERR_UNC_MASK = (1 << QEMU_PCIE_ERR_UNC_MASK_BITNR), +#define QEMU_PCIE_ARI_NEXTFN_1_BITNR 12 + QEMU_PCIE_ARI_NEXTFN_1 = (1 << QEMU_PCIE_ARI_NEXTFN_1_BITNR), }; typedef struct PCIINTxRoute { From 2848c6aa7536fb48a57f38dabed1dce97fb45c6f Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 7 Jul 2023 23:27:28 +0800 Subject: [PATCH 58/66] vdpa: Use iovec for vhost_vdpa_net_load_cmd() According to VirtIO standard, "The driver MUST follow the VIRTIO_NET_CTRL_MAC_TABLE_SET command by a le32 number, followed by that number of non-multicast MAC addresses, followed by another le32 number, followed by that number of multicast addresses." Considering that these data is not stored in contiguous memory, this patch refactors vhost_vdpa_net_load_cmd() to accept scattered data, eliminating the need for an addtional data copy or packing the data into s->cvq_cmd_out_buffer outside of vhost_vdpa_net_load_cmd(). Signed-off-by: Hawkins Jiawei Message-Id: <3482cc50eebd13db4140b8b5dec9d0cc25b20b1b.1688743107.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 33 +++++++++++++++++++++++++-------- 1 file changed, 25 insertions(+), 8 deletions(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 5542b47a8f..e192217a96 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -626,29 +626,38 @@ static ssize_t vhost_vdpa_net_cvq_add(VhostVDPAState *s, size_t out_len, } static ssize_t vhost_vdpa_net_load_cmd(VhostVDPAState *s, uint8_t class, - uint8_t cmd, const void *data, - size_t data_size) + uint8_t cmd, const struct iovec *data_sg, + size_t data_num) { const struct virtio_net_ctrl_hdr ctrl = { .class = class, .cmd = cmd, }; + size_t data_size = iov_size(data_sg, data_num); assert(data_size < vhost_vdpa_net_cvq_cmd_page_len() - sizeof(ctrl)); + /* pack the CVQ command header */ memcpy(s->cvq_cmd_out_buffer, &ctrl, sizeof(ctrl)); - memcpy(s->cvq_cmd_out_buffer + sizeof(ctrl), data, data_size); - return vhost_vdpa_net_cvq_add(s, sizeof(ctrl) + data_size, + /* pack the CVQ command command-specific-data */ + iov_to_buf(data_sg, data_num, 0, + s->cvq_cmd_out_buffer + sizeof(ctrl), data_size); + + return vhost_vdpa_net_cvq_add(s, data_size + sizeof(ctrl), sizeof(virtio_net_ctrl_ack)); } static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) { if (virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_MAC_ADDR)) { + const struct iovec data = { + .iov_base = (void *)n->mac, + .iov_len = sizeof(n->mac), + }; ssize_t dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MAC, VIRTIO_NET_CTRL_MAC_ADDR_SET, - n->mac, sizeof(n->mac)); + &data, 1); if (unlikely(dev_written < 0)) { return dev_written; } @@ -671,9 +680,13 @@ static int vhost_vdpa_net_load_mq(VhostVDPAState *s, } mq.virtqueue_pairs = cpu_to_le16(n->curr_queue_pairs); + const struct iovec data = { + .iov_base = &mq, + .iov_len = sizeof(mq), + }; dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_MQ, - VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, &mq, - sizeof(mq)); + VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET, + &data, 1); if (unlikely(dev_written < 0)) { return dev_written; } @@ -712,9 +725,13 @@ static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, } offloads = cpu_to_le64(n->curr_guest_offloads); + const struct iovec data = { + .iov_base = &offloads, + .iov_len = sizeof(offloads), + }; dev_written = vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_GUEST_OFFLOADS, VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET, - &offloads, sizeof(offloads)); + &data, 1); if (unlikely(dev_written < 0)) { return dev_written; } From 0ddcecb8f2e14b5aa6781ae3403ffa2ac4b25197 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 7 Jul 2023 23:27:29 +0800 Subject: [PATCH 59/66] vdpa: Restore MAC address filtering state This patch refactors vhost_vdpa_net_load_mac() to restore the MAC address filtering state at device's startup. Signed-off-by: Hawkins Jiawei Message-Id: <4b9550c14bc8c98c8f48e04dbf3d3ac41489d3fd.1688743107.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 52 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 52 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index e192217a96..126afbc7a5 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -666,6 +666,58 @@ static int vhost_vdpa_net_load_mac(VhostVDPAState *s, const VirtIONet *n) } } + /* + * According to VirtIO standard, "The device MUST have an + * empty MAC filtering table on reset.". + * + * Therefore, there is no need to send this CVQ command if the + * driver also sets an empty MAC filter table, which aligns with + * the device's defaults. + * + * Note that the device's defaults can mismatch the driver's + * configuration only at live migration. + */ + if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX) || + n->mac_table.in_use == 0) { + return 0; + } + + uint32_t uni_entries = n->mac_table.first_multi, + uni_macs_size = uni_entries * ETH_ALEN, + mul_entries = n->mac_table.in_use - uni_entries, + mul_macs_size = mul_entries * ETH_ALEN; + struct virtio_net_ctrl_mac uni = { + .entries = cpu_to_le32(uni_entries), + }; + struct virtio_net_ctrl_mac mul = { + .entries = cpu_to_le32(mul_entries), + }; + const struct iovec data[] = { + { + .iov_base = &uni, + .iov_len = sizeof(uni), + }, { + .iov_base = n->mac_table.macs, + .iov_len = uni_macs_size, + }, { + .iov_base = &mul, + .iov_len = sizeof(mul), + }, { + .iov_base = &n->mac_table.macs[uni_macs_size], + .iov_len = mul_macs_size, + }, + }; + ssize_t dev_written = vhost_vdpa_net_load_cmd(s, + VIRTIO_NET_CTRL_MAC, + VIRTIO_NET_CTRL_MAC_TABLE_SET, + data, ARRAY_SIZE(data)); + if (unlikely(dev_written < 0)) { + return dev_written; + } + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } + return 0; } From b12f907eeaad7bd4d282502bd7f7773f95e1ee96 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 7 Jul 2023 23:27:30 +0800 Subject: [PATCH 60/66] vdpa: Restore packet receive filtering state relative with _F_CTRL_RX feature This patch introduces vhost_vdpa_net_load_rx_mode() and vhost_vdpa_net_load_rx() to restore the packet receive filtering state in relation to VIRTIO_NET_F_CTRL_RX feature at device's startup. Signed-off-by: Hawkins Jiawei Message-Id: <804cedac93e19ba3b810d52b274ca5ec11469f09.1688743107.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 85 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 126afbc7a5..f299e446b0 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -794,6 +794,87 @@ static int vhost_vdpa_net_load_offloads(VhostVDPAState *s, return 0; } +static int vhost_vdpa_net_load_rx_mode(VhostVDPAState *s, + uint8_t cmd, + uint8_t on) +{ + const struct iovec data = { + .iov_base = &on, + .iov_len = sizeof(on), + }; + return vhost_vdpa_net_load_cmd(s, VIRTIO_NET_CTRL_RX, + cmd, &data, 1); +} + +static int vhost_vdpa_net_load_rx(VhostVDPAState *s, + const VirtIONet *n) +{ + ssize_t dev_written; + + if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX)) { + return 0; + } + + /* + * According to virtio_net_reset(), device turns promiscuous mode + * on by default. + * + * Addtionally, according to VirtIO standard, "Since there are + * no guarantees, it can use a hash filter or silently switch to + * allmulti or promiscuous mode if it is given too many addresses.". + * QEMU marks `n->mac_table.uni_overflow` if guest sets too many + * non-multicast MAC addresses, indicating that promiscuous mode + * should be enabled. + * + * Therefore, QEMU should only send this CVQ command if the + * `n->mac_table.uni_overflow` is not marked and `n->promisc` is off, + * which sets promiscuous mode on, different from the device's defaults. + * + * Note that the device's defaults can mismatch the driver's + * configuration only at live migration. + */ + if (!n->mac_table.uni_overflow && !n->promisc) { + dev_written = vhost_vdpa_net_load_rx_mode(s, + VIRTIO_NET_CTRL_RX_PROMISC, 0); + if (unlikely(dev_written < 0)) { + return dev_written; + } + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } + } + + /* + * According to virtio_net_reset(), device turns all-multicast mode + * off by default. + * + * According to VirtIO standard, "Since there are no guarantees, + * it can use a hash filter or silently switch to allmulti or + * promiscuous mode if it is given too many addresses.". QEMU marks + * `n->mac_table.multi_overflow` if guest sets too many + * non-multicast MAC addresses. + * + * Therefore, QEMU should only send this CVQ command if the + * `n->mac_table.multi_overflow` is marked or `n->allmulti` is on, + * which sets all-multicast mode on, different from the device's defaults. + * + * Note that the device's defaults can mismatch the driver's + * configuration only at live migration. + */ + if (n->mac_table.multi_overflow || n->allmulti) { + dev_written = vhost_vdpa_net_load_rx_mode(s, + VIRTIO_NET_CTRL_RX_ALLMULTI, 1); + if (unlikely(dev_written < 0)) { + return dev_written; + } + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } + } + + return 0; +} + static int vhost_vdpa_net_load(NetClientState *nc) { VhostVDPAState *s = DO_UPCAST(VhostVDPAState, nc, nc); @@ -820,6 +901,10 @@ static int vhost_vdpa_net_load(NetClientState *nc) if (unlikely(r)) { return r; } + r = vhost_vdpa_net_load_rx(s, n); + if (unlikely(r)) { + return r; + } return 0; } From b77a5f22aca4cef2dc893ea544841bdd95dc83ba Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 7 Jul 2023 23:27:31 +0800 Subject: [PATCH 61/66] vhost: Fix false positive out-of-bounds QEMU uses vhost_svq_translate_addr() to translate addresses between the QEMU's virtual address and the SVQ IOVA. In order to validate this translation, QEMU checks whether the translated range falls within the mapped range. Yet the problem is that, the value of `needle_last`, which is calculated by `needle.translated_addr + iovec[i].iov_len`, should represent the exclusive boundary of the translated range, rather than the last inclusive addresses of the range. Consequently, QEMU fails the check when the translated range matches the size of the mapped range. This patch solves this problem by fixing the `needle_last` value to the last inclusive address of the translated range. Note that this bug cannot be triggered at the moment, because QEMU is unable to translate such a big range due to the truncation of the CVQ command in vhost_vdpa_net_handle_ctrl_avail(). Fixes: 34e3c94eda ("vdpa: Add custom IOTLB translations to SVQ") Signed-off-by: Hawkins Jiawei Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-shadow-virtqueue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hw/virtio/vhost-shadow-virtqueue.c b/hw/virtio/vhost-shadow-virtqueue.c index 1b1d85306c..49e5aed931 100644 --- a/hw/virtio/vhost-shadow-virtqueue.c +++ b/hw/virtio/vhost-shadow-virtqueue.c @@ -111,7 +111,7 @@ static bool vhost_svq_translate_addr(const VhostShadowVirtqueue *svq, addrs[i] = map->iova + off; needle_last = int128_add(int128_make64(needle.translated_addr), - int128_make64(iovec[i].iov_len)); + int128_makes64(iovec[i].iov_len - 1)); map_last = int128_make64(map->translated_addr + map->size); if (unlikely(int128_gt(needle_last, map_last))) { qemu_log_mask(LOG_GUEST_ERROR, From 45c4101828a483b87863f2e2ba6d98faf0ff7d92 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 7 Jul 2023 23:27:32 +0800 Subject: [PATCH 62/66] vdpa: Accessing CVQ header through its structure We can access the CVQ header through `struct virtio_net_ctrl_hdr`, instead of accessing it through a `uint8_t` pointer, which improves the code's readability and maintainability. Signed-off-by: Hawkins Jiawei Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index f299e446b0..eee4b2a09e 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -934,6 +934,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, { VhostVDPAState *s = opaque; size_t in_len; + const struct virtio_net_ctrl_hdr *ctrl; virtio_net_ctrl_ack status = VIRTIO_NET_ERR; /* Out buffer sent to both the vdpa device and the device model */ struct iovec out = { @@ -949,7 +950,9 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, s->cvq_cmd_out_buffer, vhost_vdpa_net_cvq_cmd_len()); - if (*(uint8_t *)s->cvq_cmd_out_buffer == VIRTIO_NET_CTRL_ANNOUNCE) { + + ctrl = s->cvq_cmd_out_buffer; + if (ctrl->class == VIRTIO_NET_CTRL_ANNOUNCE) { /* * Guest announce capability is emulated by qemu, so don't forward to * the device. From fee364e4b1ad82a78929f3eed114321f77b6f916 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 7 Jul 2023 23:27:33 +0800 Subject: [PATCH 63/66] vdpa: Avoid forwarding large CVQ command failures Due to the size limitation of the out buffer sent to the vdpa device, which is determined by vhost_vdpa_net_cvq_cmd_len(), excessive CVQ command is truncated in QEMU. As a result, the vdpa device rejects this flawd CVQ command. However, the problem is that, the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command has a variable length, which may exceed vhost_vdpa_net_cvq_cmd_len() if the guest sets more than `MAC_TABLE_ENTRIES` MAC addresses for the filter table. This patch solves this problem by following steps: * Increase the out buffer size to vhost_vdpa_net_cvq_cmd_page_len(), which represents the size of the buffer that is allocated and mmaped. This ensures that everything works correctly as long as the guest sets fewer than `(vhost_vdpa_net_cvq_cmd_page_len() - sizeof(struct virtio_net_ctrl_hdr) - 2 * sizeof(struct virtio_net_ctrl_mac)) / ETH_ALEN` MAC addresses. Considering the highly unlikely scenario for the guest setting more than that number of MAC addresses for the filter table, this should work fine for the majority of cases. * If the CVQ command exceeds vhost_vdpa_net_cvq_cmd_page_len(), instead of directly sending this CVQ command, QEMU should send a VIRTIO_NET_CTRL_RX_PROMISC CVQ command to vdpa device. Addtionally, a fake VIRTIO_NET_CTRL_MAC_TABLE_SET command including (`MAC_TABLE_ENTRIES` + 1) non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1) multicast MAC addresses should be provided to the device model. By doing so, the vdpa device turns promiscuous mode on, aligning with the VirtIO standard. The device model marks `n->mac_table.uni_overflow` and `n->mac_table.multi_overflow`, which aligns with the state of the vdpa device. Note that the bug cannot be triggered at the moment, since VIRTIO_NET_F_CTRL_RX feature is not enabled for SVQ. Fixes: 7a7f87e94c ("vdpa: Move command buffers map to start of net device") Signed-off-by: Hawkins Jiawei Message-Id: <267e15e4eed2d7aeb9887f193da99a13d22a2f1d.1688743107.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 162 ++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 161 insertions(+), 1 deletion(-) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index eee4b2a09e..7610589a60 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -922,6 +922,148 @@ static NetClientInfo net_vhost_vdpa_cvq_info = { .check_peer_type = vhost_vdpa_check_peer_type, }; +/* + * Forward the excessive VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command to + * vdpa device. + * + * Considering that QEMU cannot send the entire filter table to the + * vdpa device, it should send the VIRTIO_NET_CTRL_RX_PROMISC CVQ + * command to enable promiscuous mode to receive all packets, + * according to VirtIO standard, "Since there are no guarantees, + * it can use a hash filter or silently switch to allmulti or + * promiscuous mode if it is given too many addresses.". + * + * Since QEMU ignores MAC addresses beyond `MAC_TABLE_ENTRIES` and + * marks `n->mac_table.x_overflow` accordingly, it should have + * the same effect on the device model to receive + * (`MAC_TABLE_ENTRIES` + 1) or more non-multicast MAC addresses. + * The same applies to multicast MAC addresses. + * + * Therefore, QEMU can provide the device model with a fake + * VIRTIO_NET_CTRL_MAC_TABLE_SET command with (`MAC_TABLE_ENTRIES` + 1) + * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1) multicast + * MAC addresses. This ensures that the device model marks + * `n->mac_table.uni_overflow` and `n->mac_table.multi_overflow`, + * allowing all packets to be received, which aligns with the + * state of the vdpa device. + */ +static int vhost_vdpa_net_excessive_mac_filter_cvq_add(VhostVDPAState *s, + VirtQueueElement *elem, + struct iovec *out) +{ + struct virtio_net_ctrl_mac mac_data, *mac_ptr; + struct virtio_net_ctrl_hdr *hdr_ptr; + uint32_t cursor; + ssize_t r; + + /* parse the non-multicast MAC address entries from CVQ command */ + cursor = sizeof(*hdr_ptr); + r = iov_to_buf(elem->out_sg, elem->out_num, cursor, + &mac_data, sizeof(mac_data)); + if (unlikely(r != sizeof(mac_data))) { + /* + * If the CVQ command is invalid, we should simulate the vdpa device + * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command + */ + *s->status = VIRTIO_NET_ERR; + return sizeof(*s->status); + } + cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN; + + /* parse the multicast MAC address entries from CVQ command */ + r = iov_to_buf(elem->out_sg, elem->out_num, cursor, + &mac_data, sizeof(mac_data)); + if (r != sizeof(mac_data)) { + /* + * If the CVQ command is invalid, we should simulate the vdpa device + * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command + */ + *s->status = VIRTIO_NET_ERR; + return sizeof(*s->status); + } + cursor += sizeof(mac_data) + le32_to_cpu(mac_data.entries) * ETH_ALEN; + + /* validate the CVQ command */ + if (iov_size(elem->out_sg, elem->out_num) != cursor) { + /* + * If the CVQ command is invalid, we should simulate the vdpa device + * to reject the VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command + */ + *s->status = VIRTIO_NET_ERR; + return sizeof(*s->status); + } + + /* + * According to VirtIO standard, "Since there are no guarantees, + * it can use a hash filter or silently switch to allmulti or + * promiscuous mode if it is given too many addresses.". + * + * Therefore, considering that QEMU is unable to send the entire + * filter table to the vdpa device, it should send the + * VIRTIO_NET_CTRL_RX_PROMISC CVQ command to enable promiscuous mode + */ + r = vhost_vdpa_net_load_rx_mode(s, VIRTIO_NET_CTRL_RX_PROMISC, 1); + if (unlikely(r < 0)) { + return r; + } + if (*s->status != VIRTIO_NET_OK) { + return sizeof(*s->status); + } + + /* + * QEMU should also send a fake VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ + * command to the device model, including (`MAC_TABLE_ENTRIES` + 1) + * non-multicast MAC addresses and (`MAC_TABLE_ENTRIES` + 1) + * multicast MAC addresses. + * + * By doing so, the device model can mark `n->mac_table.uni_overflow` + * and `n->mac_table.multi_overflow`, enabling all packets to be + * received, which aligns with the state of the vdpa device. + */ + cursor = 0; + uint32_t fake_uni_entries = MAC_TABLE_ENTRIES + 1, + fake_mul_entries = MAC_TABLE_ENTRIES + 1, + fake_cvq_size = sizeof(struct virtio_net_ctrl_hdr) + + sizeof(mac_data) + fake_uni_entries * ETH_ALEN + + sizeof(mac_data) + fake_mul_entries * ETH_ALEN; + + assert(fake_cvq_size < vhost_vdpa_net_cvq_cmd_page_len()); + out->iov_len = fake_cvq_size; + + /* pack the header for fake CVQ command */ + hdr_ptr = out->iov_base + cursor; + hdr_ptr->class = VIRTIO_NET_CTRL_MAC; + hdr_ptr->cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; + cursor += sizeof(*hdr_ptr); + + /* + * Pack the non-multicast MAC addresses part for fake CVQ command. + * + * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC + * addresses provieded in CVQ command. Therefore, only the entries + * field need to be prepared in the CVQ command. + */ + mac_ptr = out->iov_base + cursor; + mac_ptr->entries = cpu_to_le32(fake_uni_entries); + cursor += sizeof(*mac_ptr) + fake_uni_entries * ETH_ALEN; + + /* + * Pack the multicast MAC addresses part for fake CVQ command. + * + * According to virtio_net_handle_mac(), QEMU doesn't verify the MAC + * addresses provieded in CVQ command. Therefore, only the entries + * field need to be prepared in the CVQ command. + */ + mac_ptr = out->iov_base + cursor; + mac_ptr->entries = cpu_to_le32(fake_mul_entries); + + /* + * Simulating QEMU poll a vdpa device used buffer + * for VIRTIO_NET_CTRL_MAC_TABLE_SET CVQ command + */ + return sizeof(*s->status); +} + /** * Validate and copy control virtqueue commands. * @@ -949,7 +1091,7 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, out.iov_len = iov_to_buf(elem->out_sg, elem->out_num, 0, s->cvq_cmd_out_buffer, - vhost_vdpa_net_cvq_cmd_len()); + vhost_vdpa_net_cvq_cmd_page_len()); ctrl = s->cvq_cmd_out_buffer; if (ctrl->class == VIRTIO_NET_CTRL_ANNOUNCE) { @@ -959,6 +1101,24 @@ static int vhost_vdpa_net_handle_ctrl_avail(VhostShadowVirtqueue *svq, */ dev_written = sizeof(status); *s->status = VIRTIO_NET_OK; + } else if (unlikely(ctrl->class == VIRTIO_NET_CTRL_MAC && + ctrl->cmd == VIRTIO_NET_CTRL_MAC_TABLE_SET && + iov_size(elem->out_sg, elem->out_num) > out.iov_len)) { + /* + * Due to the size limitation of the out buffer sent to the vdpa device, + * which is determined by vhost_vdpa_net_cvq_cmd_page_len(), excessive + * MAC addresses set by the driver for the filter table can cause + * truncation of the CVQ command in QEMU. As a result, the vdpa device + * rejects the flawed CVQ command. + * + * Therefore, QEMU must handle this situation instead of sending + * the CVQ command direclty. + */ + dev_written = vhost_vdpa_net_excessive_mac_filter_cvq_add(s, elem, + &out); + if (unlikely(dev_written < 0)) { + goto out; + } } else { dev_written = vhost_vdpa_net_cvq_add(s, out.iov_len, sizeof(status)); if (unlikely(dev_written < 0)) { From ea6eec497921f9c0db259445fc44429a743d0665 Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Fri, 7 Jul 2023 23:27:34 +0800 Subject: [PATCH 64/66] vdpa: Allow VIRTIO_NET_F_CTRL_RX in SVQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable SVQ with VIRTIO_NET_F_CTRL_RX feature. Signed-off-by: Hawkins Jiawei Acked-by: Eugenio Pérez Message-Id: <5d6173a6d7c4c514c98362b404c019f52d73b06c.1688743107.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 7610589a60..5cd671bfb9 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -110,6 +110,7 @@ static const uint64_t vdpa_svq_device_features = BIT_ULL(VIRTIO_NET_F_MRG_RXBUF) | BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | + BIT_ULL(VIRTIO_NET_F_CTRL_RX) | BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) | From 4fd180c7bb476d0793f3849ae4e6c0f932b6e3ab Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Sat, 8 Jul 2023 17:24:51 +0800 Subject: [PATCH 65/66] vdpa: Restore packet receive filtering state relative with _F_CTRL_RX_EXTRA feature This patch refactors vhost_vdpa_net_load_rx() to restore the packet receive filtering state in relation to VIRTIO_NET_F_CTRL_RX_EXTRA feature at device's startup. Signed-off-by: Hawkins Jiawei Message-Id: Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 88 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 88 insertions(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 5cd671bfb9..0c1c0760a7 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -873,6 +873,94 @@ static int vhost_vdpa_net_load_rx(VhostVDPAState *s, } } + if (!virtio_vdev_has_feature(&n->parent_obj, VIRTIO_NET_F_CTRL_RX_EXTRA)) { + return 0; + } + + /* + * According to virtio_net_reset(), device turns all-unicast mode + * off by default. + * + * Therefore, QEMU should only send this CVQ command if the driver + * sets all-unicast mode on, different from the device's defaults. + * + * Note that the device's defaults can mismatch the driver's + * configuration only at live migration. + */ + if (n->alluni) { + dev_written = vhost_vdpa_net_load_rx_mode(s, + VIRTIO_NET_CTRL_RX_ALLUNI, 1); + if (dev_written < 0) { + return dev_written; + } + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } + } + + /* + * According to virtio_net_reset(), device turns non-multicast mode + * off by default. + * + * Therefore, QEMU should only send this CVQ command if the driver + * sets non-multicast mode on, different from the device's defaults. + * + * Note that the device's defaults can mismatch the driver's + * configuration only at live migration. + */ + if (n->nomulti) { + dev_written = vhost_vdpa_net_load_rx_mode(s, + VIRTIO_NET_CTRL_RX_NOMULTI, 1); + if (dev_written < 0) { + return dev_written; + } + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } + } + + /* + * According to virtio_net_reset(), device turns non-unicast mode + * off by default. + * + * Therefore, QEMU should only send this CVQ command if the driver + * sets non-unicast mode on, different from the device's defaults. + * + * Note that the device's defaults can mismatch the driver's + * configuration only at live migration. + */ + if (n->nouni) { + dev_written = vhost_vdpa_net_load_rx_mode(s, + VIRTIO_NET_CTRL_RX_NOUNI, 1); + if (dev_written < 0) { + return dev_written; + } + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } + } + + /* + * According to virtio_net_reset(), device turns non-broadcast mode + * off by default. + * + * Therefore, QEMU should only send this CVQ command if the driver + * sets non-broadcast mode on, different from the device's defaults. + * + * Note that the device's defaults can mismatch the driver's + * configuration only at live migration. + */ + if (n->nobcast) { + dev_written = vhost_vdpa_net_load_rx_mode(s, + VIRTIO_NET_CTRL_RX_NOBCAST, 1); + if (dev_written < 0) { + return dev_written; + } + if (*s->status != VIRTIO_NET_OK) { + return -EIO; + } + } + return 0; } From d669b7bba22d45cb9e5926d63541e52bde1655dd Mon Sep 17 00:00:00 2001 From: Hawkins Jiawei Date: Sat, 8 Jul 2023 17:24:52 +0800 Subject: [PATCH 66/66] vdpa: Allow VIRTIO_NET_F_CTRL_RX_EXTRA in SVQ MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable SVQ with VIRTIO_NET_F_CTRL_RX_EXTRA feature. Signed-off-by: Hawkins Jiawei Acked-by: Eugenio Pérez Message-Id: <15ecc49975f9b8d1316ed4296879564a18abf31e.1688797728.git.yin31149@gmail.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- net/vhost-vdpa.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 0c1c0760a7..9795306742 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -111,6 +111,7 @@ static const uint64_t vdpa_svq_device_features = BIT_ULL(VIRTIO_NET_F_STATUS) | BIT_ULL(VIRTIO_NET_F_CTRL_VQ) | BIT_ULL(VIRTIO_NET_F_CTRL_RX) | + BIT_ULL(VIRTIO_NET_F_CTRL_RX_EXTRA) | BIT_ULL(VIRTIO_NET_F_MQ) | BIT_ULL(VIRTIO_F_ANY_LAYOUT) | BIT_ULL(VIRTIO_NET_F_CTRL_MAC_ADDR) |