From df72f01104ae9a68f356ae412776b209f04d27cc Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 25 Oct 2023 11:18:41 -0600 Subject: [PATCH 01/21] virtio: rng: Check notifier helpers for VIRTIO_CONFIG_IRQ_IDX MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the driver doesn't support interrupts, we must return early when index is set to VIRTIO_CONFIG_IRQ_IDX. Basically the same thing Viresh did for "91208dd297f2 virtio: i2c: Check notifier helpers for VIRTIO_CONFIG_IRQ_IDX". Fixes: 544f0278afca ("virtio: introduce macro VIRTIO_CONFIG_IRQ_IDX") Signed-off-by: Mathieu Poirier Message-Id: <20231025171841.3379663-1-mathieu.poirier@linaro.org> Tested-by: Leo Yan Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-user-rng.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c index efc54cd3fb..24ac1a22c8 100644 --- a/hw/virtio/vhost-user-rng.c +++ b/hw/virtio/vhost-user-rng.c @@ -129,6 +129,14 @@ static void vu_rng_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) { VHostUserRNG *rng = VHOST_USER_RNG(vdev); + /* + * We don't support interrupts, return early if index is set to + * VIRTIO_CONFIG_IRQ_IDX. + */ + if (idx == VIRTIO_CONFIG_IRQ_IDX) { + return; + } + vhost_virtqueue_mask(&rng->vhost_dev, vdev, idx, mask); } @@ -136,6 +144,14 @@ static bool vu_rng_guest_notifier_pending(VirtIODevice *vdev, int idx) { VHostUserRNG *rng = VHOST_USER_RNG(vdev); + /* + * We don't support interrupts, return early if index is set to + * VIRTIO_CONFIG_IRQ_IDX. 
+ */ + if (idx == VIRTIO_CONFIG_IRQ_IDX) { + return false; + } + return vhost_virtqueue_pending(&rng->vhost_dev, idx); } From c40db4ba60af354cccaddbbae5c7125b4b97c79c Mon Sep 17 00:00:00 2001 From: Zhao Liu Date: Tue, 28 Nov 2023 00:02:02 +0800 Subject: [PATCH 02/21] tests: bios-tables-test: Rename smbios type 4 related test functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In fact, type4-count, core-count, core-count2, thread-count and thread-count2 are tested with KVM not TCG. Rename these test functions to reflect KVM base instead of TCG. Signed-off-by: Zhao Liu Message-Id: <20231127160202.1037290-1-zhao1.liu@linux.intel.com> Reviewed-by: Philippe Mathieu-Daudé Reviewed-by: Igor Mammedov Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- tests/qtest/bios-tables-test.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index fe6a9a8563..21811a1ab5 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -1015,7 +1015,7 @@ static void test_acpi_q35_tcg(void) free_test_data(&data); } -static void test_acpi_q35_tcg_type4_count(void) +static void test_acpi_q35_kvm_type4_count(void) { test_data data = { .machine = MACHINE_Q35, @@ -1031,7 +1031,7 @@ static void test_acpi_q35_tcg_type4_count(void) free_test_data(&data); } -static void test_acpi_q35_tcg_core_count(void) +static void test_acpi_q35_kvm_core_count(void) { test_data data = { .machine = MACHINE_Q35, @@ -1048,7 +1048,7 @@ static void test_acpi_q35_tcg_core_count(void) free_test_data(&data); } -static void test_acpi_q35_tcg_core_count2(void) +static void test_acpi_q35_kvm_core_count2(void) { test_data data = { .machine = MACHINE_Q35, @@ -1065,7 +1065,7 @@ static void test_acpi_q35_tcg_core_count2(void) free_test_data(&data); } -static void test_acpi_q35_tcg_thread_count(void) +static void 
test_acpi_q35_kvm_thread_count(void) { test_data data = { .machine = MACHINE_Q35, @@ -1082,7 +1082,7 @@ static void test_acpi_q35_tcg_thread_count(void) free_test_data(&data); } -static void test_acpi_q35_tcg_thread_count2(void) +static void test_acpi_q35_kvm_thread_count2(void) { test_data data = { .machine = MACHINE_Q35, @@ -2262,15 +2262,15 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/q35/kvm/xapic", test_acpi_q35_kvm_xapic); qtest_add_func("acpi/q35/kvm/dmar", test_acpi_q35_kvm_dmar); qtest_add_func("acpi/q35/type4-count", - test_acpi_q35_tcg_type4_count); + test_acpi_q35_kvm_type4_count); qtest_add_func("acpi/q35/core-count", - test_acpi_q35_tcg_core_count); + test_acpi_q35_kvm_core_count); qtest_add_func("acpi/q35/core-count2", - test_acpi_q35_tcg_core_count2); + test_acpi_q35_kvm_core_count2); qtest_add_func("acpi/q35/thread-count", - test_acpi_q35_tcg_thread_count); + test_acpi_q35_kvm_thread_count); qtest_add_func("acpi/q35/thread-count2", - test_acpi_q35_tcg_thread_count2); + test_acpi_q35_kvm_thread_count2); } if (qtest_has_device("virtio-iommu-pci")) { qtest_add_func("acpi/q35/viot", test_acpi_q35_viot); From 9aad781959d770eb2479596a23e471cd82bd4e5b Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 4 Dec 2023 17:16:17 -0600 Subject: [PATCH 03/21] vhost: Add worker backend callouts This adds the vhost backend callouts for the worker ioctls added in the 6.4 linux kernel commit: c1ecd8e95007 ("vhost: allow userspace to create workers") Signed-off-by: Mike Christie Reviewed-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi Message-Id: <20231204231618.21962-2-michael.christie@oracle.com> Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost-backend.c | 28 ++++++++++++++++++++++++++++ include/hw/virtio/vhost-backend.h | 14 ++++++++++++++ 2 files changed, 42 insertions(+) diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 17f3fc6a08..833804dd40 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -158,6 +158,30 @@ static int vhost_kernel_set_vring_busyloop_timeout(struct vhost_dev *dev, return vhost_kernel_call(dev, VHOST_SET_VRING_BUSYLOOP_TIMEOUT, s); } +static int vhost_kernel_new_worker(struct vhost_dev *dev, + struct vhost_worker_state *worker) +{ + return vhost_kernel_call(dev, VHOST_NEW_WORKER, worker); +} + +static int vhost_kernel_free_worker(struct vhost_dev *dev, + struct vhost_worker_state *worker) +{ + return vhost_kernel_call(dev, VHOST_FREE_WORKER, worker); +} + +static int vhost_kernel_attach_vring_worker(struct vhost_dev *dev, + struct vhost_vring_worker *worker) +{ + return vhost_kernel_call(dev, VHOST_ATTACH_VRING_WORKER, worker); +} + +static int vhost_kernel_get_vring_worker(struct vhost_dev *dev, + struct vhost_vring_worker *worker) +{ + return vhost_kernel_call(dev, VHOST_GET_VRING_WORKER, worker); +} + static int vhost_kernel_set_features(struct vhost_dev *dev, uint64_t features) { @@ -313,6 +337,10 @@ const VhostOps kernel_ops = { .vhost_set_vring_err = vhost_kernel_set_vring_err, .vhost_set_vring_busyloop_timeout = vhost_kernel_set_vring_busyloop_timeout, + .vhost_get_vring_worker = vhost_kernel_get_vring_worker, + .vhost_attach_vring_worker = vhost_kernel_attach_vring_worker, + .vhost_new_worker = vhost_kernel_new_worker, + .vhost_free_worker = vhost_kernel_free_worker, .vhost_set_features = vhost_kernel_set_features, .vhost_get_features = vhost_kernel_get_features, .vhost_set_backend_cap = vhost_kernel_set_backend_cap, diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index a86d103f82..70c2e8ffee 100644 --- a/include/hw/virtio/vhost-backend.h +++ 
b/include/hw/virtio/vhost-backend.h @@ -45,6 +45,8 @@ struct vhost_memory; struct vhost_vring_file; struct vhost_vring_state; struct vhost_vring_addr; +struct vhost_vring_worker; +struct vhost_worker_state; struct vhost_scsi_target; struct vhost_iotlb_msg; struct vhost_virtqueue; @@ -85,6 +87,14 @@ typedef int (*vhost_set_vring_err_op)(struct vhost_dev *dev, struct vhost_vring_file *file); typedef int (*vhost_set_vring_busyloop_timeout_op)(struct vhost_dev *dev, struct vhost_vring_state *r); +typedef int (*vhost_attach_vring_worker_op)(struct vhost_dev *dev, + struct vhost_vring_worker *worker); +typedef int (*vhost_get_vring_worker_op)(struct vhost_dev *dev, + struct vhost_vring_worker *worker); +typedef int (*vhost_new_worker_op)(struct vhost_dev *dev, + struct vhost_worker_state *worker); +typedef int (*vhost_free_worker_op)(struct vhost_dev *dev, + struct vhost_worker_state *worker); typedef int (*vhost_set_features_op)(struct vhost_dev *dev, uint64_t features); typedef int (*vhost_get_features_op)(struct vhost_dev *dev, @@ -172,6 +182,10 @@ typedef struct VhostOps { vhost_set_vring_call_op vhost_set_vring_call; vhost_set_vring_err_op vhost_set_vring_err; vhost_set_vring_busyloop_timeout_op vhost_set_vring_busyloop_timeout; + vhost_new_worker_op vhost_new_worker; + vhost_free_worker_op vhost_free_worker; + vhost_get_vring_worker_op vhost_get_vring_worker; + vhost_attach_vring_worker_op vhost_attach_vring_worker; vhost_set_features_op vhost_set_features; vhost_get_features_op vhost_get_features; vhost_set_backend_cap_op vhost_set_backend_cap; From 51396556f0927c3d202f9903db5aa5aa8d4cbd2c Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Mon, 4 Dec 2023 17:16:18 -0600 Subject: [PATCH 04/21] vhost-scsi: Add support for a worker thread per virtqueue This adds support for vhost-scsi to be able to create a worker thread per virtqueue. 
Right now for vhost-net we get a worker thread per tx/rx virtqueue pair which scales nicely as we add more virtqueues and CPUs, but for scsi we get the single worker thread that's shared by all virtqueues. When trying to send IO to more than 2 virtqueues the single thread becomes a bottleneck. This patch adds a new setting, worker_per_virtqueue, which can be set to: false: Existing behavior where we get the single worker thread. true: Create a worker per IO virtqueue. Signed-off-by: Mike Christie Reviewed-by: Stefan Hajnoczi Message-Id: <20231204231618.21962-3-michael.christie@oracle.com> Reviewed-by: Stefan Hajnoczi Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella --- hw/scsi/vhost-scsi.c | 62 +++++++++++++++++++++++++++++++++ include/hw/virtio/virtio-scsi.h | 1 + 2 files changed, 63 insertions(+) diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 3126df9e1d..08aa7534df 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -165,6 +165,59 @@ static const VMStateDescription vmstate_virtio_vhost_scsi = { .pre_save = vhost_scsi_pre_save, }; +static int vhost_scsi_set_workers(VHostSCSICommon *vsc, bool per_virtqueue) +{ + struct vhost_dev *dev = &vsc->dev; + struct vhost_vring_worker vq_worker; + struct vhost_worker_state worker; + int i, ret; + + /* Use default worker */ + if (!per_virtqueue || dev->nvqs == VHOST_SCSI_VQ_NUM_FIXED + 1) { + return 0; + } + + /* + * ctl/evt share the first worker since it will be rare for them + * to send cmds while IO is running. + */ + for (i = VHOST_SCSI_VQ_NUM_FIXED + 1; i < dev->nvqs; i++) { + memset(&worker, 0, sizeof(worker)); + + ret = dev->vhost_ops->vhost_new_worker(dev, &worker); + if (ret == -ENOTTY) { + /* + * worker ioctls are not implemented so just ignore and + * and continue device setup. + */ + warn_report("vhost-scsi: Backend supports a single worker. 
" + "Ignoring worker_per_virtqueue=true setting."); + ret = 0; + break; + } else if (ret) { + break; + } + + memset(&vq_worker, 0, sizeof(vq_worker)); + vq_worker.worker_id = worker.worker_id; + vq_worker.index = i; + + ret = dev->vhost_ops->vhost_attach_vring_worker(dev, &vq_worker); + if (ret == -ENOTTY) { + /* + * It's a bug for the kernel to have supported the worker creation + * ioctl but not attach. + */ + dev->vhost_ops->vhost_free_worker(dev, &worker); + break; + } else if (ret) { + break; + } + } + + return ret; +} + static void vhost_scsi_realize(DeviceState *dev, Error **errp) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); @@ -232,6 +285,13 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) goto free_vqs; } + ret = vhost_scsi_set_workers(vsc, vs->conf.worker_per_virtqueue); + if (ret < 0) { + error_setg(errp, "vhost-scsi: vhost worker setup failed: %s", + strerror(-ret)); + goto free_vqs; + } + /* At present, channel and lun both are 0 for bootable vhost-scsi disk */ vsc->channel = 0; vsc->lun = 0; @@ -297,6 +357,8 @@ static Property vhost_scsi_properties[] = { VIRTIO_SCSI_F_T10_PI, false), DEFINE_PROP_BOOL("migratable", VHostSCSICommon, migratable, false), + DEFINE_PROP_BOOL("worker_per_virtqueue", VirtIOSCSICommon, + conf.worker_per_virtqueue, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 7f0573b1bf..7be0105918 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -51,6 +51,7 @@ typedef struct virtio_scsi_config VirtIOSCSIConfig; struct VirtIOSCSIConf { uint32_t num_queues; uint32_t virtqueue_size; + bool worker_per_virtqueue; bool seg_max_adjust; uint32_t max_sectors; uint32_t cmd_per_lun; From 45e48809d3d0328fd2f113b878a62a27f1360664 Mon Sep 17 00:00:00 2001 From: Aaron Young Date: Tue, 12 Dec 2023 08:51:43 -0800 Subject: [PATCH 05/21] hw/acpi: propagate vcpu hotplug after switch to modern interface If a vcpu with an apic-id 
that is not supported by the legacy interface (>255) is hot-plugged, the legacy code will dynamically switch to the modern interface. However, the hotplug event is not forwarded to the new interface resulting in the vcpu not being fully/properly added to the machine config. This BUG is evidenced by OVMF when it attempts to count the vcpus and reports an inconsistent vcpu count reported by the fw_cfg interface and the modern hotplug interface. Fix is to propagate the hotplug event after making the switch from the legacy interface to the modern interface. Cc: "Michael S. Tsirkin" Cc: Igor Mammedov Signed-off-by: Aaron Young Message-Id: <0e8a9baebbb29f2a6c87fd08e43dc2ac4019759a.1702398644.git.Aaron.Young@oracle.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/acpi/cpu_hotplug.c | 20 ++++++++++++++++---- 1 file changed, 16 insertions(+), 4 deletions(-) diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c index 634bbecb31..6f78db0ccb 100644 --- a/hw/acpi/cpu_hotplug.c +++ b/hw/acpi/cpu_hotplug.c @@ -59,7 +59,8 @@ static const MemoryRegionOps AcpiCpuHotplug_ops = { }, }; -static void acpi_set_cpu_present_bit(AcpiCpuHotplug *g, CPUState *cpu) +static void acpi_set_cpu_present_bit(AcpiCpuHotplug *g, CPUState *cpu, + bool *swtchd_to_modern) { CPUClass *k = CPU_GET_CLASS(cpu); int64_t cpu_id; @@ -68,23 +69,34 @@ static void acpi_set_cpu_present_bit(AcpiCpuHotplug *g, CPUState *cpu) if ((cpu_id / 8) >= ACPI_GPE_PROC_LEN) { object_property_set_bool(g->device, "cpu-hotplug-legacy", false, &error_abort); + *swtchd_to_modern = true; return; } + *swtchd_to_modern = false; g->sts[cpu_id / 8] |= (1 << (cpu_id % 8)); } void legacy_acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, AcpiCpuHotplug *g, DeviceState *dev, Error **errp) { - acpi_set_cpu_present_bit(g, CPU(dev)); - acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS); + bool swtchd_to_modern; + Error *local_err = NULL; + + acpi_set_cpu_present_bit(g, CPU(dev), &swtchd_to_modern); + 
if (swtchd_to_modern) { + /* propagate the hotplug to the modern interface */ + hotplug_handler_plug(hotplug_dev, dev, &local_err); + } else { + acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS); + } } void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, AcpiCpuHotplug *gpe_cpu, uint16_t base) { CPUState *cpu; + bool swtchd_to_modern; memory_region_init_io(&gpe_cpu->io, owner, &AcpiCpuHotplug_ops, gpe_cpu, "acpi-cpu-hotplug", ACPI_GPE_PROC_LEN); @@ -92,7 +104,7 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, gpe_cpu->device = owner; CPU_FOREACH(cpu) { - acpi_set_cpu_present_bit(gpe_cpu, cpu); + acpi_set_cpu_present_bit(gpe_cpu, cpu, &swtchd_to_modern); } } From 551bf7b4c1795ef89407039dd908c2f199b122c2 Mon Sep 17 00:00:00 2001 From: Dongli Zhang Date: Wed, 13 Dec 2023 16:31:17 -0800 Subject: [PATCH 06/21] vhost-scsi: fix usage of error_reportf_err() It is required to use error_report() instead of error_reportf_err(), if the prior function does not take local_err as the argument. As a result, the local_err is always NULL and segment fault may happen. vhost_scsi_start() -> vhost_scsi_set_endpoint(s) --> does not allocate local_err -> error_reportf_err() -> error_vprepend() -> g_string_append(newmsg, (*errp)->msg) --> (*errp) is NULL In addition, add ": " at the end of other error_reportf_err() logs. Fixes: 7962e432b4e4 ("vhost-user-scsi: support reconnect to backend") Signed-off-by: Dongli Zhang Message-Id: <20231214003117.43960-1-dongli.zhang@oracle.com> Reviewed-by: Feng Li Reviewed-by: Raphael Norwitz Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/scsi/vhost-scsi.c | 4 ++-- hw/scsi/vhost-user-scsi.c | 3 ++- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 08aa7534df..6159eb6fec 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -91,13 +91,13 @@ static int vhost_scsi_start(VHostSCSI *s) ret = vhost_scsi_common_start(vsc, &local_err); if (ret < 0) { - error_reportf_err(local_err, "Error starting vhost-scsi"); + error_reportf_err(local_err, "Error starting vhost-scsi: "); return ret; } ret = vhost_scsi_set_endpoint(s); if (ret < 0) { - error_reportf_err(local_err, "Error setting vhost-scsi endpoint"); + error_report("Error setting vhost-scsi endpoint"); vhost_scsi_common_stop(vsc); } diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 780f10559d..af18c4f3d3 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -83,7 +83,8 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) if (should_start) { ret = vhost_user_scsi_start(s, &local_err); if (ret < 0) { - error_reportf_err(local_err, "unable to start vhost-user-scsi: %s", + error_reportf_err(local_err, + "unable to start vhost-user-scsi: %s: ", strerror(-ret)); qemu_chr_fe_disconnect(&vs->conf.chardev); } From 410cefbd5c3682432229f5905aa43358785d2ae0 Mon Sep 17 00:00:00 2001 From: wangmeiling Date: Thu, 14 Dec 2023 07:22:08 +0000 Subject: [PATCH 07/21] Fix bugs when VM shutdown with virtio-gpu unplugged Virtio-gpu malloc memory for the queue when it realized, but the queues was not released when it unrealized, which resulting in a memory leak. In addition, vm_change_state_handler is not cleaned up, which is related to vdev and will lead to segmentation fault when VM shutdown. Signed-off-by: wangmeiling Signed-off-by: Binfeng Wu Message-Id: <7bbbc0f3-2ad9-83ca-b39b-f976d0837daf@huawei.com> Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/display/virtio-gpu-base.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 37af256219..4fc7ef8896 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -251,7 +251,11 @@ void virtio_gpu_base_device_unrealize(DeviceState *qdev) { VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev); + VirtIODevice *vdev = VIRTIO_DEVICE(qdev); + virtio_del_queue(vdev, 0); + virtio_del_queue(vdev, 1); + virtio_cleanup(vdev); migrate_del_blocker(&g->migration_blocker); } From bc865bfe2d54ac56daa20a6395abff68fab6a5d6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Fri, 15 Dec 2023 18:28:19 +0100 Subject: [PATCH 08/21] vdpa: do not set virtio status bits if unneeded MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next commits will set DRIVER and ACKNOWLEDGE flags repeatedly in the case of a migration destination. Let's save ioctls with this. Signed-off-by: Eugenio Pérez Message-Id: <20231215172830.2540987-2-eperezma@redhat.com> --- hw/virtio/vhost-vdpa.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 819b2d811a..401dfa96fd 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -511,6 +511,10 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) if (ret < 0) { return ret; } + if ((s & status) == status) { + /* Don't set bits already set */ + return 0; + } s |= status; From 8c5e9809225c387026476f5eefc2f8ae749a72f7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:10 +0100 Subject: [PATCH 09/21] vdpa: add VhostVDPAShared MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It will hold properties shared among all vhost_vdpa instances associated with the same device. For example, we just need one iova_tree or one memory listener for the entire device. 
Next patches will register the vhost_vdpa memory listener at the beginning of the VM migration at the destination. This enables QEMU to map the memory to the device before stopping the VM at the source, instead of doing while both source and destination are stopped, thus minimizing the downtime. However, the destination QEMU is unaware of which vhost_vdpa struct will register its memory_listener. If the source guest has CVQ enabled, it will be the one associated with the CVQ. Otherwise, it will be the first one. Save the memory operations related members in a common place rather than always in the first / last vhost_vdpa. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-2-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- include/hw/virtio/vhost-vdpa.h | 5 +++++ net/vhost-vdpa.c | 24 ++++++++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 5407d54fd7..eb1a56d75a 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -30,6 +30,10 @@ typedef struct VhostVDPAHostNotifier { void *addr; } VhostVDPAHostNotifier; +/* Info shared by all vhost_vdpa device models */ +typedef struct vhost_vdpa_shared { +} VhostVDPAShared; + typedef struct vhost_vdpa { int device_fd; int index; @@ -46,6 +50,7 @@ typedef struct vhost_vdpa { bool suspended; /* IOVA mapping used by the Shadow Virtqueue */ VhostIOVATree *iova_tree; + VhostVDPAShared *shared; GPtrArray *shadow_vqs; const VhostShadowVirtqueueOps *shadow_vq_ops; void *shadow_vq_ops_opaque; diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index d0614d7954..8b661b9e6d 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -240,6 +240,10 @@ static void vhost_vdpa_cleanup(NetClientState *nc) qemu_close(s->vhost_vdpa.device_fd); s->vhost_vdpa.device_fd = -1; } + if (s->vhost_vdpa.index != 0) { + return; + } + 
g_free(s->vhost_vdpa.shared); } /** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend */ @@ -1661,6 +1665,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, bool svq, struct vhost_vdpa_iova_range iova_range, uint64_t features, + VhostVDPAShared *shared, Error **errp) { NetClientState *nc = NULL; @@ -1696,6 +1701,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, if (queue_pair_index == 0) { vhost_vdpa_net_valid_svq_features(features, &s->vhost_vdpa.migration_blocker); + s->vhost_vdpa.shared = g_new0(VhostVDPAShared, 1); } else if (!is_datapath) { s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), PROT_READ | PROT_WRITE, @@ -1708,11 +1714,16 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shadow_vq_ops_opaque = s; s->cvq_isolated = cvq_isolated; } + if (queue_pair_index != 0) { + s->vhost_vdpa.shared = shared; + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); if (ret) { qemu_del_net_client(nc); return NULL; } + return nc; } @@ -1824,17 +1835,26 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, ncs = g_malloc0(sizeof(*ncs) * queue_pairs); for (i = 0; i < queue_pairs; i++) { + VhostVDPAShared *shared = NULL; + + if (i) { + shared = DO_UPCAST(VhostVDPAState, nc, ncs[0])->vhost_vdpa.shared; + } ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 2, true, opts->x_svq, - iova_range, features, errp); + iova_range, features, shared, errp); if (!ncs[i]) goto err; } if (has_cvq) { + VhostVDPAState *s0 = DO_UPCAST(VhostVDPAState, nc, ncs[0]); + VhostVDPAShared *shared = s0->vhost_vdpa.shared; + nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 1, false, - opts->x_svq, iova_range, features, errp); + opts->x_svq, iova_range, features, shared, + errp); if (!nc) goto err; } From 5edb02e8004c2f1d5026a02cd9378046973f47af Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: 
Thu, 21 Dec 2023 18:43:11 +0100 Subject: [PATCH 10/21] vdpa: move iova tree to the shared struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches will register the vhost_vdpa memory listener while the VM is migrating at the destination, so we can map the memory to the device before stopping the VM at the source. The main goal is to reduce the downtime. However, the destination QEMU is unaware of which vhost_vdpa device will register its memory_listener. If the source guest has CVQ enabled, it will be the CVQ device. Otherwise, it will be the first one. Move the iova tree to VhostVDPAShared so all vhost_vdpa can use it, rather than always in the first or last vhost_vdpa. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-3-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 19 ++++++------ include/hw/virtio/vhost-vdpa.h | 4 +-- net/vhost-vdpa.c | 54 +++++++++++++++------------------- 3 files changed, 35 insertions(+), 42 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 401dfa96fd..95a179a082 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -358,7 +358,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, mem_region.size = int128_get64(llsize) - 1, mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), - r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); + r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &mem_region); if (unlikely(r != IOVA_OK)) { error_report("Can't allocate a mapping (%d)", r); goto fail; @@ -379,7 +379,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, fail_map: if (v->shadow_data) { - vhost_iova_tree_remove(v->iova_tree, mem_region); + vhost_iova_tree_remove(v->shared->iova_tree, mem_region); } fail: @@ -441,13 +441,13 @@ static void 
vhost_vdpa_listener_region_del(MemoryListener *listener, .size = int128_get64(llsize) - 1, }; - result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); + result = vhost_iova_tree_find_iova(v->shared->iova_tree, &mem_region); if (!result) { /* The memory listener map wasn't mapped */ return; } iova = result->iova; - vhost_iova_tree_remove(v->iova_tree, *result); + vhost_iova_tree_remove(v->shared->iova_tree, *result); } vhost_vdpa_iotlb_batch_begin_once(v); /* @@ -1063,7 +1063,8 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) const DMAMap needle = { .translated_addr = addr, }; - const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle); + const DMAMap *result = vhost_iova_tree_find_iova(v->shared->iova_tree, + &needle); hwaddr size; int r; @@ -1079,7 +1080,7 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) return; } - vhost_iova_tree_remove(v->iova_tree, *result); + vhost_iova_tree_remove(v->shared->iova_tree, *result); } static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, @@ -1107,7 +1108,7 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, { int r; - r = vhost_iova_tree_map_alloc(v->iova_tree, needle); + r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle); if (unlikely(r != IOVA_OK)) { error_setg(errp, "Cannot allocate iova (%d)", r); return false; @@ -1119,7 +1120,7 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, needle->perm == IOMMU_RO); if (unlikely(r != 0)) { error_setg_errno(errp, -r, "Cannot map region to device"); - vhost_iova_tree_remove(v->iova_tree, *needle); + vhost_iova_tree_remove(v->shared->iova_tree, *needle); } return r == 0; @@ -1220,7 +1221,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) goto err; } - vhost_svq_start(svq, dev->vdev, vq, v->iova_tree); + vhost_svq_start(svq, dev->vdev, vq, v->shared->iova_tree); ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err); if (unlikely(!ok)) { 
goto err_map; diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index eb1a56d75a..ac036055d3 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -32,6 +32,8 @@ typedef struct VhostVDPAHostNotifier { /* Info shared by all vhost_vdpa device models */ typedef struct vhost_vdpa_shared { + /* IOVA mapping used by the Shadow Virtqueue */ + VhostIOVATree *iova_tree; } VhostVDPAShared; typedef struct vhost_vdpa { @@ -48,8 +50,6 @@ typedef struct vhost_vdpa { bool shadow_data; /* Device suspended successfully */ bool suspended; - /* IOVA mapping used by the Shadow Virtqueue */ - VhostIOVATree *iova_tree; VhostVDPAShared *shared; GPtrArray *shadow_vqs; const VhostShadowVirtqueueOps *shadow_vq_ops; diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 8b661b9e6d..10703e5833 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -354,8 +354,8 @@ static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) migration_add_notifier(&s->migration_state, vdpa_net_migration_state_notifier); if (v->shadow_vqs_enabled) { - v->iova_tree = vhost_iova_tree_new(v->iova_range.first, - v->iova_range.last); + v->shared->iova_tree = vhost_iova_tree_new(v->iova_range.first, + v->iova_range.last); } } @@ -380,11 +380,6 @@ static int vhost_vdpa_net_data_start(NetClientState *nc) return 0; } - if (v->shadow_vqs_enabled) { - VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s); - v->iova_tree = s0->vhost_vdpa.iova_tree; - } - return 0; } @@ -417,9 +412,8 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc) dev = s->vhost_vdpa.dev; if (dev->vq_index + dev->nvqs == dev->vq_index_end) { - g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); - } else { - s->vhost_vdpa.iova_tree = NULL; + g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, + vhost_iova_tree_delete); } } @@ -474,7 +468,7 @@ static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void 
*addr) { - VhostIOVATree *tree = v->iova_tree; + VhostIOVATree *tree = v->shared->iova_tree; DMAMap needle = { /* * No need to specify size or to look for more translations since @@ -508,7 +502,7 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, map.translated_addr = (hwaddr)(uintptr_t)buf; map.size = size - 1; map.perm = write ? IOMMU_RW : IOMMU_RO, - r = vhost_iova_tree_map_alloc(v->iova_tree, &map); + r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &map); if (unlikely(r != IOVA_OK)) { error_report("Cannot map injected element"); return r; @@ -523,7 +517,7 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, return 0; dma_map_err: - vhost_iova_tree_remove(v->iova_tree, map); + vhost_iova_tree_remove(v->shared->iova_tree, map); return r; } @@ -583,24 +577,22 @@ out: return 0; } - if (s0->vhost_vdpa.iova_tree) { - /* - * SVQ is already configured for all virtqueues. Reuse IOVA tree for - * simplicity, whether CVQ shares ASID with guest or not, because: - * - Memory listener need access to guest's memory addresses allocated - * in the IOVA tree. - * - There should be plenty of IOVA address space for both ASID not to - * worry about collisions between them. Guest's translations are - * still validated with virtio virtqueue_pop so there is no risk for - * the guest to access memory that it shouldn't. - * - * To allocate a iova tree per ASID is doable but it complicates the - * code and it is not worth it for the moment. - */ - v->iova_tree = s0->vhost_vdpa.iova_tree; - } else { - v->iova_tree = vhost_iova_tree_new(v->iova_range.first, - v->iova_range.last); + /* + * If other vhost_vdpa already have an iova_tree, reuse it for simplicity, + * whether CVQ shares ASID with guest or not, because: + * - Memory listener need access to guest's memory addresses allocated in + * the IOVA tree. + * - There should be plenty of IOVA address space for both ASID not to + * worry about collisions between them. 
Guest's translations are still + * validated with virtio virtqueue_pop so there is no risk for the guest + * to access memory that it shouldn't. + * + * To allocate a iova tree per ASID is doable but it complicates the code + * and it is not worth it for the moment. + */ + if (!v->shared->iova_tree) { + v->shared->iova_tree = vhost_iova_tree_new(v->iova_range.first, + v->iova_range.last); } r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, From ae25ff41b72366248aa89bdc8be58aa86f67e4c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:12 +0100 Subject: [PATCH 11/21] vdpa: move iova_range to vhost_vdpa_shared MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches will register the vhost_vdpa memory listener while the VM is migrating at the destination, so we can map the memory to the device before stopping the VM at the source. The main goal is to reduce the downtime. However, the destination QEMU is unaware of which vhost_vdpa device will register its memory_listener. If the source guest has CVQ enabled, it will be the CVQ device. Otherwise, it will be the first one. Move the iova range to VhostVDPAShared so all vhost_vdpa can use it, rather than always in the first or last vhost_vdpa. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-4-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vdpa-dev.c | 5 ++++- hw/virtio/vhost-vdpa.c | 16 ++++++++++------ include/hw/virtio/vhost-vdpa.h | 3 ++- net/vhost-vdpa.c | 10 +++++----- 4 files changed, 21 insertions(+), 13 deletions(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index f22d5d5bc0..457960d28a 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -114,7 +114,8 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) strerror(-ret)); goto free_vqs; } - v->vdpa.iova_range = iova_range; + v->vdpa.shared = g_new0(VhostVDPAShared, 1); + v->vdpa.shared->iova_range = iova_range; ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); if (ret < 0) { @@ -162,6 +163,7 @@ vhost_cleanup: vhost_dev_cleanup(&v->dev); free_vqs: g_free(vqs); + g_free(v->vdpa.shared); out: qemu_close(v->vhostfd); v->vhostfd = -1; @@ -184,6 +186,7 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) g_free(s->config); g_free(s->dev.vqs); vhost_dev_cleanup(&s->dev); + g_free(s->vdpa.shared); qemu_close(s->vhostfd); s->vhostfd = -1; } diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 95a179a082..5ff1d43ba9 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -213,10 +213,10 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) RCU_READ_LOCK_GUARD(); /* check if RAM section out of device range */ llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova)); - if (int128_gt(llend, int128_make64(v->iova_range.last))) { + if (int128_gt(llend, int128_make64(v->shared->iova_range.last))) { error_report("RAM section out of device range (max=0x%" PRIx64 ", end addr=0x%" PRIx64 ")", - v->iova_range.last, int128_get64(llend)); + v->shared->iova_range.last, int128_get64(llend)); return; } @@ -316,8 +316,10 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, int page_size = qemu_target_page_size(); int page_mask = -page_size; - if 
(vhost_vdpa_listener_skipped_section(section, v->iova_range.first, - v->iova_range.last, page_mask)) { + if (vhost_vdpa_listener_skipped_section(section, + v->shared->iova_range.first, + v->shared->iova_range.last, + page_mask)) { return; } if (memory_region_is_iommu(section->mr)) { @@ -403,8 +405,10 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, int page_size = qemu_target_page_size(); int page_mask = -page_size; - if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, - v->iova_range.last, page_mask)) { + if (vhost_vdpa_listener_skipped_section(section, + v->shared->iova_range.first, + v->shared->iova_range.last, + page_mask)) { return; } if (memory_region_is_iommu(section->mr)) { diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index ac036055d3..8d52a7e498 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -32,6 +32,8 @@ typedef struct VhostVDPAHostNotifier { /* Info shared by all vhost_vdpa device models */ typedef struct vhost_vdpa_shared { + struct vhost_vdpa_iova_range iova_range; + /* IOVA mapping used by the Shadow Virtqueue */ VhostIOVATree *iova_tree; } VhostVDPAShared; @@ -43,7 +45,6 @@ typedef struct vhost_vdpa { bool iotlb_batch_begin_sent; uint32_t address_space_id; MemoryListener listener; - struct vhost_vdpa_iova_range iova_range; uint64_t acked_features; bool shadow_vqs_enabled; /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 10703e5833..7be2c30ad3 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -354,8 +354,8 @@ static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) migration_add_notifier(&s->migration_state, vdpa_net_migration_state_notifier); if (v->shadow_vqs_enabled) { - v->shared->iova_tree = vhost_iova_tree_new(v->iova_range.first, - v->iova_range.last); + v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, + 
v->shared->iova_range.last); } } @@ -591,8 +591,8 @@ out: * and it is not worth it for the moment. */ if (!v->shared->iova_tree) { - v->shared->iova_tree = vhost_iova_tree_new(v->iova_range.first, - v->iova_range.last); + v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, + v->shared->iova_range.last); } r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, @@ -1688,12 +1688,12 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->always_svq = svq; s->migration_state.notify = NULL; s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.iova_range = iova_range; s->vhost_vdpa.shadow_data = svq; if (queue_pair_index == 0) { vhost_vdpa_net_valid_svq_features(features, &s->vhost_vdpa.migration_blocker); s->vhost_vdpa.shared = g_new0(VhostVDPAShared, 1); + s->vhost_vdpa.shared->iova_range = iova_range; } else if (!is_datapath) { s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), PROT_READ | PROT_WRITE, From a6e823d40eb75110a8a1c6eee9650309412a5e9c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:13 +0100 Subject: [PATCH 12/21] vdpa: move shadow_data to vhost_vdpa_shared MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches will register the vhost_vdpa memory listener while the VM is migrating at the destination, so we can map the memory to the device before stopping the VM at the source. The main goal is to reduce the downtime. However, the destination QEMU is unaware of which vhost_vdpa device will register its memory_listener. If the source guest has CVQ enabled, it will be the CVQ device. Otherwise, it will be the first one. Move the shadow_data member to VhostVDPAShared so all vhost_vdpa can use it, rather than always in the first or last vhost_vdpa. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-5-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. 
Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 6 +++--- include/hw/virtio/vhost-vdpa.h | 5 +++-- net/vhost-vdpa.c | 22 +++++----------------- 3 files changed, 11 insertions(+), 22 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 5ff1d43ba9..97588848fc 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -353,7 +353,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, vaddr, section->readonly); llsize = int128_sub(llend, int128_make64(iova)); - if (v->shadow_data) { + if (v->shared->shadow_data) { int r; mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, @@ -380,7 +380,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, return; fail_map: - if (v->shadow_data) { + if (v->shared->shadow_data) { vhost_iova_tree_remove(v->shared->iova_tree, mem_region); } @@ -435,7 +435,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); - if (v->shadow_data) { + if (v->shared->shadow_data) { const DMAMap *result; const void *vaddr = memory_region_get_ram_ptr(section->mr) + section->offset_within_region + diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 8d52a7e498..01e0f25e27 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -36,6 +36,9 @@ typedef struct vhost_vdpa_shared { /* IOVA mapping used by the Shadow Virtqueue */ VhostIOVATree *iova_tree; + + /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ + bool shadow_data; } VhostVDPAShared; typedef struct vhost_vdpa { @@ -47,8 +50,6 @@ typedef struct vhost_vdpa { MemoryListener listener; uint64_t acked_features; bool shadow_vqs_enabled; - /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ - bool shadow_data; /* Device suspended successfully */ bool suspended; VhostVDPAShared *shared; diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 
7be2c30ad3..bf8e8327da 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -290,15 +290,6 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf, return size; } -/** From any vdpa net client, get the netclient of the first queue pair */ -static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s) -{ - NICState *nic = qemu_get_nic(s->nc.peer); - NetClientState *nc0 = qemu_get_peer(nic->ncs, 0); - - return DO_UPCAST(VhostVDPAState, nc, nc0); -} - static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable) { struct vhost_vdpa *v = &s->vhost_vdpa; @@ -369,13 +360,12 @@ static int vhost_vdpa_net_data_start(NetClientState *nc) if (s->always_svq || migration_is_setup_or_active(migrate_get_current()->state)) { v->shadow_vqs_enabled = true; - v->shadow_data = true; } else { v->shadow_vqs_enabled = false; - v->shadow_data = false; } if (v->index == 0) { + v->shared->shadow_data = v->shadow_vqs_enabled; vhost_vdpa_net_data_start_first(s); return 0; } @@ -523,7 +513,7 @@ dma_map_err: static int vhost_vdpa_net_cvq_start(NetClientState *nc) { - VhostVDPAState *s, *s0; + VhostVDPAState *s; struct vhost_vdpa *v; int64_t cvq_group; int r; @@ -534,12 +524,10 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) s = DO_UPCAST(VhostVDPAState, nc, nc); v = &s->vhost_vdpa; - s0 = vhost_vdpa_net_first_nc_vdpa(s); - v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled; - v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled; + v->shadow_vqs_enabled = v->shared->shadow_data; s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; - if (s->vhost_vdpa.shadow_data) { + if (v->shared->shadow_data) { /* SVQ is already configured for all virtqueues */ goto out; } @@ -1688,12 +1676,12 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->always_svq = svq; s->migration_state.notify = NULL; s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.shadow_data = svq; if (queue_pair_index == 0) { 
vhost_vdpa_net_valid_svq_features(features, &s->vhost_vdpa.migration_blocker); s->vhost_vdpa.shared = g_new0(VhostVDPAShared, 1); s->vhost_vdpa.shared->iova_range = iova_range; + s->vhost_vdpa.shared->shadow_data = svq; } else if (!is_datapath) { s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), PROT_READ | PROT_WRITE, From e36b9992fafe3a4f1e2a7819d4b3df171acaa960 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:14 +0100 Subject: [PATCH 13/21] vdpa: use vdpa shared for tracing MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit By the end of this series dma_map and dma_unmap functions don't have the vdpa device for tracing. Move the trace functions to use the shared member instead. Print it also in the vdpa initialization so a log reader can relate them. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-6-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S.
Tsirkin --- hw/virtio/trace-events | 14 +++++++------- hw/virtio/vhost-vdpa.c | 26 ++++++++++++++------------ 2 files changed, 21 insertions(+), 19 deletions(-) diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 637cac4edf..77905d1994 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -30,16 +30,16 @@ vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p" # vhost-vdpa.c -vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 -vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 -vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 -vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 -vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 +vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa_shared:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 +vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa_shared:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 
+vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa_shared:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 +vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa_shared:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 +vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa_shared: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" -vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 +vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa_shared: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 vhost_vdpa_listener_region_del(void *vdpa, uint64_t iova, uint64_t llend) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64 vhost_vdpa_add_status(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 -vhost_vdpa_init(void *dev, void *vdpa) "dev: %p vdpa: %p" +vhost_vdpa_init(void *dev, void *s, void *vdpa) "dev: %p, common dev: %p vdpa: %p" vhost_vdpa_cleanup(void *dev, void *vdpa) "dev: %p vdpa: %p" vhost_vdpa_memslots_limit(void *dev, int ret) "dev: %p = 0x%x" vhost_vdpa_set_mem_table(void *dev, uint32_t nregions, uint32_t padding) "dev: %p nregions: %"PRIu32" padding: 0x%"PRIx32 diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 97588848fc..720cffbc08 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -101,7 +101,7 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, msg.iotlb.perm = readonly ? 
VHOST_ACCESS_RO : VHOST_ACCESS_RW; msg.iotlb.type = VHOST_IOTLB_UPDATE; - trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova, + trace_vhost_vdpa_dma_map(v->shared, fd, msg.type, msg.asid, msg.iotlb.iova, msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); @@ -131,8 +131,8 @@ int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, msg.iotlb.size = size; msg.iotlb.type = VHOST_IOTLB_INVALIDATE; - trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova, - msg.iotlb.size, msg.iotlb.type); + trace_vhost_vdpa_dma_unmap(v->shared, fd, msg.type, msg.asid, + msg.iotlb.iova, msg.iotlb.size, msg.iotlb.type); if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { error_report("failed to write, fd=%d, errno=%d (%s)", @@ -151,7 +151,8 @@ static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) .iotlb.type = VHOST_IOTLB_BATCH_BEGIN, }; - trace_vhost_vdpa_listener_begin_batch(v, fd, msg.type, msg.iotlb.type); + trace_vhost_vdpa_listener_begin_batch(v->shared, fd, msg.type, + msg.iotlb.type); if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { error_report("failed to write, fd=%d, errno=%d (%s)", fd, errno, strerror(errno)); @@ -186,7 +187,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) msg.type = v->msg_type; msg.iotlb.type = VHOST_IOTLB_BATCH_END; - trace_vhost_vdpa_listener_commit(v, fd, msg.type, msg.iotlb.type); + trace_vhost_vdpa_listener_commit(v->shared, fd, msg.type, msg.iotlb.type); if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { error_report("failed to write, fd=%d, errno=%d (%s)", fd, errno, strerror(errno)); @@ -329,7 +330,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, if (unlikely((section->offset_within_address_space & ~page_mask) != (section->offset_within_region & ~page_mask))) { - trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, + trace_vhost_vdpa_listener_region_add_unaligned(v->shared, + section->mr->name, 
section->offset_within_address_space & ~page_mask, section->offset_within_region & ~page_mask); return; @@ -349,7 +351,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, section->offset_within_region + (iova - section->offset_within_address_space); - trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend), + trace_vhost_vdpa_listener_region_add(v->shared, iova, int128_get64(llend), vaddr, section->readonly); llsize = int128_sub(llend, int128_make64(iova)); @@ -417,7 +419,8 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, if (unlikely((section->offset_within_address_space & ~page_mask) != (section->offset_within_region & ~page_mask))) { - trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, + trace_vhost_vdpa_listener_region_del_unaligned(v->shared, + section->mr->name, section->offset_within_address_space & ~page_mask, section->offset_within_region & ~page_mask); return; @@ -426,7 +429,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, iova = ROUND_UP(section->offset_within_address_space, page_size); llend = vhost_vdpa_section_end(section, page_mask); - trace_vhost_vdpa_listener_region_del(v, iova, + trace_vhost_vdpa_listener_region_del(v->shared, iova, int128_get64(int128_sub(llend, int128_one()))); if (int128_ge(int128_make64(iova), llend)) { @@ -587,12 +590,11 @@ static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) { - struct vhost_vdpa *v; + struct vhost_vdpa *v = opaque; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); - trace_vhost_vdpa_init(dev, opaque); + trace_vhost_vdpa_init(dev, v->shared, opaque); int ret; - v = opaque; v->dev = dev; dev->opaque = opaque ; v->listener = vhost_vdpa_memory_listener; From f12b2498e561ddf9ab45940deb15ef1bb046f699 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:15 +0100 
Subject: [PATCH 14/21] vdpa: move file descriptor to vhost_vdpa_shared MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches will register the vhost_vdpa memory listener while the VM is migrating at the destination, so we can map the memory to the device before stopping the VM at the source. The main goal is to reduce the downtime. However, the destination QEMU is unaware of which vhost_vdpa device will register its memory_listener. If the source guest has CVQ enabled, it will be the CVQ device. Otherwise, it will be the first one. Move the file descriptor to VhostVDPAShared so all vhost_vdpa can use it, rather than always in the first / last vhost_vdpa. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-7-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vdpa-dev.c | 2 +- hw/virtio/vhost-vdpa.c | 14 +++++++------- include/hw/virtio/vhost-vdpa.h | 2 +- net/vhost-vdpa.c | 11 ++++------- 4 files changed, 13 insertions(+), 16 deletions(-) diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index 457960d28a..8774986571 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -66,7 +66,6 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) if (*errp) { return; } - v->vdpa.device_fd = v->vhostfd; v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, VHOST_VDPA_GET_DEVICE_ID, errp); @@ -115,6 +114,7 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) goto free_vqs; } v->vdpa.shared = g_new0(VhostVDPAShared, 1); + v->vdpa.shared->device_fd = v->vhostfd; v->vdpa.shared->iova_range = iova_range; ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 720cffbc08..09df150ef2 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -90,7 +90,7 @@ int vhost_vdpa_dma_map(struct 
vhost_vdpa *v, uint32_t asid, hwaddr iova, hwaddr size, void *vaddr, bool readonly) { struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; + int fd = v->shared->device_fd; int ret = 0; msg.type = v->msg_type; @@ -122,7 +122,7 @@ int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, hwaddr size) { struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; + int fd = v->shared->device_fd; int ret = 0; msg.type = v->msg_type; @@ -145,7 +145,7 @@ int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) { - int fd = v->device_fd; + int fd = v->shared->device_fd; struct vhost_msg_v2 msg = { .type = v->msg_type, .iotlb.type = VHOST_IOTLB_BATCH_BEGIN, @@ -174,7 +174,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); struct vhost_dev *dev = v->dev; struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; + int fd = v->shared->device_fd; if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { return; @@ -499,7 +499,7 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, void *arg) { struct vhost_vdpa *v = dev->opaque; - int fd = v->device_fd; + int fd = v->shared->device_fd; int ret; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); @@ -661,7 +661,7 @@ static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) struct vhost_vdpa *v = dev->opaque; VirtIODevice *vdev = dev->vdev; VhostVDPAHostNotifier *n; - int fd = v->device_fd; + int fd = v->shared->device_fd; void *addr; char *name; @@ -1290,7 +1290,7 @@ static void vhost_vdpa_suspend(struct vhost_dev *dev) if (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) { trace_vhost_vdpa_suspend(dev); - r = ioctl(v->device_fd, VHOST_VDPA_SUSPEND); + r = ioctl(v->shared->device_fd, VHOST_VDPA_SUSPEND); if (unlikely(r)) { error_report("Cannot suspend: %s(%d)", 
g_strerror(errno), errno); } else { diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 01e0f25e27..796a180afa 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -32,6 +32,7 @@ typedef struct VhostVDPAHostNotifier { /* Info shared by all vhost_vdpa device models */ typedef struct vhost_vdpa_shared { + int device_fd; struct vhost_vdpa_iova_range iova_range; /* IOVA mapping used by the Shadow Virtqueue */ @@ -42,7 +43,6 @@ typedef struct vhost_vdpa_shared { } VhostVDPAShared; typedef struct vhost_vdpa { - int device_fd; int index; uint32_t msg_type; bool iotlb_batch_begin_sent; diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index bf8e8327da..10cf0027de 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -235,14 +235,11 @@ static void vhost_vdpa_cleanup(NetClientState *nc) vhost_net_cleanup(s->vhost_net); g_free(s->vhost_net); s->vhost_net = NULL; - } - if (s->vhost_vdpa.device_fd >= 0) { - qemu_close(s->vhost_vdpa.device_fd); - s->vhost_vdpa.device_fd = -1; } if (s->vhost_vdpa.index != 0) { return; } + qemu_close(s->vhost_vdpa.shared->device_fd); g_free(s->vhost_vdpa.shared); } @@ -448,7 +445,7 @@ static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, }; int r; - r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); + r = ioctl(v->shared->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); if (unlikely(r < 0)) { error_report("Can't set vq group %u asid %u, errno=%d (%s)", asid.index, asid.num, errno, g_strerror(errno)); @@ -544,7 +541,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) return 0; } - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, + cvq_group = vhost_vdpa_get_vring_group(v->shared->device_fd, v->dev->vq_index_end - 1, &err); if (unlikely(cvq_group < 0)) { @@ -1671,7 +1668,6 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, qemu_set_info_str(nc, TYPE_VHOST_VDPA); s = DO_UPCAST(VhostVDPAState, nc, nc); - s->vhost_vdpa.device_fd = 
vdpa_device_fd; s->vhost_vdpa.index = queue_pair_index; s->always_svq = svq; s->migration_state.notify = NULL; @@ -1680,6 +1676,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, vhost_vdpa_net_valid_svq_features(features, &s->vhost_vdpa.migration_blocker); s->vhost_vdpa.shared = g_new0(VhostVDPAShared, 1); + s->vhost_vdpa.shared->device_fd = vdpa_device_fd; s->vhost_vdpa.shared->iova_range = iova_range; s->vhost_vdpa.shared->shadow_data = svq; } else if (!is_datapath) { From 7627f0a2de31852483babdffded8f0d59e515f91 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:16 +0100 Subject: [PATCH 15/21] vdpa: move iotlb_batch_begin_sent to vhost_vdpa_shared MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches will register the vhost_vdpa memory listener while the VM is migrating at the destination, so we can map the memory to the device before stopping the VM at the source. The main goal is to reduce the downtime. However, the destination QEMU is unaware of which vhost_vdpa device will register its memory_listener. If the source guest has CVQ enabled, it will be the CVQ device. Otherwise, it will be the first one. Move the iotlb_batch_begin_sent member to VhostVDPAShared so all vhost_vdpa can use it, rather than always in the first / last vhost_vdpa. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-8-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost-vdpa.c | 8 ++++---- include/hw/virtio/vhost-vdpa.h | 3 ++- 2 files changed, 6 insertions(+), 5 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 09df150ef2..2ecaedb686 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -162,11 +162,11 @@ static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v) { if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && - !v->iotlb_batch_begin_sent) { + !v->shared->iotlb_batch_begin_sent) { vhost_vdpa_listener_begin_batch(v); } - v->iotlb_batch_begin_sent = true; + v->shared->iotlb_batch_begin_sent = true; } static void vhost_vdpa_listener_commit(MemoryListener *listener) @@ -180,7 +180,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) return; } - if (!v->iotlb_batch_begin_sent) { + if (!v->shared->iotlb_batch_begin_sent) { return; } @@ -193,7 +193,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) fd, errno, strerror(errno)); } - v->iotlb_batch_begin_sent = false; + v->shared->iotlb_batch_begin_sent = false; } static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 796a180afa..05219bbcf7 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -38,6 +38,8 @@ typedef struct vhost_vdpa_shared { /* IOVA mapping used by the Shadow Virtqueue */ VhostIOVATree *iova_tree; + bool iotlb_batch_begin_sent; + /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ bool shadow_data; } VhostVDPAShared; @@ -45,7 +47,6 @@ typedef struct vhost_vdpa_shared { typedef struct vhost_vdpa { int index; uint32_t msg_type; - bool iotlb_batch_begin_sent; uint32_t address_space_id; MemoryListener listener; uint64_t acked_features; From 4da38d1a6d00dd7e1897e4d717c2ebfac1702038 Mon Sep 17 00:00:00 2001 From: 
=?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:17 +0100 Subject: [PATCH 16/21] vdpa: move backend_cap to vhost_vdpa_shared MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches will register the vhost_vdpa memory listener while the VM is migrating at the destination, so we can map the memory to the device before stopping the VM at the source. The main goal is to reduce the downtime. However, the destination QEMU is unaware of which vhost_vdpa device will register its memory_listener. If the source guest has CVQ enabled, it will be the CVQ device. Otherwise, it will be the first one. Move the backend_cap member to VhostVDPAShared so all vhost_vdpa can use it, rather than always in the first / last vhost_vdpa. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-9-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 8 +++++--- include/hw/virtio/vhost-vdpa.h | 3 +++ 2 files changed, 8 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 2ecaedb686..99597c3179 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -161,7 +161,7 @@ static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v) { - if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && + if (v->shared->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && !v->shared->iotlb_batch_begin_sent) { vhost_vdpa_listener_begin_batch(v); } @@ -172,11 +172,10 @@ static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v) static void vhost_vdpa_listener_commit(MemoryListener *listener) { struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); - struct vhost_dev *dev = v->dev; struct vhost_msg_v2 msg = {}; int fd = v->shared->device_fd; - if (!(dev->backend_cap & (0x1ULL 
<< VHOST_BACKEND_F_IOTLB_BATCH))) { + if (!(v->shared->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { return; } @@ -838,6 +837,8 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) { + struct vhost_vdpa *v = dev->opaque; + uint64_t features; uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | @@ -859,6 +860,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) } dev->backend_cap = features; + v->shared->backend_cap = features; return 0; } diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 05219bbcf7..11ac14085a 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -38,6 +38,9 @@ typedef struct vhost_vdpa_shared { /* IOVA mapping used by the Shadow Virtqueue */ VhostIOVATree *iova_tree; + /* Copy of backend features */ + uint64_t backend_cap; + bool iotlb_batch_begin_sent; /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ From 74e76c7d5bf074b3dbb5ec71109d6ba80f21e0a9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:18 +0100 Subject: [PATCH 17/21] vdpa: remove msg type of vhost_vdpa MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It is always VHOST_IOTLB_MSG_V2. We can always make it back per vhost_dev if needed. This change makes easier for vhost_vdpa_map and unmap not to depend on vhost_vdpa but only in VhostVDPAShared. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-10-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost-vdpa.c | 9 ++++----- include/hw/virtio/vhost-vdpa.h | 1 - 2 files changed, 4 insertions(+), 6 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 99597c3179..cbfcf18883 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -93,7 +93,7 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, int fd = v->shared->device_fd; int ret = 0; - msg.type = v->msg_type; + msg.type = VHOST_IOTLB_MSG_V2; msg.asid = asid; msg.iotlb.iova = iova; msg.iotlb.size = size; @@ -125,7 +125,7 @@ int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, int fd = v->shared->device_fd; int ret = 0; - msg.type = v->msg_type; + msg.type = VHOST_IOTLB_MSG_V2; msg.asid = asid; msg.iotlb.iova = iova; msg.iotlb.size = size; @@ -147,7 +147,7 @@ static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) { int fd = v->shared->device_fd; struct vhost_msg_v2 msg = { - .type = v->msg_type, + .type = VHOST_IOTLB_MSG_V2, .iotlb.type = VHOST_IOTLB_BATCH_BEGIN, }; @@ -183,7 +183,7 @@ static void vhost_vdpa_listener_commit(MemoryListener *listener) return; } - msg.type = v->msg_type; + msg.type = VHOST_IOTLB_MSG_V2; msg.iotlb.type = VHOST_IOTLB_BATCH_END; trace_vhost_vdpa_listener_commit(v->shared, fd, msg.type, msg.iotlb.type); @@ -597,7 +597,6 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) v->dev = dev; dev->opaque = opaque ; v->listener = vhost_vdpa_memory_listener; - v->msg_type = VHOST_IOTLB_MSG_V2; vhost_vdpa_init_svq(dev, v); error_propagate(&dev->migration_blocker, v->migration_blocker); diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 11ac14085a..5bd964dac5 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -49,7 +49,6 @@ typedef struct vhost_vdpa_shared { typedef struct vhost_vdpa { int index; - uint32_t msg_type; uint32_t address_space_id; MemoryListener listener; uint64_t 
acked_features; From 3c6d12a3b19527c68b6462eaeaa72bbb2db2b02c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:19 +0100 Subject: [PATCH 18/21] vdpa: move iommu_list to vhost_vdpa_shared MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches will register the vhost_vdpa memory listener while the VM is migrating at the destination, so we can map the memory to the device before stopping the VM at the source. The main goal is to reduce the downtime. However, the destination QEMU is unaware of which vhost_vdpa device will register its memory_listener. If the source guest has CVQ enabled, it will be the CVQ device. Otherwise, it will be the first one. Move the iommu_list member to VhostVDPAShared so all vhost_vdpa can use it, rather than always in the first / last vhost_vdpa. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-11-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost-vdpa.c | 4 ++-- include/hw/virtio/vhost-vdpa.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index cbfcf18883..a2713b9f0b 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -279,7 +279,7 @@ static void vhost_vdpa_iommu_region_add(MemoryListener *listener, return; } - QLIST_INSERT_HEAD(&v->iommu_list, iommu, iommu_next); + QLIST_INSERT_HEAD(&v->shared->iommu_list, iommu, iommu_next); memory_region_iommu_replay(iommu->iommu_mr, &iommu->n); return; @@ -292,7 +292,7 @@ static void vhost_vdpa_iommu_region_del(MemoryListener *listener, struct vdpa_iommu *iommu; - QLIST_FOREACH(iommu, &v->iommu_list, iommu_next) + QLIST_FOREACH(iommu, &v->shared->iommu_list, iommu_next) { if (MEMORY_REGION(iommu->iommu_mr) == section->mr && iommu->n.start == section->offset_within_region) { diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 5bd964dac5..3880b9e7f2 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -34,6 +34,7 @@ typedef struct VhostVDPAHostNotifier { typedef struct vhost_vdpa_shared { int device_fd; struct vhost_vdpa_iova_range iova_range; + QLIST_HEAD(, vdpa_iommu) iommu_list; /* IOVA mapping used by the Shadow Virtqueue */ VhostIOVATree *iova_tree; @@ -62,7 +63,6 @@ typedef struct vhost_vdpa { struct vhost_dev *dev; Error *migration_blocker; VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; - QLIST_HEAD(, vdpa_iommu) iommu_list; IOMMUNotifier n; } VhostVDPA; From 6f03d9ef8acdcdcfaf7ba42654f36ae3c1f1ddbc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:20 +0100 Subject: [PATCH 19/21] vdpa: use VhostVDPAShared in vdpa_dma_map and unmap MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The callers only have the shared information by the end of this series. Start converting these functions.
Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-12-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. Tsirkin --- hw/virtio/vhost-vdpa.c | 50 +++++++++++++++++----------------- include/hw/virtio/vhost-vdpa.h | 4 +-- net/vhost-vdpa.c | 5 ++-- 3 files changed, 30 insertions(+), 29 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index a2713b9f0b..ada3cc5d7c 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -86,11 +86,11 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, * The caller must set asid = 0 if the device does not support asid. * This is not an ABI break since it is set to 0 by the initializer anyway. */ -int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, +int vhost_vdpa_dma_map(VhostVDPAShared *s, uint32_t asid, hwaddr iova, hwaddr size, void *vaddr, bool readonly) { struct vhost_msg_v2 msg = {}; - int fd = v->shared->device_fd; + int fd = s->device_fd; int ret = 0; msg.type = VHOST_IOTLB_MSG_V2; @@ -101,7 +101,7 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; msg.iotlb.type = VHOST_IOTLB_UPDATE; - trace_vhost_vdpa_dma_map(v->shared, fd, msg.type, msg.asid, msg.iotlb.iova, + trace_vhost_vdpa_dma_map(s, fd, msg.type, msg.asid, msg.iotlb.iova, msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); @@ -118,11 +118,11 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, * The caller must set asid = 0 if the device does not support asid. * This is not an ABI break since it is set to 0 by the initializer anyway. 
*/ -int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, +int vhost_vdpa_dma_unmap(VhostVDPAShared *s, uint32_t asid, hwaddr iova, hwaddr size) { struct vhost_msg_v2 msg = {}; - int fd = v->shared->device_fd; + int fd = s->device_fd; int ret = 0; msg.type = VHOST_IOTLB_MSG_V2; @@ -131,8 +131,8 @@ int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, msg.iotlb.size = size; msg.iotlb.type = VHOST_IOTLB_INVALIDATE; - trace_vhost_vdpa_dma_unmap(v->shared, fd, msg.type, msg.asid, - msg.iotlb.iova, msg.iotlb.size, msg.iotlb.type); + trace_vhost_vdpa_dma_unmap(s, fd, msg.type, msg.asid, msg.iotlb.iova, + msg.iotlb.size, msg.iotlb.type); if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { error_report("failed to write, fd=%d, errno=%d (%s)", @@ -143,30 +143,29 @@ int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, return ret; } -static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) +static void vhost_vdpa_listener_begin_batch(VhostVDPAShared *s) { - int fd = v->shared->device_fd; + int fd = s->device_fd; struct vhost_msg_v2 msg = { .type = VHOST_IOTLB_MSG_V2, .iotlb.type = VHOST_IOTLB_BATCH_BEGIN, }; - trace_vhost_vdpa_listener_begin_batch(v->shared, fd, msg.type, - msg.iotlb.type); + trace_vhost_vdpa_listener_begin_batch(s, fd, msg.type, msg.iotlb.type); if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { error_report("failed to write, fd=%d, errno=%d (%s)", fd, errno, strerror(errno)); } } -static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v) +static void vhost_vdpa_iotlb_batch_begin_once(VhostVDPAShared *s) { - if (v->shared->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && - !v->shared->iotlb_batch_begin_sent) { - vhost_vdpa_listener_begin_batch(v); + if (s->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && + !s->iotlb_batch_begin_sent) { + vhost_vdpa_listener_begin_batch(s); } - v->shared->iotlb_batch_begin_sent = true; + s->iotlb_batch_begin_sent = true; } static 
void vhost_vdpa_listener_commit(MemoryListener *listener) @@ -226,7 +225,7 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) { return; } - ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_map(v->shared, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1, vaddr, read_only); if (ret) { error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", " @@ -234,7 +233,7 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) v, iova, iotlb->addr_mask + 1, vaddr, ret); } } else { - ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_unmap(v->shared, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1); if (ret) { error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", " @@ -370,8 +369,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, iova = mem_region.iova; } - vhost_vdpa_iotlb_batch_begin_once(v); - ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, + vhost_vdpa_iotlb_batch_begin_once(v->shared); + ret = vhost_vdpa_dma_map(v->shared, VHOST_VDPA_GUEST_PA_ASID, iova, int128_get64(llsize), vaddr, section->readonly); if (ret) { error_report("vhost vdpa map fail!"); @@ -455,13 +454,13 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, iova = result->iova; vhost_iova_tree_remove(v->shared->iova_tree, *result); } - vhost_vdpa_iotlb_batch_begin_once(v); + vhost_vdpa_iotlb_batch_begin_once(v->shared); /* * The unmap ioctl doesn't accept a full 64-bit. 
need to check it */ if (int128_eq(llsize, int128_2_64())) { llsize = int128_rshift(llsize, 1); - ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_unmap(v->shared, VHOST_VDPA_GUEST_PA_ASID, iova, int128_get64(llsize)); if (ret) { @@ -471,7 +470,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, } iova += int128_get64(llsize); } - ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_unmap(v->shared, VHOST_VDPA_GUEST_PA_ASID, iova, int128_get64(llsize)); if (ret) { @@ -1081,7 +1080,8 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) } size = ROUND_UP(result->size, qemu_real_host_page_size()); - r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size); + r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, result->iova, + size); if (unlikely(r < 0)) { error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); return; @@ -1121,7 +1121,7 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, return false; } - r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova, + r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle->iova, needle->size + 1, (void *)(uintptr_t)needle->translated_addr, needle->perm == IOMMU_RO); diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 3880b9e7f2..705c754776 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -69,9 +69,9 @@ typedef struct vhost_vdpa { int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); -int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, +int vhost_vdpa_dma_map(VhostVDPAShared *s, uint32_t asid, hwaddr iova, hwaddr size, void *vaddr, bool readonly); -int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, +int vhost_vdpa_dma_unmap(VhostVDPAShared *s, 
uint32_t asid, hwaddr iova, hwaddr size); typedef struct vdpa_iommu { diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index 10cf0027de..3726ee5d67 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -471,7 +471,8 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) return; } - r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1); + r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, map->iova, + map->size + 1); if (unlikely(r != 0)) { error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); } @@ -495,7 +496,7 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, return r; } - r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova, + r = vhost_vdpa_dma_map(v->shared, v->address_space_id, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, !write); if (unlikely(r < 0)) { goto dma_map_err; From b06a38f2b00d9e35cfdea2c6e0a6c6521db67378 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:21 +0100 Subject: [PATCH 20/21] vdpa: use dev_shared in vdpa_iommu MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The memory listener functions can call these too. Make vdpa_iommu work with VhostVDPAShared. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-13-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost-vdpa.c | 16 ++++++++-------- include/hw/virtio/vhost-vdpa.h | 2 +- 2 files changed, 9 insertions(+), 9 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index ada3cc5d7c..e6276fb1e4 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -199,7 +199,7 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n); hwaddr iova = iotlb->iova + iommu->iommu_offset; - struct vhost_vdpa *v = iommu->dev; + VhostVDPAShared *s = iommu->dev_shared; void *vaddr; int ret; Int128 llend; @@ -212,10 +212,10 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) RCU_READ_LOCK_GUARD(); /* check if RAM section out of device range */ llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova)); - if (int128_gt(llend, int128_make64(v->shared->iova_range.last))) { + if (int128_gt(llend, int128_make64(s->iova_range.last))) { error_report("RAM section out of device range (max=0x%" PRIx64 ", end addr=0x%" PRIx64 ")", - v->shared->iova_range.last, int128_get64(llend)); + s->iova_range.last, int128_get64(llend)); return; } @@ -225,20 +225,20 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) { return; } - ret = vhost_vdpa_dma_map(v->shared, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1, vaddr, read_only); if (ret) { error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", " "0x%" HWADDR_PRIx ", %p) = %d (%m)", - v, iova, iotlb->addr_mask + 1, vaddr, ret); + s, iova, iotlb->addr_mask + 1, vaddr, ret); } } else { - ret = vhost_vdpa_dma_unmap(v->shared, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1); if (ret) { error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", " 
"0x%" HWADDR_PRIx ") = %d (%m)", - v, iova, iotlb->addr_mask + 1, ret); + s, iova, iotlb->addr_mask + 1, ret); } } } @@ -270,7 +270,7 @@ static void vhost_vdpa_iommu_region_add(MemoryListener *listener, iommu_idx); iommu->iommu_offset = section->offset_within_address_space - section->offset_within_region; - iommu->dev = v; + iommu->dev_shared = v->shared; ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL); if (ret) { diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 705c754776..2abee2164a 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -75,7 +75,7 @@ int vhost_vdpa_dma_unmap(VhostVDPAShared *s, uint32_t asid, hwaddr iova, hwaddr size); typedef struct vdpa_iommu { - struct vhost_vdpa *dev; + VhostVDPAShared *dev_shared; IOMMUMemoryRegion *iommu_mr; hwaddr iommu_offset; IOMMUNotifier n; From f6fe3e333fe0fcb8ef87c669a3a8f84fbee10cb7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Eugenio=20P=C3=A9rez?= Date: Thu, 21 Dec 2023 18:43:22 +0100 Subject: [PATCH 21/21] vdpa: move memory listener to vhost_vdpa_shared MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Next patches will register the vhost_vdpa memory listener while the VM is migrating at the destination, so we can map the memory to the device before stopping the VM at the source. The main goal is to reduce the downtime. However, the destination QEMU is unaware of which vhost_vdpa device will register its memory_listener. If the source guest has CVQ enabled, it will be the CVQ device. Otherwise, it will be the first one. Move the memory listener to a common place rather than always in the first / last vhost_vdpa. Signed-off-by: Eugenio Pérez Acked-by: Jason Wang Message-Id: <20231221174322.3130442-14-eperezma@redhat.com> Tested-by: Lei Yang Reviewed-by: Michael S. Tsirkin Signed-off-by: Michael S. 
Tsirkin --- hw/virtio/vhost-vdpa.c | 84 ++++++++++++++++------------------ include/hw/virtio/vhost-vdpa.h | 2 +- 2 files changed, 40 insertions(+), 46 deletions(-) diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index e6276fb1e4..ddae494ca8 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -170,28 +170,28 @@ static void vhost_vdpa_iotlb_batch_begin_once(VhostVDPAShared *s) static void vhost_vdpa_listener_commit(MemoryListener *listener) { - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener); struct vhost_msg_v2 msg = {}; - int fd = v->shared->device_fd; + int fd = s->device_fd; - if (!(v->shared->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { + if (!(s->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { return; } - if (!v->shared->iotlb_batch_begin_sent) { + if (!s->iotlb_batch_begin_sent) { return; } msg.type = VHOST_IOTLB_MSG_V2; msg.iotlb.type = VHOST_IOTLB_BATCH_END; - trace_vhost_vdpa_listener_commit(v->shared, fd, msg.type, msg.iotlb.type); + trace_vhost_vdpa_listener_commit(s, fd, msg.type, msg.iotlb.type); if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { error_report("failed to write, fd=%d, errno=%d (%s)", fd, errno, strerror(errno)); } - v->shared->iotlb_batch_begin_sent = false; + s->iotlb_batch_begin_sent = false; } static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) @@ -246,7 +246,7 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) static void vhost_vdpa_iommu_region_add(MemoryListener *listener, MemoryRegionSection *section) { - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener); struct vdpa_iommu *iommu; Int128 end; @@ -270,7 +270,7 @@ static void vhost_vdpa_iommu_region_add(MemoryListener *listener, iommu_idx); iommu->iommu_offset = 
section->offset_within_address_space - section->offset_within_region; - iommu->dev_shared = v->shared; + iommu->dev_shared = s; ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL); if (ret) { @@ -278,7 +278,7 @@ static void vhost_vdpa_iommu_region_add(MemoryListener *listener, return; } - QLIST_INSERT_HEAD(&v->shared->iommu_list, iommu, iommu_next); + QLIST_INSERT_HEAD(&s->iommu_list, iommu, iommu_next); memory_region_iommu_replay(iommu->iommu_mr, &iommu->n); return; @@ -287,11 +287,11 @@ static void vhost_vdpa_iommu_region_add(MemoryListener *listener, static void vhost_vdpa_iommu_region_del(MemoryListener *listener, MemoryRegionSection *section) { - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener); struct vdpa_iommu *iommu; - QLIST_FOREACH(iommu, &v->shared->iommu_list, iommu_next) + QLIST_FOREACH(iommu, &s->iommu_list, iommu_next) { if (MEMORY_REGION(iommu->iommu_mr) == section->mr && iommu->n.start == section->offset_within_region) { @@ -307,7 +307,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { DMAMap mem_region = {}; - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener); hwaddr iova; Int128 llend, llsize; void *vaddr; @@ -315,10 +315,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, int page_size = qemu_target_page_size(); int page_mask = -page_size; - if (vhost_vdpa_listener_skipped_section(section, - v->shared->iova_range.first, - v->shared->iova_range.last, - page_mask)) { + if (vhost_vdpa_listener_skipped_section(section, s->iova_range.first, + s->iova_range.last, page_mask)) { return; } if (memory_region_is_iommu(section->mr)) { @@ -328,8 +326,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, if 
(unlikely((section->offset_within_address_space & ~page_mask) != (section->offset_within_region & ~page_mask))) { - trace_vhost_vdpa_listener_region_add_unaligned(v->shared, - section->mr->name, + trace_vhost_vdpa_listener_region_add_unaligned(s, section->mr->name, section->offset_within_address_space & ~page_mask, section->offset_within_region & ~page_mask); return; @@ -349,18 +346,18 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, section->offset_within_region + (iova - section->offset_within_address_space); - trace_vhost_vdpa_listener_region_add(v->shared, iova, int128_get64(llend), + trace_vhost_vdpa_listener_region_add(s, iova, int128_get64(llend), vaddr, section->readonly); llsize = int128_sub(llend, int128_make64(iova)); - if (v->shared->shadow_data) { + if (s->shadow_data) { int r; mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, mem_region.size = int128_get64(llsize) - 1, mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), - r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &mem_region); + r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region); if (unlikely(r != IOVA_OK)) { error_report("Can't allocate a mapping (%d)", r); goto fail; @@ -369,8 +366,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, iova = mem_region.iova; } - vhost_vdpa_iotlb_batch_begin_once(v->shared); - ret = vhost_vdpa_dma_map(v->shared, VHOST_VDPA_GUEST_PA_ASID, iova, + vhost_vdpa_iotlb_batch_begin_once(s); + ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova, int128_get64(llsize), vaddr, section->readonly); if (ret) { error_report("vhost vdpa map fail!"); @@ -380,8 +377,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, return; fail_map: - if (v->shared->shadow_data) { - vhost_iova_tree_remove(v->shared->iova_tree, mem_region); + if (s->shadow_data) { + vhost_iova_tree_remove(s->iova_tree, mem_region); } fail: @@ -398,17 +395,15 @@ fail: static void 
vhost_vdpa_listener_region_del(MemoryListener *listener, MemoryRegionSection *section) { - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener); hwaddr iova; Int128 llend, llsize; int ret; int page_size = qemu_target_page_size(); int page_mask = -page_size; - if (vhost_vdpa_listener_skipped_section(section, - v->shared->iova_range.first, - v->shared->iova_range.last, - page_mask)) { + if (vhost_vdpa_listener_skipped_section(section, s->iova_range.first, + s->iova_range.last, page_mask)) { return; } if (memory_region_is_iommu(section->mr)) { @@ -417,8 +412,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, if (unlikely((section->offset_within_address_space & ~page_mask) != (section->offset_within_region & ~page_mask))) { - trace_vhost_vdpa_listener_region_del_unaligned(v->shared, - section->mr->name, + trace_vhost_vdpa_listener_region_del_unaligned(s, section->mr->name, section->offset_within_address_space & ~page_mask, section->offset_within_region & ~page_mask); return; @@ -427,7 +421,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, iova = ROUND_UP(section->offset_within_address_space, page_size); llend = vhost_vdpa_section_end(section, page_mask); - trace_vhost_vdpa_listener_region_del(v->shared, iova, + trace_vhost_vdpa_listener_region_del(s, iova, int128_get64(int128_sub(llend, int128_one()))); if (int128_ge(int128_make64(iova), llend)) { @@ -436,7 +430,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); - if (v->shared->shadow_data) { + if (s->shadow_data) { const DMAMap *result; const void *vaddr = memory_region_get_ram_ptr(section->mr) + section->offset_within_region + @@ -446,37 +440,37 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, .size = int128_get64(llsize) - 1, }; - result = 
vhost_iova_tree_find_iova(v->shared->iova_tree, &mem_region); + result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region); if (!result) { /* The memory listener map wasn't mapped */ return; } iova = result->iova; - vhost_iova_tree_remove(v->shared->iova_tree, *result); + vhost_iova_tree_remove(s->iova_tree, *result); } - vhost_vdpa_iotlb_batch_begin_once(v->shared); + vhost_vdpa_iotlb_batch_begin_once(s); /* * The unmap ioctl doesn't accept a full 64-bit. need to check it */ if (int128_eq(llsize, int128_2_64())) { llsize = int128_rshift(llsize, 1); - ret = vhost_vdpa_dma_unmap(v->shared, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova, int128_get64(llsize)); if (ret) { error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", " "0x%" HWADDR_PRIx ") = %d (%m)", - v, iova, int128_get64(llsize), ret); + s, iova, int128_get64(llsize), ret); } iova += int128_get64(llsize); } - ret = vhost_vdpa_dma_unmap(v->shared, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova, int128_get64(llsize)); if (ret) { error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", " "0x%" HWADDR_PRIx ") = %d (%m)", - v, iova, int128_get64(llsize), ret); + s, iova, int128_get64(llsize), ret); } memory_region_unref(section->mr); @@ -595,7 +589,7 @@ static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) v->dev = dev; dev->opaque = opaque ; - v->listener = vhost_vdpa_memory_listener; + v->shared->listener = vhost_vdpa_memory_listener; vhost_vdpa_init_svq(dev, v); error_propagate(&dev->migration_blocker, v->migration_blocker); @@ -755,10 +749,10 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev) trace_vhost_vdpa_cleanup(dev, v); if (vhost_vdpa_first_dev(dev)) { ram_block_discard_disable(false); + memory_listener_unregister(&v->shared->listener); } vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); - memory_listener_unregister(&v->listener); vhost_vdpa_svq_cleanup(dev); 
dev->opaque = NULL; @@ -1331,7 +1325,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) "IOMMU and try again"); return -1; } - memory_listener_register(&v->listener, dev->vdev->dma_as); + memory_listener_register(&v->shared->listener, dev->vdev->dma_as); return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } @@ -1350,7 +1344,7 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev) vhost_vdpa_reset_device(dev); vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); - memory_listener_unregister(&v->listener); + memory_listener_unregister(&v->shared->listener); } static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 2abee2164a..8f54e5edd4 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -33,6 +33,7 @@ typedef struct VhostVDPAHostNotifier { /* Info shared by all vhost_vdpa device models */ typedef struct vhost_vdpa_shared { int device_fd; + MemoryListener listener; struct vhost_vdpa_iova_range iova_range; QLIST_HEAD(, vdpa_iommu) iommu_list; @@ -51,7 +52,6 @@ typedef struct vhost_vdpa_shared { typedef struct vhost_vdpa { int index; uint32_t address_space_id; - MemoryListener listener; uint64_t acked_features; bool shadow_vqs_enabled; /* Device suspended successfully */