diff --git a/hw/acpi/cpu_hotplug.c b/hw/acpi/cpu_hotplug.c index 634bbecb31..6f78db0ccb 100644 --- a/hw/acpi/cpu_hotplug.c +++ b/hw/acpi/cpu_hotplug.c @@ -59,7 +59,8 @@ static const MemoryRegionOps AcpiCpuHotplug_ops = { }, }; -static void acpi_set_cpu_present_bit(AcpiCpuHotplug *g, CPUState *cpu) +static void acpi_set_cpu_present_bit(AcpiCpuHotplug *g, CPUState *cpu, + bool *swtchd_to_modern) { CPUClass *k = CPU_GET_CLASS(cpu); int64_t cpu_id; @@ -68,23 +69,34 @@ static void acpi_set_cpu_present_bit(AcpiCpuHotplug *g, CPUState *cpu) if ((cpu_id / 8) >= ACPI_GPE_PROC_LEN) { object_property_set_bool(g->device, "cpu-hotplug-legacy", false, &error_abort); + *swtchd_to_modern = true; return; } + *swtchd_to_modern = false; g->sts[cpu_id / 8] |= (1 << (cpu_id % 8)); } void legacy_acpi_cpu_plug_cb(HotplugHandler *hotplug_dev, AcpiCpuHotplug *g, DeviceState *dev, Error **errp) { - acpi_set_cpu_present_bit(g, CPU(dev)); - acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS); + bool swtchd_to_modern; + Error *local_err = NULL; + + acpi_set_cpu_present_bit(g, CPU(dev), &swtchd_to_modern); + if (swtchd_to_modern) { + /* propagate the hotplug to the modern interface */ + hotplug_handler_plug(hotplug_dev, dev, &local_err); + } else { + acpi_send_event(DEVICE(hotplug_dev), ACPI_CPU_HOTPLUG_STATUS); + } } void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, AcpiCpuHotplug *gpe_cpu, uint16_t base) { CPUState *cpu; + bool swtchd_to_modern; memory_region_init_io(&gpe_cpu->io, owner, &AcpiCpuHotplug_ops, gpe_cpu, "acpi-cpu-hotplug", ACPI_GPE_PROC_LEN); @@ -92,7 +104,7 @@ void legacy_acpi_cpu_hotplug_init(MemoryRegion *parent, Object *owner, gpe_cpu->device = owner; CPU_FOREACH(cpu) { - acpi_set_cpu_present_bit(gpe_cpu, cpu); + acpi_set_cpu_present_bit(gpe_cpu, cpu, &swtchd_to_modern); } } diff --git a/hw/display/virtio-gpu-base.c b/hw/display/virtio-gpu-base.c index 37af256219..4fc7ef8896 100644 --- a/hw/display/virtio-gpu-base.c +++ b/hw/display/virtio-gpu-base.c @@ -251,7 +251,11 @@ void virtio_gpu_base_device_unrealize(DeviceState *qdev) { VirtIOGPUBase *g = VIRTIO_GPU_BASE(qdev); + VirtIODevice *vdev = VIRTIO_DEVICE(qdev); + virtio_del_queue(vdev, 0); + virtio_del_queue(vdev, 1); + virtio_cleanup(vdev); migrate_del_blocker(&g->migration_blocker); } diff --git a/hw/scsi/vhost-scsi.c b/hw/scsi/vhost-scsi.c index 3126df9e1d..6159eb6fec 100644 --- a/hw/scsi/vhost-scsi.c +++ b/hw/scsi/vhost-scsi.c @@ -91,13 +91,13 @@ static int vhost_scsi_start(VHostSCSI *s) ret = vhost_scsi_common_start(vsc, &local_err); if (ret < 0) { - error_reportf_err(local_err, "Error starting vhost-scsi"); + error_reportf_err(local_err, "Error starting vhost-scsi: "); return ret; } ret = vhost_scsi_set_endpoint(s); if (ret < 0) { - error_reportf_err(local_err, "Error setting vhost-scsi endpoint"); + error_report("Error setting vhost-scsi endpoint"); vhost_scsi_common_stop(vsc); } @@ -165,6 +165,59 @@ static const VMStateDescription vmstate_virtio_vhost_scsi = { .pre_save = vhost_scsi_pre_save, }; +static int vhost_scsi_set_workers(VHostSCSICommon *vsc, bool per_virtqueue) +{ + struct vhost_dev *dev = &vsc->dev; + struct vhost_vring_worker vq_worker; + struct vhost_worker_state worker; + int i, ret; + + /* Use default worker */ + if (!per_virtqueue || dev->nvqs == VHOST_SCSI_VQ_NUM_FIXED + 1) { + return 0; + } + + /* + * ctl/evt share the first worker since it will be rare for them + * to send cmds while IO is running. + */ + for (i = VHOST_SCSI_VQ_NUM_FIXED + 1; i < dev->nvqs; i++) { + memset(&worker, 0, sizeof(worker)); + + ret = dev->vhost_ops->vhost_new_worker(dev, &worker); + if (ret == -ENOTTY) { + /* + * worker ioctls are not implemented so just ignore and + * and continue device setup. + */ + warn_report("vhost-scsi: Backend supports a single worker. " + "Ignoring worker_per_virtqueue=true setting."); + ret = 0; + break; + } else if (ret) { + break; + } + + memset(&vq_worker, 0, sizeof(vq_worker)); + vq_worker.worker_id = worker.worker_id; + vq_worker.index = i; + + ret = dev->vhost_ops->vhost_attach_vring_worker(dev, &vq_worker); + if (ret == -ENOTTY) { + /* + * It's a bug for the kernel to have supported the worker creation + * ioctl but not attach. + */ + dev->vhost_ops->vhost_free_worker(dev, &worker); + break; + } else if (ret) { + break; + } + } + + return ret; +} + static void vhost_scsi_realize(DeviceState *dev, Error **errp) { VirtIOSCSICommon *vs = VIRTIO_SCSI_COMMON(dev); @@ -232,6 +285,13 @@ static void vhost_scsi_realize(DeviceState *dev, Error **errp) goto free_vqs; } + ret = vhost_scsi_set_workers(vsc, vs->conf.worker_per_virtqueue); + if (ret < 0) { + error_setg(errp, "vhost-scsi: vhost worker setup failed: %s", + strerror(-ret)); + goto free_vqs; + } + /* At present, channel and lun both are 0 for bootable vhost-scsi disk */ vsc->channel = 0; vsc->lun = 0; @@ -297,6 +357,8 @@ static Property vhost_scsi_properties[] = { VIRTIO_SCSI_F_T10_PI, false), DEFINE_PROP_BOOL("migratable", VHostSCSICommon, migratable, false), + DEFINE_PROP_BOOL("worker_per_virtqueue", VirtIOSCSICommon, + conf.worker_per_virtqueue, false), DEFINE_PROP_END_OF_LIST(), }; diff --git a/hw/scsi/vhost-user-scsi.c b/hw/scsi/vhost-user-scsi.c index 780f10559d..af18c4f3d3 100644 --- a/hw/scsi/vhost-user-scsi.c +++ b/hw/scsi/vhost-user-scsi.c @@ -83,7 +83,8 @@ static void vhost_user_scsi_set_status(VirtIODevice *vdev, uint8_t status) if (should_start) { ret = vhost_user_scsi_start(s, &local_err); if (ret < 0) { - error_reportf_err(local_err, "unable to start vhost-user-scsi: %s", + error_reportf_err(local_err, + "unable to start vhost-user-scsi: %s: ", strerror(-ret)); qemu_chr_fe_disconnect(&vs->conf.chardev); } diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index 637cac4edf..77905d1994 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -30,16 +30,16 @@ vhost_user_write(uint32_t req, uint32_t flags) "req:%d flags:0x%"PRIx32"" vhost_user_create_notifier(int idx, void *n) "idx:%d n:%p" # vhost-vdpa.c -vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 -vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 -vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 -vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 -vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 +vhost_vdpa_dma_map(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint64_t uaddr, uint8_t perm, uint8_t type) "vdpa_shared:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" uaddr: 0x%"PRIx64" perm: 0x%"PRIx8" type: %"PRIu8 +vhost_vdpa_dma_unmap(void *vdpa, int fd, uint32_t msg_type, uint32_t asid, uint64_t iova, uint64_t size, uint8_t type) "vdpa_shared:%p fd: %d msg_type: %"PRIu32" asid: %"PRIu32" iova: 0x%"PRIx64" size: 0x%"PRIx64" type: %"PRIu8 +vhost_vdpa_listener_begin_batch(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa_shared:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 +vhost_vdpa_listener_commit(void *v, int fd, uint32_t msg_type, uint8_t type) "vdpa_shared:%p fd: %d msg_type: %"PRIu32" type: %"PRIu8 +vhost_vdpa_listener_region_add_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa_shared: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 vhost_vdpa_listener_region_add(void *vdpa, uint64_t iova, uint64_t llend, void *vaddr, bool readonly) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64" vaddr: %p read-only: %d" -vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 +vhost_vdpa_listener_region_del_unaligned(void *v, const char *name, uint64_t offset_as, uint64_t offset_page) "vdpa_shared: %p region %s offset_within_address_space %"PRIu64" offset_within_region %"PRIu64 vhost_vdpa_listener_region_del(void *vdpa, uint64_t iova, uint64_t llend) "vdpa: %p iova 0x%"PRIx64" llend 0x%"PRIx64 vhost_vdpa_add_status(void *dev, uint8_t status) "dev: %p status: 0x%"PRIx8 -vhost_vdpa_init(void *dev, void *vdpa) "dev: %p vdpa: %p" +vhost_vdpa_init(void *dev, void *s, void *vdpa) "dev: %p, common dev: %p vdpa: %p" vhost_vdpa_cleanup(void *dev, void *vdpa) "dev: %p vdpa: %p" vhost_vdpa_memslots_limit(void *dev, int ret) "dev: %p = 0x%x" vhost_vdpa_set_mem_table(void *dev, uint32_t nregions, uint32_t padding) "dev: %p nregions: %"PRIu32" padding: 0x%"PRIx32 diff --git a/hw/virtio/vdpa-dev.c b/hw/virtio/vdpa-dev.c index f22d5d5bc0..8774986571 100644 --- a/hw/virtio/vdpa-dev.c +++ b/hw/virtio/vdpa-dev.c @@ -66,7 +66,6 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) if (*errp) { return; } - v->vdpa.device_fd = v->vhostfd; v->vdev_id = vhost_vdpa_device_get_u32(v->vhostfd, VHOST_VDPA_GET_DEVICE_ID, errp); @@ -114,7 +113,9 @@ static void vhost_vdpa_device_realize(DeviceState *dev, Error **errp) strerror(-ret)); goto free_vqs; } - v->vdpa.iova_range = iova_range; + v->vdpa.shared = g_new0(VhostVDPAShared, 1); + v->vdpa.shared->device_fd = v->vhostfd; + v->vdpa.shared->iova_range = iova_range; ret = vhost_dev_init(&v->dev, &v->vdpa, VHOST_BACKEND_TYPE_VDPA, 0, NULL); if (ret < 0) { @@ -162,6 +163,7 @@ vhost_cleanup: vhost_dev_cleanup(&v->dev); free_vqs: g_free(vqs); + g_free(v->vdpa.shared); out: qemu_close(v->vhostfd); v->vhostfd = -1; @@ -184,6 +186,7 @@ static void vhost_vdpa_device_unrealize(DeviceState *dev) g_free(s->config); g_free(s->dev.vqs); vhost_dev_cleanup(&s->dev); + g_free(s->vdpa.shared); qemu_close(s->vhostfd); s->vhostfd = -1; } diff --git a/hw/virtio/vhost-backend.c b/hw/virtio/vhost-backend.c index 17f3fc6a08..833804dd40 100644 --- a/hw/virtio/vhost-backend.c +++ b/hw/virtio/vhost-backend.c @@ -158,6 +158,30 @@ static int vhost_kernel_set_vring_busyloop_timeout(struct vhost_dev *dev, return vhost_kernel_call(dev, VHOST_SET_VRING_BUSYLOOP_TIMEOUT, s); } +static int vhost_kernel_new_worker(struct vhost_dev *dev, + struct vhost_worker_state *worker) +{ + return vhost_kernel_call(dev, VHOST_NEW_WORKER, worker); +} + +static int vhost_kernel_free_worker(struct vhost_dev *dev, + struct vhost_worker_state *worker) +{ + return vhost_kernel_call(dev, VHOST_FREE_WORKER, worker); +} + +static int vhost_kernel_attach_vring_worker(struct vhost_dev *dev, + struct vhost_vring_worker *worker) +{ + return vhost_kernel_call(dev, VHOST_ATTACH_VRING_WORKER, worker); +} + +static int vhost_kernel_get_vring_worker(struct vhost_dev *dev, + struct vhost_vring_worker *worker) +{ + return vhost_kernel_call(dev, VHOST_GET_VRING_WORKER, worker); +} + static int vhost_kernel_set_features(struct vhost_dev *dev, uint64_t features) { @@ -313,6 +337,10 @@ const VhostOps kernel_ops = { .vhost_set_vring_err = vhost_kernel_set_vring_err, .vhost_set_vring_busyloop_timeout = vhost_kernel_set_vring_busyloop_timeout, + .vhost_get_vring_worker = vhost_kernel_get_vring_worker, + .vhost_attach_vring_worker = vhost_kernel_attach_vring_worker, + .vhost_new_worker = vhost_kernel_new_worker, + .vhost_free_worker = vhost_kernel_free_worker, .vhost_set_features = vhost_kernel_set_features, .vhost_get_features = vhost_kernel_get_features, .vhost_set_backend_cap = vhost_kernel_set_backend_cap, diff --git a/hw/virtio/vhost-user-rng.c b/hw/virtio/vhost-user-rng.c index efc54cd3fb..24ac1a22c8 100644 --- a/hw/virtio/vhost-user-rng.c +++ b/hw/virtio/vhost-user-rng.c @@ -129,6 +129,14 @@ static void vu_rng_guest_notifier_mask(VirtIODevice *vdev, int idx, bool mask) { VHostUserRNG *rng = VHOST_USER_RNG(vdev); + /* + * We don't support interrupts, return early if index is set to + * VIRTIO_CONFIG_IRQ_IDX. + */ + if (idx == VIRTIO_CONFIG_IRQ_IDX) { + return; + } + vhost_virtqueue_mask(&rng->vhost_dev, vdev, idx, mask); } @@ -136,6 +144,14 @@ static bool vu_rng_guest_notifier_pending(VirtIODevice *vdev, int idx) { VHostUserRNG *rng = VHOST_USER_RNG(vdev); + /* + * We don't support interrupts, return early if index is set to + * VIRTIO_CONFIG_IRQ_IDX. + */ + if (idx == VIRTIO_CONFIG_IRQ_IDX) { + return false; + } + return vhost_virtqueue_pending(&rng->vhost_dev, idx); } diff --git a/hw/virtio/vhost-vdpa.c b/hw/virtio/vhost-vdpa.c index 819b2d811a..ddae494ca8 100644 --- a/hw/virtio/vhost-vdpa.c +++ b/hw/virtio/vhost-vdpa.c @@ -86,14 +86,14 @@ static bool vhost_vdpa_listener_skipped_section(MemoryRegionSection *section, * The caller must set asid = 0 if the device does not support asid. * This is not an ABI break since it is set to 0 by the initializer anyway. */ -int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, +int vhost_vdpa_dma_map(VhostVDPAShared *s, uint32_t asid, hwaddr iova, hwaddr size, void *vaddr, bool readonly) { struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; + int fd = s->device_fd; int ret = 0; - msg.type = v->msg_type; + msg.type = VHOST_IOTLB_MSG_V2; msg.asid = asid; msg.iotlb.iova = iova; msg.iotlb.size = size; @@ -101,7 +101,7 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, msg.iotlb.perm = readonly ? VHOST_ACCESS_RO : VHOST_ACCESS_RW; msg.iotlb.type = VHOST_IOTLB_UPDATE; - trace_vhost_vdpa_dma_map(v, fd, msg.type, msg.asid, msg.iotlb.iova, + trace_vhost_vdpa_dma_map(s, fd, msg.type, msg.asid, msg.iotlb.iova, msg.iotlb.size, msg.iotlb.uaddr, msg.iotlb.perm, msg.iotlb.type); @@ -118,20 +118,20 @@ int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, * The caller must set asid = 0 if the device does not support asid. * This is not an ABI break since it is set to 0 by the initializer anyway. */ -int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, +int vhost_vdpa_dma_unmap(VhostVDPAShared *s, uint32_t asid, hwaddr iova, hwaddr size) { struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; + int fd = s->device_fd; int ret = 0; - msg.type = v->msg_type; + msg.type = VHOST_IOTLB_MSG_V2; msg.asid = asid; msg.iotlb.iova = iova; msg.iotlb.size = size; msg.iotlb.type = VHOST_IOTLB_INVALIDATE; - trace_vhost_vdpa_dma_unmap(v, fd, msg.type, msg.asid, msg.iotlb.iova, + trace_vhost_vdpa_dma_unmap(s, fd, msg.type, msg.asid, msg.iotlb.iova, msg.iotlb.size, msg.iotlb.type); if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { @@ -143,56 +143,55 @@ int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, return ret; } -static void vhost_vdpa_listener_begin_batch(struct vhost_vdpa *v) +static void vhost_vdpa_listener_begin_batch(VhostVDPAShared *s) { - int fd = v->device_fd; + int fd = s->device_fd; struct vhost_msg_v2 msg = { - .type = v->msg_type, + .type = VHOST_IOTLB_MSG_V2, .iotlb.type = VHOST_IOTLB_BATCH_BEGIN, }; - trace_vhost_vdpa_listener_begin_batch(v, fd, msg.type, msg.iotlb.type); + trace_vhost_vdpa_listener_begin_batch(s, fd, msg.type, msg.iotlb.type); if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { error_report("failed to write, fd=%d, errno=%d (%s)", fd, errno, strerror(errno)); } } -static void vhost_vdpa_iotlb_batch_begin_once(struct vhost_vdpa *v) +static void vhost_vdpa_iotlb_batch_begin_once(VhostVDPAShared *s) { - if (v->dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && - !v->iotlb_batch_begin_sent) { - vhost_vdpa_listener_begin_batch(v); + if (s->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH) && + !s->iotlb_batch_begin_sent) { + vhost_vdpa_listener_begin_batch(s); } - v->iotlb_batch_begin_sent = true; + s->iotlb_batch_begin_sent = true; } static void vhost_vdpa_listener_commit(MemoryListener *listener) { - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); - struct vhost_dev *dev = v->dev; + VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener); struct vhost_msg_v2 msg = {}; - int fd = v->device_fd; + int fd = s->device_fd; - if (!(dev->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { + if (!(s->backend_cap & (0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH))) { return; } - if (!v->iotlb_batch_begin_sent) { + if (!s->iotlb_batch_begin_sent) { return; } - msg.type = v->msg_type; + msg.type = VHOST_IOTLB_MSG_V2; msg.iotlb.type = VHOST_IOTLB_BATCH_END; - trace_vhost_vdpa_listener_commit(v, fd, msg.type, msg.iotlb.type); + trace_vhost_vdpa_listener_commit(s, fd, msg.type, msg.iotlb.type); if (write(fd, &msg, sizeof(msg)) != sizeof(msg)) { error_report("failed to write, fd=%d, errno=%d (%s)", fd, errno, strerror(errno)); } - v->iotlb_batch_begin_sent = false; + s->iotlb_batch_begin_sent = false; } static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) @@ -200,7 +199,7 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) struct vdpa_iommu *iommu = container_of(n, struct vdpa_iommu, n); hwaddr iova = iotlb->iova + iommu->iommu_offset; - struct vhost_vdpa *v = iommu->dev; + VhostVDPAShared *s = iommu->dev_shared; void *vaddr; int ret; Int128 llend; @@ -213,10 +212,10 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) RCU_READ_LOCK_GUARD(); /* check if RAM section out of device range */ llend = int128_add(int128_makes64(iotlb->addr_mask), int128_makes64(iova)); - if (int128_gt(llend, int128_make64(v->iova_range.last))) { + if (int128_gt(llend, int128_make64(s->iova_range.last))) { error_report("RAM section out of device range (max=0x%" PRIx64 ", end addr=0x%" PRIx64 ")", - v->iova_range.last, int128_get64(llend)); + s->iova_range.last, int128_get64(llend)); return; } @@ -226,20 +225,20 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) if (!memory_get_xlat_addr(iotlb, &vaddr, NULL, &read_only, NULL)) { return; } - ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1, vaddr, read_only); if (ret) { error_report("vhost_vdpa_dma_map(%p, 0x%" HWADDR_PRIx ", " "0x%" HWADDR_PRIx ", %p) = %d (%m)", - v, iova, iotlb->addr_mask + 1, vaddr, ret); + s, iova, iotlb->addr_mask + 1, vaddr, ret); } } else { - ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova, iotlb->addr_mask + 1); if (ret) { error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", " "0x%" HWADDR_PRIx ") = %d (%m)", - v, iova, iotlb->addr_mask + 1, ret); + s, iova, iotlb->addr_mask + 1, ret); } } } @@ -247,7 +246,7 @@ static void vhost_vdpa_iommu_map_notify(IOMMUNotifier *n, IOMMUTLBEntry *iotlb) static void vhost_vdpa_iommu_region_add(MemoryListener *listener, MemoryRegionSection *section) { - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener); struct vdpa_iommu *iommu; Int128 end; @@ -271,7 +270,7 @@ static void vhost_vdpa_iommu_region_add(MemoryListener *listener, iommu_idx); iommu->iommu_offset = section->offset_within_address_space - section->offset_within_region; - iommu->dev = v; + iommu->dev_shared = s; ret = memory_region_register_iommu_notifier(section->mr, &iommu->n, NULL); if (ret) { @@ -279,7 +278,7 @@ static void vhost_vdpa_iommu_region_add(MemoryListener *listener, return; } - QLIST_INSERT_HEAD(&v->iommu_list, iommu, iommu_next); + QLIST_INSERT_HEAD(&s->iommu_list, iommu, iommu_next); memory_region_iommu_replay(iommu->iommu_mr, &iommu->n); return; @@ -288,11 +287,11 @@ static void vhost_vdpa_iommu_region_add(MemoryListener *listener, static void vhost_vdpa_iommu_region_del(MemoryListener *listener, MemoryRegionSection *section) { - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener); struct vdpa_iommu *iommu; - QLIST_FOREACH(iommu, &v->iommu_list, iommu_next) + QLIST_FOREACH(iommu, &s->iommu_list, iommu_next) { if (MEMORY_REGION(iommu->iommu_mr) == section->mr && iommu->n.start == section->offset_within_region) { @@ -308,7 +307,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, MemoryRegionSection *section) { DMAMap mem_region = {}; - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener); hwaddr iova; Int128 llend, llsize; void *vaddr; @@ -316,8 +315,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, int page_size = qemu_target_page_size(); int page_mask = -page_size; - if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, - v->iova_range.last, page_mask)) { + if (vhost_vdpa_listener_skipped_section(section, s->iova_range.first, + s->iova_range.last, page_mask)) { return; } if (memory_region_is_iommu(section->mr)) { @@ -327,7 +326,7 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, if (unlikely((section->offset_within_address_space & ~page_mask) != (section->offset_within_region & ~page_mask))) { - trace_vhost_vdpa_listener_region_add_unaligned(v, section->mr->name, + trace_vhost_vdpa_listener_region_add_unaligned(s, section->mr->name, section->offset_within_address_space & ~page_mask, section->offset_within_region & ~page_mask); return; @@ -347,18 +346,18 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, section->offset_within_region + (iova - section->offset_within_address_space); - trace_vhost_vdpa_listener_region_add(v, iova, int128_get64(llend), + trace_vhost_vdpa_listener_region_add(s, iova, int128_get64(llend), vaddr, section->readonly); llsize = int128_sub(llend, int128_make64(iova)); - if (v->shadow_data) { + if (s->shadow_data) { int r; mem_region.translated_addr = (hwaddr)(uintptr_t)vaddr, mem_region.size = int128_get64(llsize) - 1, mem_region.perm = IOMMU_ACCESS_FLAG(true, section->readonly), - r = vhost_iova_tree_map_alloc(v->iova_tree, &mem_region); + r = vhost_iova_tree_map_alloc(s->iova_tree, &mem_region); if (unlikely(r != IOVA_OK)) { error_report("Can't allocate a mapping (%d)", r); goto fail; @@ -367,8 +366,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, iova = mem_region.iova; } - vhost_vdpa_iotlb_batch_begin_once(v); - ret = vhost_vdpa_dma_map(v, VHOST_VDPA_GUEST_PA_ASID, iova, + vhost_vdpa_iotlb_batch_begin_once(s); + ret = vhost_vdpa_dma_map(s, VHOST_VDPA_GUEST_PA_ASID, iova, int128_get64(llsize), vaddr, section->readonly); if (ret) { error_report("vhost vdpa map fail!"); @@ -378,8 +377,8 @@ static void vhost_vdpa_listener_region_add(MemoryListener *listener, return; fail_map: - if (v->shadow_data) { - vhost_iova_tree_remove(v->iova_tree, mem_region); + if (s->shadow_data) { + vhost_iova_tree_remove(s->iova_tree, mem_region); } fail: @@ -396,15 +395,15 @@ fail: static void vhost_vdpa_listener_region_del(MemoryListener *listener, MemoryRegionSection *section) { - struct vhost_vdpa *v = container_of(listener, struct vhost_vdpa, listener); + VhostVDPAShared *s = container_of(listener, VhostVDPAShared, listener); hwaddr iova; Int128 llend, llsize; int ret; int page_size = qemu_target_page_size(); int page_mask = -page_size; - if (vhost_vdpa_listener_skipped_section(section, v->iova_range.first, - v->iova_range.last, page_mask)) { + if (vhost_vdpa_listener_skipped_section(section, s->iova_range.first, + s->iova_range.last, page_mask)) { return; } if (memory_region_is_iommu(section->mr)) { @@ -413,7 +412,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, if (unlikely((section->offset_within_address_space & ~page_mask) != (section->offset_within_region & ~page_mask))) { - trace_vhost_vdpa_listener_region_del_unaligned(v, section->mr->name, + trace_vhost_vdpa_listener_region_del_unaligned(s, section->mr->name, section->offset_within_address_space & ~page_mask, section->offset_within_region & ~page_mask); return; @@ -422,7 +421,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, iova = ROUND_UP(section->offset_within_address_space, page_size); llend = vhost_vdpa_section_end(section, page_mask); - trace_vhost_vdpa_listener_region_del(v, iova, + trace_vhost_vdpa_listener_region_del(s, iova, int128_get64(int128_sub(llend, int128_one()))); if (int128_ge(int128_make64(iova), llend)) { @@ -431,7 +430,7 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, llsize = int128_sub(llend, int128_make64(iova)); - if (v->shadow_data) { + if (s->shadow_data) { const DMAMap *result; const void *vaddr = memory_region_get_ram_ptr(section->mr) + section->offset_within_region + @@ -441,37 +440,37 @@ static void vhost_vdpa_listener_region_del(MemoryListener *listener, .size = int128_get64(llsize) - 1, }; - result = vhost_iova_tree_find_iova(v->iova_tree, &mem_region); + result = vhost_iova_tree_find_iova(s->iova_tree, &mem_region); if (!result) { /* The memory listener map wasn't mapped */ return; } iova = result->iova; - vhost_iova_tree_remove(v->iova_tree, *result); + vhost_iova_tree_remove(s->iova_tree, *result); } - vhost_vdpa_iotlb_batch_begin_once(v); + vhost_vdpa_iotlb_batch_begin_once(s); /* * The unmap ioctl doesn't accept a full 64-bit. need to check it */ if (int128_eq(llsize, int128_2_64())) { llsize = int128_rshift(llsize, 1); - ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova, int128_get64(llsize)); if (ret) { error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", " "0x%" HWADDR_PRIx ") = %d (%m)", - v, iova, int128_get64(llsize), ret); + s, iova, int128_get64(llsize), ret); } iova += int128_get64(llsize); } - ret = vhost_vdpa_dma_unmap(v, VHOST_VDPA_GUEST_PA_ASID, iova, + ret = vhost_vdpa_dma_unmap(s, VHOST_VDPA_GUEST_PA_ASID, iova, int128_get64(llsize)); if (ret) { error_report("vhost_vdpa_dma_unmap(%p, 0x%" HWADDR_PRIx ", " "0x%" HWADDR_PRIx ") = %d (%m)", - v, iova, int128_get64(llsize), ret); + s, iova, int128_get64(llsize), ret); } memory_region_unref(section->mr); @@ -492,7 +491,7 @@ static int vhost_vdpa_call(struct vhost_dev *dev, unsigned long int request, void *arg) { struct vhost_vdpa *v = dev->opaque; - int fd = v->device_fd; + int fd = v->shared->device_fd; int ret; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); @@ -511,6 +510,10 @@ static int vhost_vdpa_add_status(struct vhost_dev *dev, uint8_t status) if (ret < 0) { return ret; } + if ((s & status) == status) { + /* Don't set bits already set */ + return 0; + } s |= status; @@ -579,16 +582,14 @@ static void vhost_vdpa_init_svq(struct vhost_dev *hdev, struct vhost_vdpa *v) static int vhost_vdpa_init(struct vhost_dev *dev, void *opaque, Error **errp) { - struct vhost_vdpa *v; + struct vhost_vdpa *v = opaque; assert(dev->vhost_ops->backend_type == VHOST_BACKEND_TYPE_VDPA); - trace_vhost_vdpa_init(dev, opaque); + trace_vhost_vdpa_init(dev, v->shared, opaque); int ret; - v = opaque; v->dev = dev; dev->opaque = opaque ; - v->listener = vhost_vdpa_memory_listener; - v->msg_type = VHOST_IOTLB_MSG_V2; + v->shared->listener = vhost_vdpa_memory_listener; vhost_vdpa_init_svq(dev, v); error_propagate(&dev->migration_blocker, v->migration_blocker); @@ -651,7 +652,7 @@ static int vhost_vdpa_host_notifier_init(struct vhost_dev *dev, int queue_index) struct vhost_vdpa *v = dev->opaque; VirtIODevice *vdev = dev->vdev; VhostVDPAHostNotifier *n; - int fd = v->device_fd; + int fd = v->shared->device_fd; void *addr; char *name; @@ -748,10 +749,10 @@ static int vhost_vdpa_cleanup(struct vhost_dev *dev) trace_vhost_vdpa_cleanup(dev, v); if (vhost_vdpa_first_dev(dev)) { ram_block_discard_disable(false); + memory_listener_unregister(&v->shared->listener); } vhost_vdpa_host_notifiers_uninit(dev, dev->nvqs); - memory_listener_unregister(&v->listener); vhost_vdpa_svq_cleanup(dev); dev->opaque = NULL; @@ -828,6 +829,8 @@ static int vhost_vdpa_set_features(struct vhost_dev *dev, static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) { + struct vhost_vdpa *v = dev->opaque; + uint64_t features; uint64_t f = 0x1ULL << VHOST_BACKEND_F_IOTLB_MSG_V2 | 0x1ULL << VHOST_BACKEND_F_IOTLB_BATCH | @@ -849,6 +852,7 @@ static int vhost_vdpa_set_backend_cap(struct vhost_dev *dev) } dev->backend_cap = features; + v->shared->backend_cap = features; return 0; } @@ -1059,7 +1063,8 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) const DMAMap needle = { .translated_addr = addr, }; - const DMAMap *result = vhost_iova_tree_find_iova(v->iova_tree, &needle); + const DMAMap *result = vhost_iova_tree_find_iova(v->shared->iova_tree, + &needle); hwaddr size; int r; @@ -1069,13 +1074,14 @@ static void vhost_vdpa_svq_unmap_ring(struct vhost_vdpa *v, hwaddr addr) } size = ROUND_UP(result->size, qemu_real_host_page_size()); - r = vhost_vdpa_dma_unmap(v, v->address_space_id, result->iova, size); + r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, result->iova, + size); if (unlikely(r < 0)) { error_report("Unable to unmap SVQ vring: %s (%d)", g_strerror(-r), -r); return; } - vhost_iova_tree_remove(v->iova_tree, *result); + vhost_iova_tree_remove(v->shared->iova_tree, *result); } static void vhost_vdpa_svq_unmap_rings(struct vhost_dev *dev, @@ -1103,19 +1109,19 @@ static bool vhost_vdpa_svq_map_ring(struct vhost_vdpa *v, DMAMap *needle, { int r; - r = vhost_iova_tree_map_alloc(v->iova_tree, needle); + r = vhost_iova_tree_map_alloc(v->shared->iova_tree, needle); if (unlikely(r != IOVA_OK)) { error_setg(errp, "Cannot allocate iova (%d)", r); return false; } - r = vhost_vdpa_dma_map(v, v->address_space_id, needle->iova, + r = vhost_vdpa_dma_map(v->shared, v->address_space_id, needle->iova, needle->size + 1, (void *)(uintptr_t)needle->translated_addr, needle->perm == IOMMU_RO); if (unlikely(r != 0)) { error_setg_errno(errp, -r, "Cannot map region to device"); - vhost_iova_tree_remove(v->iova_tree, *needle); + vhost_iova_tree_remove(v->shared->iova_tree, *needle); } return r == 0; @@ -1216,7 +1222,7 @@ static bool vhost_vdpa_svqs_start(struct vhost_dev *dev) goto err; } - vhost_svq_start(svq, dev->vdev, vq, v->iova_tree); + vhost_svq_start(svq, dev->vdev, vq, v->shared->iova_tree); ok = vhost_vdpa_svq_map_rings(dev, svq, &addr, &err); if (unlikely(!ok)) { goto err_map; @@ -1279,7 +1285,7 @@ static void vhost_vdpa_suspend(struct vhost_dev *dev) if (dev->backend_cap & BIT_ULL(VHOST_BACKEND_F_SUSPEND)) { trace_vhost_vdpa_suspend(dev); - r = ioctl(v->device_fd, VHOST_VDPA_SUSPEND); + r = ioctl(v->shared->device_fd, VHOST_VDPA_SUSPEND); if (unlikely(r)) { error_report("Cannot suspend: %s(%d)", g_strerror(errno), errno); } else { @@ -1319,7 +1325,7 @@ static int vhost_vdpa_dev_start(struct vhost_dev *dev, bool started) "IOMMU and try again"); return -1; } - memory_listener_register(&v->listener, dev->vdev->dma_as); + memory_listener_register(&v->shared->listener, dev->vdev->dma_as); return vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_DRIVER_OK); } @@ -1338,7 +1344,7 @@ static void vhost_vdpa_reset_status(struct vhost_dev *dev) vhost_vdpa_reset_device(dev); vhost_vdpa_add_status(dev, VIRTIO_CONFIG_S_ACKNOWLEDGE | VIRTIO_CONFIG_S_DRIVER); - memory_listener_unregister(&v->listener); + memory_listener_unregister(&v->shared->listener); } static int vhost_vdpa_set_log_base(struct vhost_dev *dev, uint64_t base, diff --git a/include/hw/virtio/vhost-backend.h b/include/hw/virtio/vhost-backend.h index a86d103f82..70c2e8ffee 100644 --- a/include/hw/virtio/vhost-backend.h +++ b/include/hw/virtio/vhost-backend.h @@ -45,6 +45,8 @@ struct vhost_memory; struct vhost_vring_file; struct vhost_vring_state; struct vhost_vring_addr; +struct vhost_vring_worker; +struct vhost_worker_state; struct vhost_scsi_target; struct vhost_iotlb_msg; struct vhost_virtqueue; @@ -85,6 +87,14 @@ typedef int (*vhost_set_vring_err_op)(struct vhost_dev *dev, struct vhost_vring_file *file); typedef int (*vhost_set_vring_busyloop_timeout_op)(struct vhost_dev *dev, struct vhost_vring_state *r); +typedef int (*vhost_attach_vring_worker_op)(struct vhost_dev *dev, + struct vhost_vring_worker *worker); +typedef int (*vhost_get_vring_worker_op)(struct vhost_dev *dev, + struct vhost_vring_worker *worker); +typedef int (*vhost_new_worker_op)(struct vhost_dev *dev, + struct vhost_worker_state *worker); +typedef int (*vhost_free_worker_op)(struct vhost_dev *dev, + struct vhost_worker_state *worker); typedef int (*vhost_set_features_op)(struct vhost_dev *dev, uint64_t features); typedef int (*vhost_get_features_op)(struct vhost_dev *dev, @@ -172,6 +182,10 @@ typedef struct VhostOps { vhost_set_vring_call_op vhost_set_vring_call; vhost_set_vring_err_op vhost_set_vring_err; vhost_set_vring_busyloop_timeout_op vhost_set_vring_busyloop_timeout; + vhost_new_worker_op vhost_new_worker; + vhost_free_worker_op vhost_free_worker; + vhost_get_vring_worker_op vhost_get_vring_worker; + vhost_attach_vring_worker_op vhost_attach_vring_worker; vhost_set_features_op vhost_set_features; vhost_get_features_op vhost_get_features; vhost_set_backend_cap_op vhost_set_backend_cap; diff --git a/include/hw/virtio/vhost-vdpa.h b/include/hw/virtio/vhost-vdpa.h index 5407d54fd7..8f54e5edd4 100644 --- a/include/hw/virtio/vhost-vdpa.h +++ b/include/hw/virtio/vhost-vdpa.h @@ -30,42 +30,52 @@ typedef struct VhostVDPAHostNotifier { void *addr; } VhostVDPAHostNotifier; -typedef struct vhost_vdpa { +/* Info shared by all vhost_vdpa device models */ +typedef struct vhost_vdpa_shared { int device_fd; - int index; - uint32_t msg_type; - bool iotlb_batch_begin_sent; - uint32_t address_space_id; MemoryListener listener; struct vhost_vdpa_iova_range iova_range; - uint64_t acked_features; - bool shadow_vqs_enabled; - /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ - bool shadow_data; - /* Device suspended successfully */ - bool suspended; + QLIST_HEAD(, vdpa_iommu) iommu_list; + /* IOVA mapping used by the Shadow Virtqueue */ VhostIOVATree *iova_tree; + + /* Copy of backend features */ + uint64_t backend_cap; + + bool iotlb_batch_begin_sent; + + /* Vdpa must send shadow addresses as IOTLB key for data queues, not GPA */ + bool shadow_data; +} VhostVDPAShared; + +typedef struct vhost_vdpa { + int index; + uint32_t address_space_id; + uint64_t acked_features; + bool shadow_vqs_enabled; + /* Device suspended successfully */ + bool suspended; + VhostVDPAShared *shared; GPtrArray *shadow_vqs; const VhostShadowVirtqueueOps *shadow_vq_ops; void *shadow_vq_ops_opaque; struct vhost_dev *dev; Error *migration_blocker; VhostVDPAHostNotifier notifier[VIRTIO_QUEUE_MAX]; - QLIST_HEAD(, vdpa_iommu) iommu_list; IOMMUNotifier n; } VhostVDPA; int vhost_vdpa_get_iova_range(int fd, struct vhost_vdpa_iova_range *iova_range); int vhost_vdpa_set_vring_ready(struct vhost_vdpa *v, unsigned idx); -int vhost_vdpa_dma_map(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, +int vhost_vdpa_dma_map(VhostVDPAShared *s, uint32_t asid, hwaddr iova, hwaddr size, void *vaddr, bool readonly); -int vhost_vdpa_dma_unmap(struct vhost_vdpa *v, uint32_t asid, hwaddr iova, +int vhost_vdpa_dma_unmap(VhostVDPAShared *s, uint32_t asid, hwaddr iova, hwaddr size); typedef struct vdpa_iommu { - struct vhost_vdpa *dev; + VhostVDPAShared *dev_shared; IOMMUMemoryRegion *iommu_mr; hwaddr iommu_offset; IOMMUNotifier n; diff --git a/include/hw/virtio/virtio-scsi.h b/include/hw/virtio/virtio-scsi.h index 7f0573b1bf..7be0105918 100644 --- a/include/hw/virtio/virtio-scsi.h +++ b/include/hw/virtio/virtio-scsi.h @@ -51,6 +51,7 @@ typedef struct virtio_scsi_config VirtIOSCSIConfig; struct VirtIOSCSIConf { uint32_t num_queues; uint32_t virtqueue_size; + bool worker_per_virtqueue; bool seg_max_adjust; uint32_t max_sectors; uint32_t cmd_per_lun; diff --git a/net/vhost-vdpa.c b/net/vhost-vdpa.c index d0614d7954..3726ee5d67 100644 --- a/net/vhost-vdpa.c +++ b/net/vhost-vdpa.c @@ -236,10 +236,11 @@ static void vhost_vdpa_cleanup(NetClientState *nc) g_free(s->vhost_net); s->vhost_net = NULL; } - if (s->vhost_vdpa.device_fd >= 0) { - qemu_close(s->vhost_vdpa.device_fd); - s->vhost_vdpa.device_fd = -1; + if (s->vhost_vdpa.index != 0) { + return; } + qemu_close(s->vhost_vdpa.shared->device_fd); + g_free(s->vhost_vdpa.shared); } /** Dummy SetSteeringEBPF to support RSS for vhost-vdpa backend */ @@ -286,15 +287,6 @@ static ssize_t vhost_vdpa_receive(NetClientState *nc, const uint8_t *buf, return size; } -/** From any vdpa net client, get the netclient of the first queue pair */ -static VhostVDPAState *vhost_vdpa_net_first_nc_vdpa(VhostVDPAState *s) -{ - NICState *nic = qemu_get_nic(s->nc.peer); - NetClientState *nc0 = qemu_get_peer(nic->ncs, 0); - - return DO_UPCAST(VhostVDPAState, nc, nc0); -} - static void vhost_vdpa_net_log_global_enable(VhostVDPAState *s, bool enable) { struct vhost_vdpa *v = &s->vhost_vdpa; @@ -350,8 +342,8 @@ static void vhost_vdpa_net_data_start_first(VhostVDPAState *s) migration_add_notifier(&s->migration_state, vdpa_net_migration_state_notifier); if (v->shadow_vqs_enabled) { - v->iova_tree = vhost_iova_tree_new(v->iova_range.first, - v->iova_range.last); + v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, + v->shared->iova_range.last); } } @@ -365,22 +357,16 @@ static int vhost_vdpa_net_data_start(NetClientState *nc) if (s->always_svq || migration_is_setup_or_active(migrate_get_current()->state)) { v->shadow_vqs_enabled = true; - v->shadow_data = true; } else { v->shadow_vqs_enabled = false; - v->shadow_data = false; } if (v->index == 0) { + v->shared->shadow_data = v->shadow_vqs_enabled; vhost_vdpa_net_data_start_first(s); return 0; } - if (v->shadow_vqs_enabled) { - VhostVDPAState *s0 = vhost_vdpa_net_first_nc_vdpa(s); - v->iova_tree = s0->vhost_vdpa.iova_tree; - } - return 0; } @@ -413,9 +399,8 @@ static void vhost_vdpa_net_client_stop(NetClientState *nc) dev = s->vhost_vdpa.dev; if (dev->vq_index + dev->nvqs == dev->vq_index_end) { - g_clear_pointer(&s->vhost_vdpa.iova_tree, vhost_iova_tree_delete); - } else { - s->vhost_vdpa.iova_tree = NULL; + g_clear_pointer(&s->vhost_vdpa.shared->iova_tree, + vhost_iova_tree_delete); } } @@ -460,7 +445,7 @@ static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, }; int r; - r = ioctl(v->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); + r = ioctl(v->shared->device_fd, VHOST_VDPA_SET_GROUP_ASID, &asid); if (unlikely(r < 0)) { error_report("Can't set vq group %u asid %u, errno=%d (%s)", asid.index, asid.num, errno, g_strerror(errno)); @@ -470,7 +455,7 @@ static int vhost_vdpa_set_address_space_id(struct vhost_vdpa *v, static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) { - VhostIOVATree *tree = v->iova_tree; + VhostIOVATree *tree = v->shared->iova_tree; DMAMap needle = { /* * No need to specify size or to look for more translations since @@ -486,7 +471,8 @@ static void vhost_vdpa_cvq_unmap_buf(struct vhost_vdpa *v, void *addr) return; } - r = vhost_vdpa_dma_unmap(v, v->address_space_id, map->iova, map->size + 1); + r = vhost_vdpa_dma_unmap(v->shared, v->address_space_id, map->iova, + map->size + 1); if (unlikely(r != 0)) { error_report("Device cannot unmap: %s(%d)", g_strerror(r), r); } @@ -504,13 +490,13 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, map.translated_addr = (hwaddr)(uintptr_t)buf; map.size = size - 1; map.perm = write ? IOMMU_RW : IOMMU_RO, - r = vhost_iova_tree_map_alloc(v->iova_tree, &map); + r = vhost_iova_tree_map_alloc(v->shared->iova_tree, &map); if (unlikely(r != IOVA_OK)) { error_report("Cannot map injected element"); return r; } - r = vhost_vdpa_dma_map(v, v->address_space_id, map.iova, + r = vhost_vdpa_dma_map(v->shared, v->address_space_id, map.iova, vhost_vdpa_net_cvq_cmd_page_len(), buf, !write); if (unlikely(r < 0)) { goto dma_map_err; @@ -519,13 +505,13 @@ static int vhost_vdpa_cvq_map_buf(struct vhost_vdpa *v, void *buf, size_t size, return 0; dma_map_err: - vhost_iova_tree_remove(v->iova_tree, map); + vhost_iova_tree_remove(v->shared->iova_tree, map); return r; } static int vhost_vdpa_net_cvq_start(NetClientState *nc) { - VhostVDPAState *s, *s0; + VhostVDPAState *s; struct vhost_vdpa *v; int64_t cvq_group; int r; @@ -536,12 +522,10 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) s = DO_UPCAST(VhostVDPAState, nc, nc); v = &s->vhost_vdpa; - s0 = vhost_vdpa_net_first_nc_vdpa(s); - v->shadow_data = s0->vhost_vdpa.shadow_vqs_enabled; - v->shadow_vqs_enabled = s0->vhost_vdpa.shadow_vqs_enabled; + v->shadow_vqs_enabled = v->shared->shadow_data; s->vhost_vdpa.address_space_id = VHOST_VDPA_GUEST_PA_ASID; - if (s->vhost_vdpa.shadow_data) { + if (v->shared->shadow_data) { /* SVQ is already configured for all virtqueues */ goto out; } @@ -558,7 +542,7 @@ static int vhost_vdpa_net_cvq_start(NetClientState *nc) return 0; } - cvq_group = vhost_vdpa_get_vring_group(v->device_fd, + cvq_group = vhost_vdpa_get_vring_group(v->shared->device_fd, v->dev->vq_index_end - 1, &err); if (unlikely(cvq_group < 0)) { @@ -579,24 +563,22 @@ out: return 0; } - if (s0->vhost_vdpa.iova_tree) { - /* - * SVQ is already configured for all virtqueues. Reuse IOVA tree for - * simplicity, whether CVQ shares ASID with guest or not, because: - * - Memory listener need access to guest's memory addresses allocated - * in the IOVA tree. - * - There should be plenty of IOVA address space for both ASID not to - * worry about collisions between them. Guest's translations are - * still validated with virtio virtqueue_pop so there is no risk for - * the guest to access memory that it shouldn't. - * - * To allocate a iova tree per ASID is doable but it complicates the - * code and it is not worth it for the moment. - */ - v->iova_tree = s0->vhost_vdpa.iova_tree; - } else { - v->iova_tree = vhost_iova_tree_new(v->iova_range.first, - v->iova_range.last); + /* + * If other vhost_vdpa already have an iova_tree, reuse it for simplicity, + * whether CVQ shares ASID with guest or not, because: + * - Memory listener need access to guest's memory addresses allocated in + * the IOVA tree. + * - There should be plenty of IOVA address space for both ASID not to + * worry about collisions between them. Guest's translations are still + * validated with virtio virtqueue_pop so there is no risk for the guest + * to access memory that it shouldn't. + * + * To allocate a iova tree per ASID is doable but it complicates the code + * and it is not worth it for the moment. + */ + if (!v->shared->iova_tree) { + v->shared->iova_tree = vhost_iova_tree_new(v->shared->iova_range.first, + v->shared->iova_range.last); } r = vhost_vdpa_cvq_map_buf(&s->vhost_vdpa, s->cvq_cmd_out_buffer, @@ -1661,6 +1643,7 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, bool svq, struct vhost_vdpa_iova_range iova_range, uint64_t features, + VhostVDPAShared *shared, Error **errp) { NetClientState *nc = NULL; @@ -1686,16 +1669,17 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, qemu_set_info_str(nc, TYPE_VHOST_VDPA); s = DO_UPCAST(VhostVDPAState, nc, nc); - s->vhost_vdpa.device_fd = vdpa_device_fd; s->vhost_vdpa.index = queue_pair_index; s->always_svq = svq; s->migration_state.notify = NULL; s->vhost_vdpa.shadow_vqs_enabled = svq; - s->vhost_vdpa.iova_range = iova_range; - s->vhost_vdpa.shadow_data = svq; if (queue_pair_index == 0) { vhost_vdpa_net_valid_svq_features(features, &s->vhost_vdpa.migration_blocker); + s->vhost_vdpa.shared = g_new0(VhostVDPAShared, 1); + s->vhost_vdpa.shared->device_fd = vdpa_device_fd; + s->vhost_vdpa.shared->iova_range = iova_range; + s->vhost_vdpa.shared->shadow_data = svq; } else if (!is_datapath) { s->cvq_cmd_out_buffer = mmap(NULL, vhost_vdpa_net_cvq_cmd_page_len(), PROT_READ | PROT_WRITE, @@ -1708,11 +1692,16 @@ static NetClientState *net_vhost_vdpa_init(NetClientState *peer, s->vhost_vdpa.shadow_vq_ops_opaque = s; s->cvq_isolated = cvq_isolated; } + if (queue_pair_index != 0) { + s->vhost_vdpa.shared = shared; + } + ret = vhost_vdpa_add(nc, (void *)&s->vhost_vdpa, queue_pair_index, nvqs); if (ret) { qemu_del_net_client(nc); return NULL; } + return nc; } @@ -1824,17 +1813,26 @@ int net_init_vhost_vdpa(const Netdev *netdev, const char *name, ncs = g_malloc0(sizeof(*ncs) * queue_pairs); for (i = 0; i < queue_pairs; i++) { + VhostVDPAShared *shared = NULL; + + if (i) { + shared = DO_UPCAST(VhostVDPAState, nc, ncs[0])->vhost_vdpa.shared; + } ncs[i] = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 2, true, opts->x_svq, - iova_range, features, errp); + iova_range, features, shared, errp); if (!ncs[i]) goto err; } if (has_cvq) { + VhostVDPAState *s0 = DO_UPCAST(VhostVDPAState, nc, ncs[0]); + VhostVDPAShared *shared = s0->vhost_vdpa.shared; + nc = net_vhost_vdpa_init(peer, TYPE_VHOST_VDPA, name, vdpa_device_fd, i, 1, false, - opts->x_svq, iova_range, features, errp); + opts->x_svq, iova_range, features, shared, + errp); if (!nc) goto err; } diff --git a/tests/qtest/bios-tables-test.c b/tests/qtest/bios-tables-test.c index fe6a9a8563..21811a1ab5 100644 --- a/tests/qtest/bios-tables-test.c +++ b/tests/qtest/bios-tables-test.c @@ -1015,7 +1015,7 @@ static void test_acpi_q35_tcg(void) free_test_data(&data); } -static void test_acpi_q35_tcg_type4_count(void) +static void test_acpi_q35_kvm_type4_count(void) { test_data data = { .machine = MACHINE_Q35, @@ -1031,7 +1031,7 @@ static void test_acpi_q35_tcg_type4_count(void) free_test_data(&data); } -static void test_acpi_q35_tcg_core_count(void) +static void test_acpi_q35_kvm_core_count(void) { test_data data = { .machine = MACHINE_Q35, @@ -1048,7 +1048,7 @@ static void test_acpi_q35_tcg_core_count(void) free_test_data(&data); } -static void test_acpi_q35_tcg_core_count2(void) +static void test_acpi_q35_kvm_core_count2(void) { test_data data = { .machine = MACHINE_Q35, @@ -1065,7 +1065,7 @@ static void test_acpi_q35_tcg_core_count2(void) free_test_data(&data); } -static void test_acpi_q35_tcg_thread_count(void) +static void test_acpi_q35_kvm_thread_count(void) { test_data data = { .machine = MACHINE_Q35, @@ -1082,7 +1082,7 @@ static void test_acpi_q35_tcg_thread_count(void) free_test_data(&data); } -static void test_acpi_q35_tcg_thread_count2(void) +static void test_acpi_q35_kvm_thread_count2(void) { test_data data = { .machine = MACHINE_Q35, @@ -2262,15 +2262,15 @@ int main(int argc, char *argv[]) qtest_add_func("acpi/q35/kvm/xapic", test_acpi_q35_kvm_xapic); qtest_add_func("acpi/q35/kvm/dmar", test_acpi_q35_kvm_dmar); qtest_add_func("acpi/q35/type4-count", - test_acpi_q35_tcg_type4_count); + test_acpi_q35_kvm_type4_count); qtest_add_func("acpi/q35/core-count", - test_acpi_q35_tcg_core_count); + test_acpi_q35_kvm_core_count); qtest_add_func("acpi/q35/core-count2", - test_acpi_q35_tcg_core_count2); + test_acpi_q35_kvm_core_count2); qtest_add_func("acpi/q35/thread-count", - test_acpi_q35_tcg_thread_count); + test_acpi_q35_kvm_thread_count); qtest_add_func("acpi/q35/thread-count2", - test_acpi_q35_tcg_thread_count2); + test_acpi_q35_kvm_thread_count2); } if (qtest_has_device("virtio-iommu-pci")) { qtest_add_func("acpi/q35/viot", test_acpi_q35_viot);