diff --git a/hw/virtio/trace-events b/hw/virtio/trace-events index f7141aa2f6..15595f8cd7 100644 --- a/hw/virtio/trace-events +++ b/hw/virtio/trace-events @@ -64,3 +64,9 @@ virtio_iommu_attach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" virtio_iommu_detach(uint32_t domain_id, uint32_t ep_id) "domain=%d endpoint=%d" virtio_iommu_map(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end, uint64_t phys_start, uint32_t flags) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 " phys_start=0x%"PRIx64" flags=%d" virtio_iommu_unmap(uint32_t domain_id, uint64_t virt_start, uint64_t virt_end) "domain=%d virt_start=0x%"PRIx64" virt_end=0x%"PRIx64 +virtio_iommu_translate(const char *name, uint32_t rid, uint64_t iova, int flag) "mr=%s rid=%d addr=0x%"PRIx64" flag=%d" +virtio_iommu_init_iommu_mr(char *iommu_mr) "init %s" +virtio_iommu_get_endpoint(uint32_t ep_id) "Alloc endpoint=%d" +virtio_iommu_put_endpoint(uint32_t ep_id) "Free endpoint=%d" +virtio_iommu_get_domain(uint32_t domain_id) "Alloc domain=%d" +virtio_iommu_put_domain(uint32_t domain_id) "Free domain=%d" diff --git a/hw/virtio/virtio-iommu.c b/hw/virtio/virtio-iommu.c index 86dcdc09a1..d9fe83f530 100644 --- a/hw/virtio/virtio-iommu.c +++ b/hw/virtio/virtio-iommu.c @@ -23,6 +23,8 @@ #include "hw/qdev-properties.h" #include "hw/virtio/virtio.h" #include "sysemu/kvm.h" +#include "qapi/error.h" +#include "qemu/error-report.h" #include "trace.h" #include "standard-headers/linux/virtio_ids.h" @@ -30,19 +32,236 @@ #include "hw/virtio/virtio-bus.h" #include "hw/virtio/virtio-access.h" #include "hw/virtio/virtio-iommu.h" +#include "hw/pci/pci_bus.h" +#include "hw/pci/pci.h" /* Max size */ #define VIOMMU_DEFAULT_QUEUE_SIZE 256 +typedef struct VirtIOIOMMUDomain { + uint32_t id; + GTree *mappings; + QLIST_HEAD(, VirtIOIOMMUEndpoint) endpoint_list; +} VirtIOIOMMUDomain; + +typedef struct VirtIOIOMMUEndpoint { + uint32_t id; + VirtIOIOMMUDomain *domain; + QLIST_ENTRY(VirtIOIOMMUEndpoint) next; +} VirtIOIOMMUEndpoint; + +typedef struct VirtIOIOMMUInterval { + uint64_t low; + uint64_t high; +} VirtIOIOMMUInterval; + +static inline uint16_t virtio_iommu_get_bdf(IOMMUDevice *dev) +{ + return PCI_BUILD_BDF(pci_bus_num(dev->bus), dev->devfn); +} + +/** + * The bus number is used for lookup when SID based operations occur. + * In that case we lazily populate the IOMMUPciBus array from the bus hash + * table. At the time the IOMMUPciBus is created (iommu_find_add_as), the bus + * numbers may not be always initialized yet. + */ +static IOMMUPciBus *iommu_find_iommu_pcibus(VirtIOIOMMU *s, uint8_t bus_num) +{ + IOMMUPciBus *iommu_pci_bus = s->iommu_pcibus_by_bus_num[bus_num]; + + if (!iommu_pci_bus) { + GHashTableIter iter; + + g_hash_table_iter_init(&iter, s->as_by_busptr); + while (g_hash_table_iter_next(&iter, NULL, (void **)&iommu_pci_bus)) { + if (pci_bus_num(iommu_pci_bus->bus) == bus_num) { + s->iommu_pcibus_by_bus_num[bus_num] = iommu_pci_bus; + return iommu_pci_bus; + } + } + return NULL; + } + return iommu_pci_bus; +} + +static IOMMUMemoryRegion *virtio_iommu_mr(VirtIOIOMMU *s, uint32_t sid) +{ + uint8_t bus_n, devfn; + IOMMUPciBus *iommu_pci_bus; + IOMMUDevice *dev; + + bus_n = PCI_BUS_NUM(sid); + iommu_pci_bus = iommu_find_iommu_pcibus(s, bus_n); + if (iommu_pci_bus) { + devfn = sid & PCI_DEVFN_MAX; + dev = iommu_pci_bus->pbdev[devfn]; + if (dev) { + return &dev->iommu_mr; + } + } + return NULL; +} + +static gint interval_cmp(gconstpointer a, gconstpointer b, gpointer user_data) +{ + VirtIOIOMMUInterval *inta = (VirtIOIOMMUInterval *)a; + VirtIOIOMMUInterval *intb = (VirtIOIOMMUInterval *)b; + + if (inta->high < intb->low) { + return -1; + } else if (intb->high < inta->low) { + return 1; + } else { + return 0; + } +} + +static void virtio_iommu_detach_endpoint_from_domain(VirtIOIOMMUEndpoint *ep) +{ + if (!ep->domain) { + return; + } + QLIST_REMOVE(ep, next); + ep->domain = NULL; +} + +static VirtIOIOMMUEndpoint *virtio_iommu_get_endpoint(VirtIOIOMMU *s, + uint32_t ep_id) +{ + VirtIOIOMMUEndpoint *ep; + + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id)); + if (ep) { + return ep; + } + if (!virtio_iommu_mr(s, ep_id)) { + return NULL; + } + ep = g_malloc0(sizeof(*ep)); + ep->id = ep_id; + trace_virtio_iommu_get_endpoint(ep_id); + g_tree_insert(s->endpoints, GUINT_TO_POINTER(ep_id), ep); + return ep; +} + +static void virtio_iommu_put_endpoint(gpointer data) +{ + VirtIOIOMMUEndpoint *ep = (VirtIOIOMMUEndpoint *)data; + + if (ep->domain) { + virtio_iommu_detach_endpoint_from_domain(ep); + } + + trace_virtio_iommu_put_endpoint(ep->id); + g_free(ep); +} + +static VirtIOIOMMUDomain *virtio_iommu_get_domain(VirtIOIOMMU *s, + uint32_t domain_id) +{ + VirtIOIOMMUDomain *domain; + + domain = g_tree_lookup(s->domains, GUINT_TO_POINTER(domain_id)); + if (domain) { + return domain; + } + domain = g_malloc0(sizeof(*domain)); + domain->id = domain_id; + domain->mappings = g_tree_new_full((GCompareDataFunc)interval_cmp, + NULL, (GDestroyNotify)g_free, + (GDestroyNotify)g_free); + g_tree_insert(s->domains, GUINT_TO_POINTER(domain_id), domain); + QLIST_INIT(&domain->endpoint_list); + trace_virtio_iommu_get_domain(domain_id); + return domain; +} + +static void virtio_iommu_put_domain(gpointer data) +{ + VirtIOIOMMUDomain *domain = (VirtIOIOMMUDomain *)data; + VirtIOIOMMUEndpoint *iter, *tmp; + + QLIST_FOREACH_SAFE(iter, &domain->endpoint_list, next, tmp) { + virtio_iommu_detach_endpoint_from_domain(iter); + } + g_tree_destroy(domain->mappings); + trace_virtio_iommu_put_domain(domain->id); + g_free(domain); +} + +static AddressSpace *virtio_iommu_find_add_as(PCIBus *bus, void *opaque, + int devfn) +{ + VirtIOIOMMU *s = opaque; + IOMMUPciBus *sbus = g_hash_table_lookup(s->as_by_busptr, bus); + static uint32_t mr_index; + IOMMUDevice *sdev; + + if (!sbus) { + sbus = g_malloc0(sizeof(IOMMUPciBus) + + sizeof(IOMMUDevice *) * PCI_DEVFN_MAX); + sbus->bus = bus; + g_hash_table_insert(s->as_by_busptr, bus, sbus); + } + + sdev = sbus->pbdev[devfn]; + if (!sdev) { + char *name = g_strdup_printf("%s-%d-%d", + TYPE_VIRTIO_IOMMU_MEMORY_REGION, + mr_index++, devfn); + sdev = sbus->pbdev[devfn] = g_malloc0(sizeof(IOMMUDevice)); + + sdev->viommu = s; + sdev->bus = bus; + sdev->devfn = devfn; + + trace_virtio_iommu_init_iommu_mr(name); + + memory_region_init_iommu(&sdev->iommu_mr, sizeof(sdev->iommu_mr), + TYPE_VIRTIO_IOMMU_MEMORY_REGION, + OBJECT(s), name, + UINT64_MAX); + address_space_init(&sdev->as, + MEMORY_REGION(&sdev->iommu_mr), TYPE_VIRTIO_IOMMU); + g_free(name); + } + return &sdev->as; +} + static int virtio_iommu_attach(VirtIOIOMMU *s, struct virtio_iommu_req_attach *req) { uint32_t domain_id = le32_to_cpu(req->domain); uint32_t ep_id = le32_to_cpu(req->endpoint); + VirtIOIOMMUDomain *domain; + VirtIOIOMMUEndpoint *ep; trace_virtio_iommu_attach(domain_id, ep_id); - return VIRTIO_IOMMU_S_UNSUPP; + ep = virtio_iommu_get_endpoint(s, ep_id); + if (!ep) { + return VIRTIO_IOMMU_S_NOENT; + } + + if (ep->domain) { + VirtIOIOMMUDomain *previous_domain = ep->domain; + /* + * the device is already attached to a domain, + * detach it first + */ + virtio_iommu_detach_endpoint_from_domain(ep); + if (QLIST_EMPTY(&previous_domain->endpoint_list)) { + g_tree_remove(s->domains, GUINT_TO_POINTER(previous_domain->id)); + } + } + + domain = virtio_iommu_get_domain(s, domain_id); + QLIST_INSERT_HEAD(&domain->endpoint_list, ep, next); + + ep->domain = domain; + + return VIRTIO_IOMMU_S_OK; } static int virtio_iommu_detach(VirtIOIOMMU *s, @@ -50,10 +269,28 @@ static int virtio_iommu_detach(VirtIOIOMMU *s, { uint32_t domain_id = le32_to_cpu(req->domain); uint32_t ep_id = le32_to_cpu(req->endpoint); + VirtIOIOMMUDomain *domain; + VirtIOIOMMUEndpoint *ep; trace_virtio_iommu_detach(domain_id, ep_id); - return VIRTIO_IOMMU_S_UNSUPP; + ep = g_tree_lookup(s->endpoints, GUINT_TO_POINTER(ep_id)); + if (!ep) { + return VIRTIO_IOMMU_S_NOENT; + } + + domain = ep->domain; + + if (!domain || domain->id != domain_id) { + return VIRTIO_IOMMU_S_INVAL; + } + + virtio_iommu_detach_endpoint_from_domain(ep); + + if (QLIST_EMPTY(&domain->endpoint_list)) { + g_tree_remove(s->domains, GUINT_TO_POINTER(domain->id)); + } + return VIRTIO_IOMMU_S_OK; } static int virtio_iommu_map(VirtIOIOMMU *s, @@ -172,6 +409,27 @@ out: } } +static IOMMUTLBEntry virtio_iommu_translate(IOMMUMemoryRegion *mr, hwaddr addr, + IOMMUAccessFlags flag, + int iommu_idx) +{ + IOMMUDevice *sdev = container_of(mr, IOMMUDevice, iommu_mr); + uint32_t sid; + + IOMMUTLBEntry entry = { + .target_as = &address_space_memory, + .iova = addr, + .translated_addr = addr, + .addr_mask = ~(hwaddr)0, + .perm = IOMMU_NONE, + }; + + sid = virtio_iommu_get_bdf(sdev); + + trace_virtio_iommu_translate(mr->parent_obj.name, sid, addr, flag); + return entry; +} + static void virtio_iommu_get_config(VirtIODevice *vdev, uint8_t *config_data) { VirtIOIOMMU *dev = VIRTIO_IOMMU(vdev); @@ -218,6 +476,13 @@ static const VMStateDescription vmstate_virtio_iommu_device = { .unmigratable = 1, }; +static gint int_cmp(gconstpointer a, gconstpointer b, gpointer user_data) +{ + guint ua = GPOINTER_TO_UINT(a); + guint ub = GPOINTER_TO_UINT(b); + return (ua > ub) - (ua < ub); +} + static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); @@ -226,6 +491,8 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) virtio_init(vdev, "virtio-iommu", VIRTIO_ID_IOMMU, sizeof(struct virtio_iommu_config)); + memset(s->iommu_pcibus_by_bus_num, 0, sizeof(s->iommu_pcibus_by_bus_num)); + s->req_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, virtio_iommu_handle_command); s->event_vq = virtio_add_queue(vdev, VIOMMU_DEFAULT_QUEUE_SIZE, NULL); @@ -244,18 +511,43 @@ static void virtio_iommu_device_realize(DeviceState *dev, Error **errp) virtio_add_feature(&s->features, VIRTIO_IOMMU_F_MMIO); qemu_mutex_init(&s->mutex); + + s->as_by_busptr = g_hash_table_new_full(NULL, NULL, NULL, g_free); + + if (s->primary_bus) { + pci_setup_iommu(s->primary_bus, virtio_iommu_find_add_as, s); + } else { + error_setg(errp, "VIRTIO-IOMMU is not attached to any PCI bus!"); + } } static void virtio_iommu_device_unrealize(DeviceState *dev, Error **errp) { VirtIODevice *vdev = VIRTIO_DEVICE(dev); + VirtIOIOMMU *s = VIRTIO_IOMMU(dev); + + g_tree_destroy(s->domains); + g_tree_destroy(s->endpoints); virtio_cleanup(vdev); } static void virtio_iommu_device_reset(VirtIODevice *vdev) { + VirtIOIOMMU *s = VIRTIO_IOMMU(vdev); + trace_virtio_iommu_device_reset(); + + if (s->domains) { + g_tree_destroy(s->domains); + } + if (s->endpoints) { + g_tree_destroy(s->endpoints); + } + s->domains = g_tree_new_full((GCompareDataFunc)int_cmp, + NULL, NULL, virtio_iommu_put_domain); + s->endpoints = g_tree_new_full((GCompareDataFunc)int_cmp, + NULL, NULL, virtio_iommu_put_endpoint); } static void virtio_iommu_set_status(VirtIODevice *vdev, uint8_t status) @@ -301,6 +593,14 @@ static void virtio_iommu_class_init(ObjectClass *klass, void *data) vdc->vmsd = &vmstate_virtio_iommu_device; } +static void virtio_iommu_memory_region_class_init(ObjectClass *klass, + void *data) +{ + IOMMUMemoryRegionClass *imrc = IOMMU_MEMORY_REGION_CLASS(klass); + + imrc->translate = virtio_iommu_translate; +} + static const TypeInfo virtio_iommu_info = { .name = TYPE_VIRTIO_IOMMU, .parent = TYPE_VIRTIO_DEVICE, @@ -309,9 +609,16 @@ static const TypeInfo virtio_iommu_info = { .class_init = virtio_iommu_class_init, }; +static const TypeInfo virtio_iommu_memory_region_info = { + .parent = TYPE_IOMMU_MEMORY_REGION, + .name = TYPE_VIRTIO_IOMMU_MEMORY_REGION, + .class_init = virtio_iommu_memory_region_class_init, +}; + static void virtio_register_types(void) { type_register_static(&virtio_iommu_info); + type_register_static(&virtio_iommu_memory_region_info); } type_init(virtio_register_types) diff --git a/include/hw/virtio/virtio-iommu.h b/include/hw/virtio/virtio-iommu.h index d24ba63305..ae88f730cf 100644 --- a/include/hw/virtio/virtio-iommu.h +++ b/include/hw/virtio/virtio-iommu.h @@ -28,6 +28,8 @@ #define VIRTIO_IOMMU(obj) \ OBJECT_CHECK(VirtIOIOMMU, (obj), TYPE_VIRTIO_IOMMU) +#define TYPE_VIRTIO_IOMMU_MEMORY_REGION "virtio-iommu-memory-region" + typedef struct IOMMUDevice { void *viommu; PCIBus *bus; @@ -48,6 +50,7 @@ typedef struct VirtIOIOMMU { struct virtio_iommu_config config; uint64_t features; GHashTable *as_by_busptr; + IOMMUPciBus *iommu_pcibus_by_bus_num[PCI_BUS_MAX]; PCIBus *primary_bus; GTree *domains; QemuMutex mutex;