From 08cf3dc611991e9697f62458107e13f2c582869a Mon Sep 17 00:00:00 2001 From: Jagannathan Raman Date: Mon, 13 Jun 2022 16:26:33 -0400 Subject: [PATCH] vfio-user: handle device interrupts Forward remote device's interrupts to the guest Signed-off-by: Elena Ufimtseva Signed-off-by: John G Johnson Signed-off-by: Jagannathan Raman Message-id: 9523479eaafe050677f4de2af5dd0df18c27cfd9.1655151679.git.jag.raman@oracle.com Signed-off-by: Stefan Hajnoczi --- MAINTAINERS | 1 + hw/pci/msi.c | 49 +++++++-- hw/pci/msix.c | 35 ++++++- hw/pci/pci.c | 13 +++ hw/remote/machine.c | 16 ++- hw/remote/trace-events | 1 + hw/remote/vfio-user-obj.c | 167 ++++++++++++++++++++++++++++++ include/hw/pci/msi.h | 1 + include/hw/pci/msix.h | 1 + include/hw/pci/pci.h | 13 +++ include/hw/remote/vfio-user-obj.h | 6 ++ stubs/meson.build | 1 + stubs/vfio-user-obj.c | 6 ++ 13 files changed, 298 insertions(+), 12 deletions(-) create mode 100644 include/hw/remote/vfio-user-obj.h create mode 100644 stubs/vfio-user-obj.c diff --git a/MAINTAINERS b/MAINTAINERS index 563259101b..aaa649a50d 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3644,6 +3644,7 @@ F: hw/remote/iohub.c F: include/hw/remote/iohub.h F: subprojects/libvfio-user F: hw/remote/vfio-user-obj.c +F: include/hw/remote/vfio-user-obj.h F: hw/remote/iommu.c F: include/hw/remote/iommu.h diff --git a/hw/pci/msi.c b/hw/pci/msi.c index 47d2b0f33c..5c471b9616 100644 --- a/hw/pci/msi.c +++ b/hw/pci/msi.c @@ -134,7 +134,7 @@ void msi_set_message(PCIDevice *dev, MSIMessage msg) pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data); } -MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) +static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector) { uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; @@ -159,6 +159,11 @@ MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) return msg; } +MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector) +{ + return dev->msi_prepare_message(dev, vector); +} + bool msi_enabled(const PCIDevice *dev) { return msi_present(dev) && @@ -241,6 +246,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset, 0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors)); } + dev->msi_prepare_message = msi_prepare_message; + return 0; } @@ -256,6 +263,7 @@ void msi_uninit(struct PCIDevice *dev) cap_size = msi_cap_sizeof(flags); pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size); dev->cap_present &= ~QEMU_PCI_CAP_MSI; + dev->msi_prepare_message = NULL; MSI_DEV_PRINTF(dev, "uninit\n"); } @@ -307,6 +315,39 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector) return mask & (1U << vector); } +void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp) +{ + ERRP_GUARD(); + uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); + bool msi64bit = flags & PCI_MSI_FLAGS_64BIT; + uint32_t irq_state, vector_mask, pending; + + if (vector > PCI_MSI_VECTORS_MAX) { + error_setg(errp, "msi: vector %d not allocated. max vector is %d", + vector, PCI_MSI_VECTORS_MAX); + return; + } + + vector_mask = (1U << vector); + + irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit)); + + if (mask) { + irq_state |= vector_mask; + } else { + irq_state &= ~vector_mask; + } + + pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state); + + pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit)); + if (!mask && (pending & vector_mask)) { + pending &= ~vector_mask; + pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending); + msi_notify(dev, vector); + } +} + void msi_notify(PCIDevice *dev, unsigned int vector) { uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev)); @@ -334,11 +375,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector) void msi_send_message(PCIDevice *dev, MSIMessage msg) { - MemTxAttrs attrs = {}; - - attrs.requester_id = pci_requester_id(dev); - address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, - attrs, NULL); + dev->msi_trigger(dev, msg); } /* Normally called by pci_default_write_config(). */ diff --git a/hw/pci/msix.c b/hw/pci/msix.c index ae9331cd0b..1e381a9813 100644 --- a/hw/pci/msix.c +++ b/hw/pci/msix.c @@ -31,7 +31,7 @@ #define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8) #define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8) -MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector) { uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE; MSIMessage msg; @@ -41,6 +41,11 @@ MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) return msg; } +MSIMessage msix_get_message(PCIDevice *dev, unsigned vector) +{ + return dev->msix_prepare_message(dev, vector); +} + /* * Special API for POWER to configure the vectors through * a side channel. Should never be used by devices. @@ -131,6 +136,31 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked) } } +void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp) +{ + ERRP_GUARD(); + unsigned offset; + bool was_masked; + + if (vector > dev->msix_entries_nr) { + error_setg(errp, "msix: vector %d not allocated. max vector is %d", + vector, dev->msix_entries_nr); + return; + } + + offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL; + + was_masked = msix_is_masked(dev, vector); + + if (mask) { + dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT; + } else { + dev->msix_table[offset] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT; + } + + msix_handle_mask_update(dev, vector, was_masked); +} + static bool msix_masked(PCIDevice *dev) { return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK; @@ -344,6 +374,8 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries, "msix-pba", pba_size); memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio); + dev->msix_prepare_message = msix_prepare_message; + return 0; } @@ -429,6 +461,7 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar) g_free(dev->msix_entry_used); dev->msix_entry_used = NULL; dev->cap_present &= ~QEMU_PCI_CAP_MSIX; + dev->msix_prepare_message = NULL; } void msix_uninit_exclusive_bar(PCIDevice *dev) diff --git a/hw/pci/pci.c b/hw/pci/pci.c index 6e7015329c..2f450f6a72 100644 --- a/hw/pci/pci.c +++ b/hw/pci/pci.c @@ -317,6 +317,15 @@ void pci_device_deassert_intx(PCIDevice *dev) } } +static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg) +{ + MemTxAttrs attrs = {}; + + attrs.requester_id = pci_requester_id(dev); + address_space_stl_le(&dev->bus_master_as, msg.address, msg.data, + attrs, NULL); +} + static void pci_reset_regions(PCIDevice *dev) { int r; @@ -1212,6 +1221,8 @@ static void pci_qdev_unrealize(DeviceState *dev) pci_device_deassert_intx(pci_dev); do_pci_unregister_device(pci_dev); + + pci_dev->msi_trigger = NULL; } void pci_register_bar(PCIDevice *pci_dev, int region_num, @@ -2251,6 +2262,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp) } pci_set_power(pci_dev, true); + + pci_dev->msi_trigger = pci_msi_trigger; } PCIDevice *pci_new_multifunction(int devfn, bool multifunction, diff --git a/hw/remote/machine.c b/hw/remote/machine.c index 645b54343d..75d550daae 100644 --- a/hw/remote/machine.c +++ b/hw/remote/machine.c @@ -23,6 +23,8 @@ #include "hw/remote/iommu.h" #include "hw/qdev-core.h" #include "hw/remote/iommu.h" +#include "hw/remote/vfio-user-obj.h" +#include "hw/pci/msi.h" static void remote_machine_init(MachineState *machine) { @@ -54,13 +56,17 @@ static void remote_machine_init(MachineState *machine) if (s->vfio_user) { remote_iommu_setup(pci_host->bus); + + msi_nonbroken = true; + + vfu_object_set_bus_irq(pci_host->bus); + } else { + remote_iohub_init(&s->iohub); + + pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq, + &s->iohub, REMOTE_IOHUB_NB_PIRQS); } - remote_iohub_init(&s->iohub); - - pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq, - &s->iohub, REMOTE_IOHUB_NB_PIRQS); - qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s)); } diff --git a/hw/remote/trace-events b/hw/remote/trace-events index 847d50d88f..c167b3c7a5 100644 --- a/hw/remote/trace-events +++ b/hw/remote/trace-events @@ -12,3 +12,4 @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64"" vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64"" vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64"" vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64"" +vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d" diff --git a/hw/remote/vfio-user-obj.c b/hw/remote/vfio-user-obj.c index dd760a99e2..5ecdec06f6 100644 --- a/hw/remote/vfio-user-obj.c +++ b/hw/remote/vfio-user-obj.c @@ -53,6 +53,9 @@ #include "hw/pci/pci.h" #include "qemu/timer.h" #include "exec/memory.h" +#include "hw/pci/msi.h" +#include "hw/pci/msix.h" +#include "hw/remote/vfio-user-obj.h" #define TYPE_VFU_OBJECT "x-vfio-user-server" OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT) @@ -96,6 +99,10 @@ struct VfuObject { Error *unplug_blocker; int vfu_poll_fd; + + MSITriggerFunc *default_msi_trigger; + MSIPrepareMessageFunc *default_msi_prepare_message; + MSIxPrepareMessageFunc *default_msix_prepare_message; }; static void vfu_object_init_ctx(VfuObject *o, Error **errp); @@ -520,6 +527,155 @@ static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev) } } +static int vfu_object_map_irq(PCIDevice *pci_dev, int intx) +{ + int pci_bdf = PCI_BUILD_BDF(pci_bus_num(pci_get_bus(pci_dev)), + pci_dev->devfn); + + return pci_bdf; +} + +static void vfu_object_set_irq(void *opaque, int pirq, int level) +{ + PCIBus *pci_bus = opaque; + PCIDevice *pci_dev = NULL; + vfu_ctx_t *vfu_ctx = NULL; + int pci_bus_num, devfn; + + if (level) { + pci_bus_num = PCI_BUS_NUM(pirq); + devfn = PCI_BDF_TO_DEVFN(pirq); + + /* + * pci_find_device() performs at O(1) if the device is attached + * to the root PCI bus. Whereas, if the device is attached to a + * secondary PCI bus (such as when a root port is involved), + * finding the parent PCI bus could take O(n) + */ + pci_dev = pci_find_device(pci_bus, pci_bus_num, devfn); + + vfu_ctx = pci_dev->irq_opaque; + + g_assert(vfu_ctx); + + vfu_irq_trigger(vfu_ctx, 0); + } +} + +static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev, + unsigned int vector) +{ + MSIMessage msg; + + msg.address = 0; + msg.data = vector; + + return msg; +} + +static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg) +{ + vfu_ctx_t *vfu_ctx = pci_dev->irq_opaque; + + vfu_irq_trigger(vfu_ctx, msg.data); +} + +static void vfu_object_setup_msi_cbs(VfuObject *o) +{ + o->default_msi_trigger = o->pci_dev->msi_trigger; + o->default_msi_prepare_message = o->pci_dev->msi_prepare_message; + o->default_msix_prepare_message = o->pci_dev->msix_prepare_message; + + o->pci_dev->msi_trigger = vfu_object_msi_trigger; + o->pci_dev->msi_prepare_message = vfu_object_msi_prepare_msg; + o->pci_dev->msix_prepare_message = vfu_object_msi_prepare_msg; +} + +static void vfu_object_restore_msi_cbs(VfuObject *o) +{ + o->pci_dev->msi_trigger = o->default_msi_trigger; + o->pci_dev->msi_prepare_message = o->default_msi_prepare_message; + o->pci_dev->msix_prepare_message = o->default_msix_prepare_message; +} + +static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, + uint32_t count, bool mask) +{ + VfuObject *o = vfu_get_private(vfu_ctx); + Error *err = NULL; + uint32_t vector; + + for (vector = start; vector < count; vector++) { + msix_set_mask(o->pci_dev, vector, mask, &err); + if (err) { + VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device, + error_get_pretty(err)); + error_free(err); + err = NULL; + } + } +} + +static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start, + uint32_t count, bool mask) +{ + VfuObject *o = vfu_get_private(vfu_ctx); + Error *err = NULL; + uint32_t vector; + + for (vector = start; vector < count; vector++) { + msi_set_mask(o->pci_dev, vector, mask, &err); + if (err) { + VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device, + error_get_pretty(err)); + error_free(err); + err = NULL; + } + } +} + +static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev) +{ + vfu_ctx_t *vfu_ctx = o->vfu_ctx; + int ret; + + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1); + if (ret < 0) { + return ret; + } + + if (msix_nr_vectors_allocated(pci_dev)) { + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ, + msix_nr_vectors_allocated(pci_dev)); + vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ, + &vfu_msix_irq_state); + } else if (msi_nr_vectors_allocated(pci_dev)) { + ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ, + msi_nr_vectors_allocated(pci_dev)); + vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ, + &vfu_msi_irq_state); + } + + if (ret < 0) { + return ret; + } + + vfu_object_setup_msi_cbs(o); + + pci_dev->irq_opaque = vfu_ctx; + + return 0; +} + +void vfu_object_set_bus_irq(PCIBus *pci_bus) +{ + int bus_num = pci_bus_num(pci_bus); + int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1); + + pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, pci_bus, + max_bdf); +} + /* * TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device' * properties. It also depends on devices instantiated in QEMU. These @@ -632,6 +788,13 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp) vfu_object_register_bars(o->vfu_ctx, o->pci_dev); + ret = vfu_object_setup_irqs(o, o->pci_dev); + if (ret < 0) { + error_setg(errp, "vfu: Failed to setup interrupts for %s", + o->device); + goto fail; + } + ret = vfu_realize_ctx(o->vfu_ctx); if (ret < 0) { error_setg(errp, "vfu: Failed to realize device %s- %s", @@ -657,6 +820,8 @@ fail: o->unplug_blocker = NULL; } if (o->pci_dev) { + vfu_object_restore_msi_cbs(o); + o->pci_dev->irq_opaque = NULL; object_unref(OBJECT(o->pci_dev)); o->pci_dev = NULL; } @@ -716,6 +881,8 @@ static void vfu_object_finalize(Object *obj) } if (o->pci_dev) { + vfu_object_restore_msi_cbs(o); + o->pci_dev->irq_opaque = NULL; object_unref(OBJECT(o->pci_dev)); o->pci_dev = NULL; } diff --git a/include/hw/pci/msi.h b/include/hw/pci/msi.h index 4087688486..58aa576215 100644 --- a/include/hw/pci/msi.h +++ b/include/hw/pci/msi.h @@ -43,6 +43,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector); void msi_send_message(PCIDevice *dev, MSIMessage msg); void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len); unsigned int msi_nr_vectors_allocated(const PCIDevice *dev); +void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp); static inline bool msi_present(const PCIDevice *dev) { diff --git a/include/hw/pci/msix.h b/include/hw/pci/msix.h index 4c4a60c739..4f1cda0ebe 100644 --- a/include/hw/pci/msix.h +++ b/include/hw/pci/msix.h @@ -36,6 +36,7 @@ void msix_clr_pending(PCIDevice *dev, int vector); int msix_vector_use(PCIDevice *dev, unsigned vector); void msix_vector_unuse(PCIDevice *dev, unsigned vector); void msix_unuse_all_vectors(PCIDevice *dev); +void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp); void msix_notify(PCIDevice *dev, unsigned vector); diff --git a/include/hw/pci/pci.h b/include/hw/pci/pci.h index 44dacfa224..b54b6ef88f 100644 --- a/include/hw/pci/pci.h +++ b/include/hw/pci/pci.h @@ -16,6 +16,7 @@ extern bool pci_available; #define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f) #define PCI_FUNC(devfn) ((devfn) & 0x07) #define PCI_BUILD_BDF(bus, devfn) ((bus << 8) | (devfn)) +#define PCI_BDF_TO_DEVFN(x) ((x) & 0xff) #define PCI_BUS_MAX 256 #define PCI_DEVFN_MAX 256 #define PCI_SLOT_MAX 32 @@ -127,6 +128,10 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num, pcibus_t addr, pcibus_t size, int type); typedef void PCIUnregisterFunc(PCIDevice *pci_dev); +typedef void MSITriggerFunc(PCIDevice *dev, MSIMessage msg); +typedef MSIMessage MSIPrepareMessageFunc(PCIDevice *dev, unsigned vector); +typedef MSIMessage MSIxPrepareMessageFunc(PCIDevice *dev, unsigned vector); + typedef struct PCIIORegion { pcibus_t addr; /* current PCI mapping address. -1 means not mapped */ #define PCI_BAR_UNMAPPED (~(pcibus_t)0) @@ -329,6 +334,14 @@ struct PCIDevice { /* Space to store MSIX table & pending bit array */ uint8_t *msix_table; uint8_t *msix_pba; + + /* May be used by INTx or MSI during interrupt notification */ + void *irq_opaque; + + MSITriggerFunc *msi_trigger; + MSIPrepareMessageFunc *msi_prepare_message; + MSIxPrepareMessageFunc *msix_prepare_message; + /* MemoryRegion container for msix exclusive BAR setup */ MemoryRegion msix_exclusive_bar; /* Memory Regions for MSIX table and pending bit entries. */ diff --git a/include/hw/remote/vfio-user-obj.h b/include/hw/remote/vfio-user-obj.h new file mode 100644 index 0000000000..87ab78b875 --- /dev/null +++ b/include/hw/remote/vfio-user-obj.h @@ -0,0 +1,6 @@ +#ifndef VFIO_USER_OBJ_H +#define VFIO_USER_OBJ_H + +void vfu_object_set_bus_irq(PCIBus *pci_bus); + +#endif diff --git a/stubs/meson.build b/stubs/meson.build index 6f80fec761..d8f3fd5c44 100644 --- a/stubs/meson.build +++ b/stubs/meson.build @@ -60,3 +60,4 @@ if have_system else stub_ss.add(files('qdev.c')) endif +stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: files('vfio-user-obj.c')) diff --git a/stubs/vfio-user-obj.c b/stubs/vfio-user-obj.c new file mode 100644 index 0000000000..79100d768e --- /dev/null +++ b/stubs/vfio-user-obj.c @@ -0,0 +1,6 @@ +#include "qemu/osdep.h" +#include "hw/remote/vfio-user-obj.h" + +void vfu_object_set_bus_irq(PCIBus *pci_bus) +{ +}