vfio-user: handle device interrupts

Forward the remote device's interrupts to the guest.

Signed-off-by: Elena Ufimtseva <elena.ufimtseva@oracle.com>
Signed-off-by: John G Johnson <john.g.johnson@oracle.com>
Signed-off-by: Jagannathan Raman <jag.raman@oracle.com>
Message-id: 9523479eaafe050677f4de2af5dd0df18c27cfd9.1655151679.git.jag.raman@oracle.com
Signed-off-by: Stefan Hajnoczi <stefanha@redhat.com>
This commit is contained in:
Jagannathan Raman 2022-06-13 16:26:33 -04:00 committed by Stefan Hajnoczi
parent 3123f93d6b
commit 08cf3dc611
13 changed files with 298 additions and 12 deletions

View File

@ -3644,6 +3644,7 @@ F: hw/remote/iohub.c
F: include/hw/remote/iohub.h
F: subprojects/libvfio-user
F: hw/remote/vfio-user-obj.c
F: include/hw/remote/vfio-user-obj.h
F: hw/remote/iommu.c
F: include/hw/remote/iommu.h

View File

@ -134,7 +134,7 @@ void msi_set_message(PCIDevice *dev, MSIMessage msg)
pci_set_word(dev->config + msi_data_off(dev, msi64bit), msg.data);
}
MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
static MSIMessage msi_prepare_message(PCIDevice *dev, unsigned int vector)
{
uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
@ -159,6 +159,11 @@ MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
return msg;
}
/*
 * Return the MSI message for @vector of @dev.
 *
 * The message is produced by the device's msi_prepare_message hook, so a
 * device that overrides the hook also changes what this function returns.
 */
MSIMessage msi_get_message(PCIDevice *dev, unsigned int vector)
{
    MSIMessage msg = dev->msi_prepare_message(dev, vector);

    return msg;
}
bool msi_enabled(const PCIDevice *dev)
{
return msi_present(dev) &&
@ -241,6 +246,8 @@ int msi_init(struct PCIDevice *dev, uint8_t offset,
0xffffffff >> (PCI_MSI_VECTORS_MAX - nr_vectors));
}
dev->msi_prepare_message = msi_prepare_message;
return 0;
}
@ -256,6 +263,7 @@ void msi_uninit(struct PCIDevice *dev)
cap_size = msi_cap_sizeof(flags);
pci_del_capability(dev, PCI_CAP_ID_MSI, cap_size);
dev->cap_present &= ~QEMU_PCI_CAP_MSI;
dev->msi_prepare_message = NULL;
MSI_DEV_PRINTF(dev, "uninit\n");
}
@ -307,6 +315,39 @@ bool msi_is_masked(const PCIDevice *dev, unsigned int vector)
return mask & (1U << vector);
}
/*
 * Mask or unmask a single MSI vector of @dev.
 *
 * Updates the vector's bit in the MSI mask register of the device's config
 * space. When a vector is unmasked while its pending bit is set, the pending
 * bit is cleared and the interrupt is delivered through msi_notify().
 *
 * @dev: device whose MSI mask register is updated
 * @vector: vector index; valid values are 0 .. PCI_MSI_VECTORS_MAX - 1
 * @mask: true to mask the vector, false to unmask it
 * @errp: set when @vector is out of range; config space is left untouched
 */
void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
{
    ERRP_GUARD();
    uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
    bool msi64bit = flags & PCI_MSI_FLAGS_64BIT;
    uint32_t irq_state, vector_mask, pending;

    /*
     * Vector indexes are zero based, so PCI_MSI_VECTORS_MAX itself is one
     * past the last valid vector. Negative values must be rejected as well:
     * both cases would make the shift below undefined.
     */
    if (vector < 0 || vector >= PCI_MSI_VECTORS_MAX) {
        error_setg(errp, "msi: vector %d not allocated. max vector is %d",
                   vector, PCI_MSI_VECTORS_MAX - 1);
        return;
    }

    vector_mask = (1U << vector);

    irq_state = pci_get_long(dev->config + msi_mask_off(dev, msi64bit));

    if (mask) {
        irq_state |= vector_mask;
    } else {
        irq_state &= ~vector_mask;
    }

    pci_set_long(dev->config + msi_mask_off(dev, msi64bit), irq_state);

    /* An interrupt that arrived while masked is delivered on unmask. */
    pending = pci_get_long(dev->config + msi_pending_off(dev, msi64bit));
    if (!mask && (pending & vector_mask)) {
        pending &= ~vector_mask;
        pci_set_long(dev->config + msi_pending_off(dev, msi64bit), pending);
        msi_notify(dev, vector);
    }
}
void msi_notify(PCIDevice *dev, unsigned int vector)
{
uint16_t flags = pci_get_word(dev->config + msi_flags_off(dev));
@ -334,11 +375,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector)
/*
 * Deliver the MSI message @msg on behalf of @dev.
 *
 * Delivery is delegated to the device's msi_trigger hook. The default hook,
 * pci_msi_trigger(), performs the little-endian store into the device's
 * bus-master address space, so that store must not be repeated here:
 * doing both would deliver the message twice with the default hook and
 * would bypass any hook that replaces the default.
 */
void msi_send_message(PCIDevice *dev, MSIMessage msg)
{
    dev->msi_trigger(dev, msg);
}
/* Normally called by pci_default_write_config(). */

View File

@ -31,7 +31,7 @@
#define MSIX_ENABLE_MASK (PCI_MSIX_FLAGS_ENABLE >> 8)
#define MSIX_MASKALL_MASK (PCI_MSIX_FLAGS_MASKALL >> 8)
MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
static MSIMessage msix_prepare_message(PCIDevice *dev, unsigned vector)
{
uint8_t *table_entry = dev->msix_table + vector * PCI_MSIX_ENTRY_SIZE;
MSIMessage msg;
@ -41,6 +41,11 @@ MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
return msg;
}
/*
 * Return the MSI-X message for @vector of @dev.
 *
 * The message is produced by the device's msix_prepare_message hook, so a
 * device that overrides the hook also changes what this function returns.
 */
MSIMessage msix_get_message(PCIDevice *dev, unsigned vector)
{
    MSIMessage msg = dev->msix_prepare_message(dev, vector);

    return msg;
}
/*
* Special API for POWER to configure the vectors through
* a side channel. Should never be used by devices.
@ -131,6 +136,31 @@ static void msix_handle_mask_update(PCIDevice *dev, int vector, bool was_masked)
}
}
/*
 * Mask or unmask a single MSI-X vector of @dev.
 *
 * Sets or clears the mask bit in the vector control field of the vector's
 * MSI-X table entry, then lets msix_handle_mask_update() act on the
 * transition from the previous mask state.
 *
 * @dev: device whose MSI-X table is updated
 * @vector: table entry index; valid values are 0 .. msix_entries_nr - 1
 * @mask: true to mask the vector, false to unmask it
 * @errp: set when @vector is out of range; the table is left untouched
 */
void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp)
{
    ERRP_GUARD();
    unsigned offset;
    bool was_masked;

    /*
     * Entries are indexed from zero, so msix_entries_nr itself is one past
     * the end of the table. Negative indexes are rejected too, otherwise
     * the offset computed below would point outside msix_table.
     */
    if (vector < 0 || vector >= dev->msix_entries_nr) {
        error_setg(errp, "msix: vector %d not allocated. max vector is %d",
                   vector, dev->msix_entries_nr - 1);
        return;
    }

    offset = vector * PCI_MSIX_ENTRY_SIZE + PCI_MSIX_ENTRY_VECTOR_CTRL;

    /* Sample the old state first; the update handler needs the transition. */
    was_masked = msix_is_masked(dev, vector);

    if (mask) {
        dev->msix_table[offset] |= PCI_MSIX_ENTRY_CTRL_MASKBIT;
    } else {
        dev->msix_table[offset] &= ~PCI_MSIX_ENTRY_CTRL_MASKBIT;
    }

    msix_handle_mask_update(dev, vector, was_masked);
}
static bool msix_masked(PCIDevice *dev)
{
return dev->config[dev->msix_cap + MSIX_CONTROL_OFFSET] & MSIX_MASKALL_MASK;
@ -344,6 +374,8 @@ int msix_init(struct PCIDevice *dev, unsigned short nentries,
"msix-pba", pba_size);
memory_region_add_subregion(pba_bar, pba_offset, &dev->msix_pba_mmio);
dev->msix_prepare_message = msix_prepare_message;
return 0;
}
@ -429,6 +461,7 @@ void msix_uninit(PCIDevice *dev, MemoryRegion *table_bar, MemoryRegion *pba_bar)
g_free(dev->msix_entry_used);
dev->msix_entry_used = NULL;
dev->cap_present &= ~QEMU_PCI_CAP_MSIX;
dev->msix_prepare_message = NULL;
}
void msix_uninit_exclusive_bar(PCIDevice *dev)

View File

@ -317,6 +317,15 @@ void pci_device_deassert_intx(PCIDevice *dev)
}
}
/*
 * Default msi_trigger hook: deliver @msg by storing msg.data (little
 * endian, 32 bit) at msg.address in the device's bus-master address space,
 * tagged with the device's requester ID.
 */
static void pci_msi_trigger(PCIDevice *dev, MSIMessage msg)
{
    MemTxAttrs attrs = { .requester_id = pci_requester_id(dev) };

    address_space_stl_le(&dev->bus_master_as, msg.address, msg.data,
                         attrs, NULL);
}
static void pci_reset_regions(PCIDevice *dev)
{
int r;
@ -1212,6 +1221,8 @@ static void pci_qdev_unrealize(DeviceState *dev)
pci_device_deassert_intx(pci_dev);
do_pci_unregister_device(pci_dev);
pci_dev->msi_trigger = NULL;
}
void pci_register_bar(PCIDevice *pci_dev, int region_num,
@ -2251,6 +2262,8 @@ static void pci_qdev_realize(DeviceState *qdev, Error **errp)
}
pci_set_power(pci_dev, true);
pci_dev->msi_trigger = pci_msi_trigger;
}
PCIDevice *pci_new_multifunction(int devfn, bool multifunction,

View File

@ -23,6 +23,8 @@
#include "hw/remote/iommu.h"
#include "hw/qdev-core.h"
#include "hw/remote/iommu.h"
#include "hw/remote/vfio-user-obj.h"
#include "hw/pci/msi.h"
static void remote_machine_init(MachineState *machine)
{
@ -54,13 +56,17 @@ static void remote_machine_init(MachineState *machine)
if (s->vfio_user) {
remote_iommu_setup(pci_host->bus);
msi_nonbroken = true;
vfu_object_set_bus_irq(pci_host->bus);
} else {
remote_iohub_init(&s->iohub);
pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
&s->iohub, REMOTE_IOHUB_NB_PIRQS);
}
remote_iohub_init(&s->iohub);
pci_bus_irqs(pci_host->bus, remote_iohub_set_irq, remote_iohub_map_irq,
&s->iohub, REMOTE_IOHUB_NB_PIRQS);
qbus_set_hotplug_handler(BUS(pci_host->bus), OBJECT(s));
}

View File

@ -12,3 +12,4 @@ vfu_dma_unregister(uint64_t gpa) "vfu: unregistering GPA 0x%"PRIx64""
vfu_bar_register(int i, uint64_t addr, uint64_t size) "vfu: BAR %d: addr 0x%"PRIx64" size 0x%"PRIx64""
vfu_bar_rw_enter(const char *op, uint64_t addr) "vfu: %s request for BAR address 0x%"PRIx64""
vfu_bar_rw_exit(const char *op, uint64_t addr) "vfu: Finished %s of BAR address 0x%"PRIx64""
vfu_interrupt(int pirq) "vfu: sending interrupt to device - PIRQ %d"

View File

@ -53,6 +53,9 @@
#include "hw/pci/pci.h"
#include "qemu/timer.h"
#include "exec/memory.h"
#include "hw/pci/msi.h"
#include "hw/pci/msix.h"
#include "hw/remote/vfio-user-obj.h"
#define TYPE_VFU_OBJECT "x-vfio-user-server"
OBJECT_DECLARE_TYPE(VfuObject, VfuObjectClass, VFU_OBJECT)
@ -96,6 +99,10 @@ struct VfuObject {
Error *unplug_blocker;
int vfu_poll_fd;
MSITriggerFunc *default_msi_trigger;
MSIPrepareMessageFunc *default_msi_prepare_message;
MSIxPrepareMessageFunc *default_msix_prepare_message;
};
static void vfu_object_init_ctx(VfuObject *o, Error **errp);
@ -520,6 +527,155 @@ static void vfu_object_register_bars(vfu_ctx_t *vfu_ctx, PCIDevice *pdev)
}
}
/*
 * Map an INTx pin of @pci_dev to an IRQ number.
 *
 * The IRQ number is the device's BDF (number of the bus it sits on combined
 * with its devfn); @intx is not used.
 */
static int vfu_object_map_irq(PCIDevice *pci_dev, int intx)
{
    PCIBus *bus = pci_get_bus(pci_dev);

    return PCI_BUILD_BDF(pci_bus_num(bus), pci_dev->devfn);
}
/*
 * Bus-level INTx handler: forward an asserted interrupt to the vfio-user
 * client.
 *
 * @opaque: the PCIBus used to look up the source device
 * @pirq: encodes the bus number and devfn of the source device (see
 *        vfu_object_map_irq())
 * @level: non-zero when the line is asserted; deassertion is ignored
 *
 * NOTE(review): the result of pci_find_device() is dereferenced without a
 * NULL check - confirm a device is always present for every @pirq.
 */
static void vfu_object_set_irq(void *opaque, int pirq, int level)
{
    PCIBus *root_bus = opaque;
    PCIDevice *source;
    vfu_ctx_t *ctx;

    if (!level) {
        return;
    }

    /*
     * pci_find_device() performs at O(1) if the device is attached
     * to the root PCI bus. Whereas, if the device is attached to a
     * secondary PCI bus (such as when a root port is involved),
     * finding the parent PCI bus could take O(n)
     */
    source = pci_find_device(root_bus, PCI_BUS_NUM(pirq),
                             PCI_BDF_TO_DEVFN(pirq));

    ctx = source->irq_opaque;
    g_assert(ctx);

    /* INTx is always signalled as sub-index 0. */
    vfu_irq_trigger(ctx, 0);
}
/*
 * msi/msix prepare-message hook for vfio-user devices.
 *
 * Builds a message whose data field carries the vector number and whose
 * address is zero; vfu_object_msi_trigger() reads the vector back from
 * msg.data.
 */
static MSIMessage vfu_object_msi_prepare_msg(PCIDevice *pci_dev,
                                             unsigned int vector)
{
    MSIMessage msg = {
        .address = 0,
        .data = vector,
    };

    return msg;
}
/*
 * msi_trigger hook for vfio-user devices: signal the interrupt whose
 * sub-index is stored in msg.data through the libvfio-user context kept in
 * the device's irq_opaque.
 */
static void vfu_object_msi_trigger(PCIDevice *pci_dev, MSIMessage msg)
{
    vfu_ctx_t *ctx = pci_dev->irq_opaque;
    uint32_t subindex = msg.data;

    vfu_irq_trigger(ctx, subindex);
}
/*
 * Redirect the MSI/MSI-X hooks of the attached PCI device to the vfio-user
 * implementations, remembering the previous hooks in @o so that
 * vfu_object_restore_msi_cbs() can put them back.
 */
static void vfu_object_setup_msi_cbs(VfuObject *o)
{
    PCIDevice *pdev = o->pci_dev;

    /* Trigger hook. */
    o->default_msi_trigger = pdev->msi_trigger;
    pdev->msi_trigger = vfu_object_msi_trigger;

    /* MSI message preparation. */
    o->default_msi_prepare_message = pdev->msi_prepare_message;
    pdev->msi_prepare_message = vfu_object_msi_prepare_msg;

    /* MSI-X message preparation shares the same implementation. */
    o->default_msix_prepare_message = pdev->msix_prepare_message;
    pdev->msix_prepare_message = vfu_object_msi_prepare_msg;
}
/*
 * Put back the MSI/MSI-X hooks that vfu_object_setup_msi_cbs() replaced.
 *
 * This is also reached from error paths that can run before the hooks were
 * ever swapped (for example when IRQ setup fails early). In that case the
 * saved pointers in @o are still NULL and copying them would wipe the
 * device's working callbacks, so the restore only happens while the
 * vfio-user trigger hook is actually installed. The saved pointers are
 * cleared afterwards, which makes a repeated call harmless.
 */
static void vfu_object_restore_msi_cbs(VfuObject *o)
{
    PCIDevice *pdev = o->pci_dev;

    if (pdev->msi_trigger != vfu_object_msi_trigger) {
        /* Hooks were never redirected (or were already restored). */
        return;
    }

    pdev->msi_trigger = o->default_msi_trigger;
    pdev->msi_prepare_message = o->default_msi_prepare_message;
    pdev->msix_prepare_message = o->default_msix_prepare_message;

    o->default_msi_trigger = NULL;
    o->default_msi_prepare_message = NULL;
    o->default_msix_prepare_message = NULL;
}
/*
 * libvfio-user IRQ state callback for MSI-X: apply @mask to @count vectors
 * beginning at @start.
 *
 * Errors reported by msix_set_mask() for an individual vector are logged
 * and do not stop the remaining vectors from being processed.
 */
static void vfu_msix_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
                               uint32_t count, bool mask)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    uint32_t i;

    /*
     * @count is a number of vectors, not an end index: iterate over
     * [start, start + count). Counting from zero also avoids overflowing
     * the loop bound.
     */
    for (i = 0; i < count; i++) {
        Error *err = NULL;

        msix_set_mask(o->pci_dev, start + i, mask, &err);
        if (err) {
            VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
                             error_get_pretty(err));
            error_free(err);
        }
    }
}
/*
 * libvfio-user IRQ state callback for MSI: apply @mask to @count vectors
 * beginning at @start.
 *
 * Errors reported by msi_set_mask() for an individual vector are logged
 * and do not stop the remaining vectors from being processed.
 */
static void vfu_msi_irq_state(vfu_ctx_t *vfu_ctx, uint32_t start,
                              uint32_t count, bool mask)
{
    VfuObject *o = vfu_get_private(vfu_ctx);
    uint32_t i;

    /*
     * @count is a number of vectors, not an end index: iterate over
     * [start, start + count). Counting from zero also avoids overflowing
     * the loop bound.
     */
    for (i = 0; i < count; i++) {
        Error *err = NULL;

        msi_set_mask(o->pci_dev, start + i, mask, &err);
        if (err) {
            VFU_OBJECT_ERROR(o, "vfu: %s: %s", o->device,
                             error_get_pretty(err));
            error_free(err);
        }
    }
}
/*
 * Describe the interrupts of @pci_dev to libvfio-user and route the
 * device's MSI/MSI-X delivery through the vfio-user context.
 *
 * One INTx interrupt is always registered. If the device has MSI-X vectors
 * allocated they are registered together with a mask-state callback;
 * otherwise, if it has MSI vectors allocated, those are registered instead.
 *
 * The device's MSI hooks and irq_opaque are only modified once every
 * libvfio-user call has succeeded.
 *
 * Returns 0 on success, or the negative value returned by the failing
 * libvfio-user call.
 */
static int vfu_object_setup_irqs(VfuObject *o, PCIDevice *pci_dev)
{
    vfu_ctx_t *vfu_ctx = o->vfu_ctx;
    int ret;

    ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_INTX_IRQ, 1);
    if (ret < 0) {
        return ret;
    }

    if (msix_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                       msix_nr_vectors_allocated(pci_dev));
        if (ret < 0) {
            /* Do not register a callback for vectors that were refused. */
            return ret;
        }
        ret = vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSIX_IRQ,
                                           &vfu_msix_irq_state);
    } else if (msi_nr_vectors_allocated(pci_dev)) {
        ret = vfu_setup_device_nr_irqs(vfu_ctx, VFU_DEV_MSI_IRQ,
                                       msi_nr_vectors_allocated(pci_dev));
        if (ret < 0) {
            /* Do not register a callback for vectors that were refused. */
            return ret;
        }
        ret = vfu_setup_irq_state_callback(vfu_ctx, VFU_DEV_MSI_IRQ,
                                           &vfu_msi_irq_state);
    }

    if (ret < 0) {
        return ret;
    }

    vfu_object_setup_msi_cbs(o);

    pci_dev->irq_opaque = vfu_ctx;

    return 0;
}
/*
 * Install the vfio-user INTx handlers on @pci_bus.
 *
 * vfu_object_map_irq() uses a device's BDF as its IRQ number, so the bus
 * needs one IRQ slot for every BDF from 0 up to and including the largest
 * one reachable on this bus number. The last argument of pci_bus_irqs() is
 * a count, hence max_bdf + 1; passing max_bdf would leave the device with
 * the highest devfn without a slot.
 */
void vfu_object_set_bus_irq(PCIBus *pci_bus)
{
    int bus_num = pci_bus_num(pci_bus);
    int max_bdf = PCI_BUILD_BDF(bus_num, PCI_DEVFN_MAX - 1);

    pci_bus_irqs(pci_bus, vfu_object_set_irq, vfu_object_map_irq, pci_bus,
                 max_bdf + 1);
}
/*
* TYPE_VFU_OBJECT depends on the availability of the 'socket' and 'device'
* properties. It also depends on devices instantiated in QEMU. These
@ -632,6 +788,13 @@ static void vfu_object_init_ctx(VfuObject *o, Error **errp)
vfu_object_register_bars(o->vfu_ctx, o->pci_dev);
ret = vfu_object_setup_irqs(o, o->pci_dev);
if (ret < 0) {
error_setg(errp, "vfu: Failed to setup interrupts for %s",
o->device);
goto fail;
}
ret = vfu_realize_ctx(o->vfu_ctx);
if (ret < 0) {
error_setg(errp, "vfu: Failed to realize device %s- %s",
@ -657,6 +820,8 @@ fail:
o->unplug_blocker = NULL;
}
if (o->pci_dev) {
vfu_object_restore_msi_cbs(o);
o->pci_dev->irq_opaque = NULL;
object_unref(OBJECT(o->pci_dev));
o->pci_dev = NULL;
}
@ -716,6 +881,8 @@ static void vfu_object_finalize(Object *obj)
}
if (o->pci_dev) {
vfu_object_restore_msi_cbs(o);
o->pci_dev->irq_opaque = NULL;
object_unref(OBJECT(o->pci_dev));
o->pci_dev = NULL;
}

View File

@ -43,6 +43,7 @@ void msi_notify(PCIDevice *dev, unsigned int vector);
void msi_send_message(PCIDevice *dev, MSIMessage msg);
void msi_write_config(PCIDevice *dev, uint32_t addr, uint32_t val, int len);
unsigned int msi_nr_vectors_allocated(const PCIDevice *dev);
void msi_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp);
static inline bool msi_present(const PCIDevice *dev)
{

View File

@ -36,6 +36,7 @@ void msix_clr_pending(PCIDevice *dev, int vector);
int msix_vector_use(PCIDevice *dev, unsigned vector);
void msix_vector_unuse(PCIDevice *dev, unsigned vector);
void msix_unuse_all_vectors(PCIDevice *dev);
void msix_set_mask(PCIDevice *dev, int vector, bool mask, Error **errp);
void msix_notify(PCIDevice *dev, unsigned vector);

View File

@ -16,6 +16,7 @@ extern bool pci_available;
#define PCI_SLOT(devfn) (((devfn) >> 3) & 0x1f)
#define PCI_FUNC(devfn) ((devfn) & 0x07)
/* Combine a bus number and a devfn into a BDF; both arguments may be expressions. */
#define PCI_BUILD_BDF(bus, devfn)     (((bus) << 8) | (devfn))
#define PCI_BDF_TO_DEVFN(x) ((x) & 0xff)
#define PCI_BUS_MAX 256
#define PCI_DEVFN_MAX 256
#define PCI_SLOT_MAX 32
@ -127,6 +128,10 @@ typedef void PCIMapIORegionFunc(PCIDevice *pci_dev, int region_num,
pcibus_t addr, pcibus_t size, int type);
typedef void PCIUnregisterFunc(PCIDevice *pci_dev);
typedef void MSITriggerFunc(PCIDevice *dev, MSIMessage msg);
typedef MSIMessage MSIPrepareMessageFunc(PCIDevice *dev, unsigned vector);
typedef MSIMessage MSIxPrepareMessageFunc(PCIDevice *dev, unsigned vector);
typedef struct PCIIORegion {
pcibus_t addr; /* current PCI mapping address. -1 means not mapped */
#define PCI_BAR_UNMAPPED (~(pcibus_t)0)
@ -329,6 +334,14 @@ struct PCIDevice {
/* Space to store MSIX table & pending bit array */
uint8_t *msix_table;
uint8_t *msix_pba;
/* May be used by INTx or MSI during interrupt notification */
void *irq_opaque;
MSITriggerFunc *msi_trigger;
MSIPrepareMessageFunc *msi_prepare_message;
MSIxPrepareMessageFunc *msix_prepare_message;
/* MemoryRegion container for msix exclusive BAR setup */
MemoryRegion msix_exclusive_bar;
/* Memory Regions for MSIX table and pending bit entries. */

View File

@ -0,0 +1,6 @@
/* Interface the remote machine uses to hook up vfio-user interrupt handling. */
#ifndef VFIO_USER_OBJ_H
#define VFIO_USER_OBJ_H
/*
 * Register the vfio-user INTx set_irq/map_irq handlers on @pci_bus.
 * A no-op stub is built when CONFIG_VFIO_USER_SERVER is not enabled.
 */
void vfu_object_set_bus_irq(PCIBus *pci_bus);
#endif /* VFIO_USER_OBJ_H */

View File

@ -60,3 +60,4 @@ if have_system
else
stub_ss.add(files('qdev.c'))
endif
stub_ss.add(when: 'CONFIG_VFIO_USER_SERVER', if_false: files('vfio-user-obj.c'))

6
stubs/vfio-user-obj.c Normal file
View File

@ -0,0 +1,6 @@
#include "qemu/osdep.h"
#include "hw/remote/vfio-user-obj.h"
/*
 * Stub used when CONFIG_VFIO_USER_SERVER is not enabled: there is no
 * vfio-user server to forward interrupts to, so nothing is installed on
 * @pci_bus.
 */
void vfu_object_set_bus_irq(PCIBus *pci_bus)
{
}