diff --git a/hw/vfio/pci.c b/hw/vfio/pci.c
index 8bc36f081a..ef9d7bf326 100644
--- a/hw/vfio/pci.c
+++ b/hw/vfio/pci.c
@@ -45,6 +45,9 @@
 
 #define TYPE_VFIO_PCI_NOHOTPLUG "vfio-pci-nohotplug"
 
+/* Protected by BQL */
+static KVMRouteChange vfio_route_change;
+
 static void vfio_disable_interrupts(VFIOPCIDevice *vdev);
 static void vfio_mmap_set_enabled(VFIOPCIDevice *vdev, bool enabled);
 static void vfio_msi_disable_common(VFIOPCIDevice *vdev);
@@ -413,33 +416,36 @@ static int vfio_enable_vectors(VFIOPCIDevice *vdev, bool msix)
 static void vfio_add_kvm_msi_virq(VFIOPCIDevice *vdev, VFIOMSIVector *vector,
                                   int vector_n, bool msix)
 {
-    KVMRouteChange c;
-    int virq;
-
     if ((msix && vdev->no_kvm_msix) || (!msix && vdev->no_kvm_msi)) {
         return;
     }
 
-    if (event_notifier_init(&vector->kvm_interrupt, 0)) {
+    vector->virq = kvm_irqchip_add_msi_route(&vfio_route_change,
+                                             vector_n, &vdev->pdev);
+}
+
+static void vfio_connect_kvm_msi_virq(VFIOMSIVector *vector)
+{
+    if (vector->virq < 0) {
         return;
     }
 
-    c = kvm_irqchip_begin_route_changes(kvm_state);
-    virq = kvm_irqchip_add_msi_route(&c, vector_n, &vdev->pdev);
-    if (virq < 0) {
-        event_notifier_cleanup(&vector->kvm_interrupt);
-        return;
+    if (event_notifier_init(&vector->kvm_interrupt, 0)) {
+        goto fail_notifier;
     }
-    kvm_irqchip_commit_route_changes(&c);
 
     if (kvm_irqchip_add_irqfd_notifier_gsi(kvm_state, &vector->kvm_interrupt,
-                                           NULL, virq) < 0) {
-        kvm_irqchip_release_virq(kvm_state, virq);
-        event_notifier_cleanup(&vector->kvm_interrupt);
-        return;
+                                           NULL, vector->virq) < 0) {
+        goto fail_kvm;
     }
 
-    vector->virq = virq;
+    return;
+
+fail_kvm:
+    event_notifier_cleanup(&vector->kvm_interrupt);
+fail_notifier:
+    kvm_irqchip_release_virq(kvm_state, vector->virq);
+    vector->virq = -1;
 }
 
 static void vfio_remove_kvm_msi_virq(VFIOMSIVector *vector)
@@ -494,7 +500,14 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
         }
     } else {
         if (msg) {
-            vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+            if (vdev->defer_kvm_irq_routing) {
+                vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+            } else {
+                vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
+                vfio_add_kvm_msi_virq(vdev, vector, nr, true);
+                kvm_irqchip_commit_route_changes(&vfio_route_change);
+                vfio_connect_kvm_msi_virq(vector);
+            }
         }
     }
 
@@ -504,11 +517,13 @@ static int vfio_msix_vector_do_use(PCIDevice *pdev, unsigned int nr,
      * increase them as needed.
      */
     if (vdev->nr_vectors < nr + 1) {
-        vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
         vdev->nr_vectors = nr + 1;
-        ret = vfio_enable_vectors(vdev, true);
-        if (ret) {
-            error_report("vfio: failed to enable vectors, %d", ret);
+        if (!vdev->defer_kvm_irq_routing) {
+            vfio_disable_irqindex(&vdev->vbasedev, VFIO_PCI_MSIX_IRQ_INDEX);
+            ret = vfio_enable_vectors(vdev, true);
+            if (ret) {
+                error_report("vfio: failed to enable vectors, %d", ret);
+            }
         }
     } else {
         Error *err = NULL;
@@ -570,6 +585,27 @@ static void vfio_msix_vector_release(PCIDevice *pdev, unsigned int nr)
     }
 }
 
+static void vfio_prepare_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
+{
+    assert(!vdev->defer_kvm_irq_routing);
+    vdev->defer_kvm_irq_routing = true;
+    vfio_route_change = kvm_irqchip_begin_route_changes(kvm_state);
+}
+
+static void vfio_commit_kvm_msi_virq_batch(VFIOPCIDevice *vdev)
+{
+    int i;
+
+    assert(vdev->defer_kvm_irq_routing);
+    vdev->defer_kvm_irq_routing = false;
+
+    kvm_irqchip_commit_route_changes(&vfio_route_change);
+
+    for (i = 0; i < vdev->nr_vectors; i++) {
+        vfio_connect_kvm_msi_virq(&vdev->msi_vectors[i]);
+    }
+}
+
 static void vfio_msix_enable(VFIOPCIDevice *vdev)
 {
     vfio_disable_interrupts(vdev);
@@ -579,26 +615,45 @@ static void vfio_msix_enable(VFIOPCIDevice *vdev)
     vdev->interrupt = VFIO_INT_MSIX;
 
     /*
-     * Some communication channels between VF & PF or PF & fw rely on the
-     * physical state of the device and expect that enabling MSI-X from the
-     * guest enables the same on the host. When our guest is Linux, the
-     * guest driver call to pci_enable_msix() sets the enabling bit in the
-     * MSI-X capability, but leaves the vector table masked. We therefore
-     * can't rely on a vector_use callback (from request_irq() in the guest)
-     * to switch the physical device into MSI-X mode because that may come a
-     * long time after pci_enable_msix(). This code enables vector 0 with
-     * triggering to userspace, then immediately release the vector, leaving
-     * the physical device with no vectors enabled, but MSI-X enabled, just
-     * like the guest view.
+     * Setting vector notifiers triggers synchronous vector-use
+     * callbacks for each active vector. Deferring to commit the KVM
+     * routes once rather than per vector provides a substantial
+     * performance improvement.
      */
-    vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
-    vfio_msix_vector_release(&vdev->pdev, 0);
+    vfio_prepare_kvm_msi_virq_batch(vdev);
 
     if (msix_set_vector_notifiers(&vdev->pdev, vfio_msix_vector_use,
                                   vfio_msix_vector_release, NULL)) {
         error_report("vfio: msix_set_vector_notifiers failed");
     }
 
+    vfio_commit_kvm_msi_virq_batch(vdev);
+
+    if (vdev->nr_vectors) {
+        int ret;
+
+        ret = vfio_enable_vectors(vdev, true);
+        if (ret) {
+            error_report("vfio: failed to enable vectors, %d", ret);
+        }
+    } else {
+        /*
+         * Some communication channels between VF & PF or PF & fw rely on the
+         * physical state of the device and expect that enabling MSI-X from the
+         * guest enables the same on the host. When our guest is Linux, the
+         * guest driver call to pci_enable_msix() sets the enabling bit in the
+         * MSI-X capability, but leaves the vector table masked. We therefore
+         * can't rely on a vector_use callback (from request_irq() in the guest)
+         * to switch the physical device into MSI-X mode because that may come a
+         * long time after pci_enable_msix(). This code enables vector 0 with
+         * triggering to userspace, then immediately release the vector, leaving
+         * the physical device with no vectors enabled, but MSI-X enabled, just
+         * like the guest view.
+         */
+        vfio_msix_vector_do_use(&vdev->pdev, 0, NULL, NULL);
+        vfio_msix_vector_release(&vdev->pdev, 0);
+    }
+
     trace_vfio_msix_enable(vdev->vbasedev.name);
 }
 
@@ -608,6 +663,13 @@ static void vfio_msi_enable(VFIOPCIDevice *vdev)
 
     vfio_disable_interrupts(vdev);
 
+    /*
+     * Setting vector notifiers needs to enable route for each vector.
+     * Deferring to commit the KVM routes once rather than per vector
+     * provides a substantial performance improvement.
+     */
+    vfio_prepare_kvm_msi_virq_batch(vdev);
+
     vdev->nr_vectors = msi_nr_vectors_allocated(&vdev->pdev);
 retry:
     vdev->msi_vectors = g_new0(VFIOMSIVector, vdev->nr_vectors);
@@ -633,6 +695,8 @@ retry:
         vfio_add_kvm_msi_virq(vdev, vector, i, false);
     }
 
+    vfio_commit_kvm_msi_virq_batch(vdev);
+
     /* Set interrupt type prior to possible interrupts */
     vdev->interrupt = VFIO_INT_MSI;
 
diff --git a/hw/vfio/pci.h b/hw/vfio/pci.h
index 64777516d1..7c236a52f4 100644
--- a/hw/vfio/pci.h
+++ b/hw/vfio/pci.h
@@ -19,6 +19,7 @@
 #include "qemu/queue.h"
 #include "qemu/timer.h"
 #include "qom/object.h"
+#include "sysemu/kvm.h"
 
 #define PCI_ANY_ID (~0)
 
@@ -171,6 +172,7 @@ struct VFIOPCIDevice {
     bool no_kvm_ioeventfd;
     bool no_vfio_ioeventfd;
     bool enable_ramfb;
+    bool defer_kvm_irq_routing;
     VFIODisplay *dpy;
     Notifier irqchip_change_notifier;
 };
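
Note, not part of the patch itself: the fragment below sketches the KVMRouteChange batching pattern that vfio_prepare_kvm_msi_virq_batch()/vfio_commit_kvm_msi_virq_batch() wrap, using the same kvm_irqchip_* calls as the hunks above; nvec, pdev and the virq array are hypothetical locals used only for illustration.

    /* Queue every MSI route first, then push them to KVM in one commit. */
    KVMRouteChange c = kvm_irqchip_begin_route_changes(kvm_state);
    int *virq = g_new(int, nvec);            /* hypothetical per-vector storage */

    for (int i = 0; i < nvec; i++) {
        /* No ioctl here; the route only joins the pending batch. */
        virq[i] = kvm_irqchip_add_msi_route(&c, i, pdev);
    }
    kvm_irqchip_commit_route_changes(&c);    /* one KVM_SET_GSI_ROUTING ioctl */

The per-vector irqfd hookup (event_notifier_init() plus kvm_irqchip_add_irqfd_notifier_gsi()) then runs against the committed virqs, which is what vfio_commit_kvm_msi_virq_batch() does by calling vfio_connect_kvm_msi_virq() for each vector after the commit.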