From 65f6ae66a6fb44f614a1226c398fcb38e94b3c59 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 13 May 2013 11:05:48 +0200 Subject: [PATCH 1/2] PCI: Allocate only as many MSI vectors as requested by driver Because of the encoding of the "Multiple Message Capable" and "Multiple Message Enable" fields, a device can only advertise that it's capable of a power-of-two number of vectors, and the OS can only enable a power-of-two number. For example, a device that's limited internally to using 18 vectors would have to advertise that it's capable of 32. The 14 extra vectors consume vector numbers and IRQ descriptors even though the device can't actually use them. This fix introduces a 'msi_desc::nvec_used' field to address this issue. When non-zero, it is the actual number of MSIs the device will send, as requested by the device driver. This value should be used by architectures to set up and tear down only as many interrupt resources as the device will actually use. Note, although the existing 'msi_desc::multiple' field might seem redundant, in fact it is not. The number of MSIs advertised need not be the smallest power-of-two larger than the number of MSIs the device will send. Thus, it is not always possible to derive the former from the latter, so we need to keep them both to handle this case. [bhelgaas: changelog, rename to "nvec_used"] Signed-off-by: Alexander Gordeev Signed-off-by: Bjorn Helgaas --- drivers/pci/msi.c | 10 ++++++++-- include/linux/msi.h | 1 + 2 files changed, 9 insertions(+), 2 deletions(-) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 2c1075213bec..aca7578b05e5 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -81,7 +81,10 @@ void default_teardown_msi_irqs(struct pci_dev *dev) int i, nvec; if (entry->irq == 0) continue; - nvec = 1 << entry->msi_attrib.multiple; + if (entry->nvec_used) + nvec = entry->nvec_used; + else + nvec = 1 << entry->msi_attrib.multiple; for (i = 0; i < nvec; i++) arch_teardown_msi_irq(entry->irq + i); } @@ -336,7 +339,10 @@ static void free_msi_irqs(struct pci_dev *dev) int i, nvec; if (!entry->irq) continue; - nvec = 1 << entry->msi_attrib.multiple; + if (entry->nvec_used) + nvec = entry->nvec_used; + else + nvec = 1 << entry->msi_attrib.multiple; #ifdef CONFIG_GENERIC_HARDIRQS for (i = 0; i < nvec; i++) BUG_ON(irq_has_action(entry->irq + i)); diff --git a/include/linux/msi.h b/include/linux/msi.h index 20c2d6dd5d25..ee66f3a12fb6 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -35,6 +35,7 @@ struct msi_desc { u32 masked; /* mask bits */ unsigned int irq; + unsigned int nvec_used; /* number of messages */ struct list_head list; union { From 5fec945105448b958b8418b7e5d043d630ec3155 Mon Sep 17 00:00:00 2001 From: Alexander Gordeev Date: Mon, 13 May 2013 11:06:17 +0200 Subject: [PATCH 2/2] x86/MSI: Conserve interrupt resources when using multiple-MSIs Current multiple-MSI implementation does not take into account actual number of requested MSIs and always rounds that number to a larger power-of-two value. Yet, the number of MSIs a PCI device could send (and therefore the number of messages a device driver could request) may be smaller. As result, resources allocated for extra MSIs are just wasted. This update takes advantage of 'msi_desc::nvec_used' field introduced with generic MSI code to track the number of requested and used MSIs. As result, resources associated with interrupts are conserved. Of those resources most noticeable are x86 interrupt vectors. The initial version of this fix also conserved IRTEs, but Jan noticed that a malfunctioning PCI device might send a message number it did not claim and thus refer to an IRTE it does not own. To avoid this security hole, as many IRTEs are reserved as the device could possibly send. [bhelgaas: changelog, rename to "nvec_used"] Signed-off-by: Alexander Gordeev Signed-off-by: Bjorn Helgaas --- drivers/iommu/irq_remapping.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/irq_remapping.c b/drivers/iommu/irq_remapping.c index dcfea4e39be7..39f81aeefcd6 100644 --- a/drivers/iommu/irq_remapping.c +++ b/drivers/iommu/irq_remapping.c @@ -51,26 +51,27 @@ static void irq_remapping_disable_io_apic(void) static int do_setup_msi_irqs(struct pci_dev *dev, int nvec) { - int node, ret, sub_handle, index = 0; + int node, ret, sub_handle, nvec_pow2, index = 0; unsigned int irq; struct msi_desc *msidesc; - nvec = __roundup_pow_of_two(nvec); - WARN_ON(!list_is_singular(&dev->msi_list)); msidesc = list_entry(dev->msi_list.next, struct msi_desc, list); WARN_ON(msidesc->irq); WARN_ON(msidesc->msi_attrib.multiple); + WARN_ON(msidesc->nvec_used); node = dev_to_node(&dev->dev); irq = __create_irqs(get_nr_irqs_gsi(), nvec, node); if (irq == 0) return -ENOSPC; - msidesc->msi_attrib.multiple = ilog2(nvec); + nvec_pow2 = __roundup_pow_of_two(nvec); + msidesc->nvec_used = nvec; + msidesc->msi_attrib.multiple = ilog2(nvec_pow2); for (sub_handle = 0; sub_handle < nvec; sub_handle++) { if (!sub_handle) { - index = msi_alloc_remapped_irq(dev, irq, nvec); + index = msi_alloc_remapped_irq(dev, irq, nvec_pow2); if (index < 0) { ret = index; goto error; @@ -95,6 +96,7 @@ error: * IRQs from tearing down again in default_teardown_msi_irqs() */ msidesc->irq = 0; + msidesc->nvec_used = 0; msidesc->msi_attrib.multiple = 0; return ret;