From 43f7392ba9e2585bf34f21399b1ed78692b5d437 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Sat, 3 Jan 2009 23:56:27 +0100 Subject: [PATCH 1/3] intel-iommu: fix build error with INTR_REMAP=y and DMAR=n This fix should be safe since iommu->agaw is only used in intel-iommu.c. And this file is only compiled with DMAR=y. Signed-off-by: Joerg Roedel Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index f5a662a50acb..2b4162d9ca30 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -491,7 +491,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) int map_size; u32 ver; static int iommu_allocated = 0; - int agaw; + int agaw = 0; iommu = kzalloc(sizeof(*iommu), GFP_KERNEL); if (!iommu) @@ -507,6 +507,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->cap = dmar_readq(iommu->reg + DMAR_CAP_REG); iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG); +#ifdef CONFIG_DMAR agaw = iommu_calculate_agaw(iommu); if (agaw < 0) { printk(KERN_ERR @@ -514,6 +515,7 @@ int alloc_iommu(struct dmar_drhd_unit *drhd) iommu->seq_id); goto error; } +#endif iommu->agaw = agaw; /* the registers might be more than one page */ From 704126ad81b8cb7d3d70adb9ecb143f4d3fb38af Mon Sep 17 00:00:00 2001 From: Yu Zhao Date: Sun, 4 Jan 2009 16:28:52 +0800 Subject: [PATCH 2/3] VT-d: handle Invalidation Queue Error to avoid system hang When hardware detects any error with a descriptor from the invalidation queue, it stops fetching new descriptors from the queue until software clears the Invalidation Queue Error bit in the Fault Status register. Following fix handles the IQE so the kernel won't be trapped in an infinite loop. Signed-off-by: Yu Zhao Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 61 ++++++++++++++++++++++++++---------- drivers/pci/intr_remapping.c | 21 +++++++------ include/linux/intel-iommu.h | 3 +- 3 files changed, 59 insertions(+), 26 deletions(-) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 2b4162d9ca30..8d3e9c261061 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -573,19 +573,49 @@ static inline void reclaim_free_desc(struct q_inval *qi) } } +static int qi_check_fault(struct intel_iommu *iommu, int index) +{ + u32 fault; + int head; + struct q_inval *qi = iommu->qi; + int wait_index = (index + 1) % QI_LENGTH; + + fault = readl(iommu->reg + DMAR_FSTS_REG); + + /* + * If IQE happens, the head points to the descriptor associated + * with the error. No new descriptors are fetched until the IQE + * is cleared. + */ + if (fault & DMA_FSTS_IQE) { + head = readl(iommu->reg + DMAR_IQH_REG); + if ((head >> 4) == index) { + memcpy(&qi->desc[index], &qi->desc[wait_index], + sizeof(struct qi_desc)); + __iommu_flush_cache(iommu, &qi->desc[index], + sizeof(struct qi_desc)); + writel(DMA_FSTS_IQE, iommu->reg + DMAR_FSTS_REG); + return -EINVAL; + } + } + + return 0; +} + /* * Submit the queued invalidation descriptor to the remapping * hardware unit and wait for its completion. */ -void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) +int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) { + int rc = 0; struct q_inval *qi = iommu->qi; struct qi_desc *hw, wait_desc; int wait_index, index; unsigned long flags; if (!qi) - return; + return 0; hw = qi->desc; @@ -603,7 +633,8 @@ void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) hw[index] = *desc; - wait_desc.low = QI_IWD_STATUS_DATA(2) | QI_IWD_STATUS_WRITE | QI_IWD_TYPE; + wait_desc.low = QI_IWD_STATUS_DATA(QI_DONE) | + QI_IWD_STATUS_WRITE | QI_IWD_TYPE; wait_desc.high = virt_to_phys(&qi->desc_status[wait_index]); hw[wait_index] = wait_desc; @@ -614,13 +645,11 @@ void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) qi->free_head = (qi->free_head + 2) % QI_LENGTH; qi->free_cnt -= 2; - spin_lock(&iommu->register_lock); /* * update the HW tail register indicating the presence of * new descriptors. */ writel(qi->free_head << 4, iommu->reg + DMAR_IQT_REG); - spin_unlock(&iommu->register_lock); while (qi->desc_status[wait_index] != QI_DONE) { /* @@ -630,15 +659,21 @@ void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu) * a deadlock where the interrupt context can wait indefinitely * for free slots in the queue. */ + rc = qi_check_fault(iommu, index); + if (rc) + goto out; + spin_unlock(&qi->q_lock); cpu_relax(); spin_lock(&qi->q_lock); } - - qi->desc_status[index] = QI_DONE; +out: + qi->desc_status[index] = qi->desc_status[wait_index] = QI_DONE; reclaim_free_desc(qi); spin_unlock_irqrestore(&qi->q_lock, flags); + + return rc; } /* @@ -651,13 +686,13 @@ void qi_global_iec(struct intel_iommu *iommu) desc.low = QI_IEC_TYPE; desc.high = 0; + /* should never fail */ qi_submit_sync(&desc, iommu); } int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, u64 type, int non_present_entry_flush) { - struct qi_desc desc; if (non_present_entry_flush) { @@ -671,10 +706,7 @@ int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid, u8 fm, | QI_CC_GRAN(type) | QI_CC_TYPE; desc.high = 0; - qi_submit_sync(&desc, iommu); - - return 0; - + return qi_submit_sync(&desc, iommu); } int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, @@ -704,10 +736,7 @@ int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, desc.high = QI_IOTLB_ADDR(addr) | QI_IOTLB_IH(ih) | QI_IOTLB_AM(size_order); - qi_submit_sync(&desc, iommu); - - return 0; - + return qi_submit_sync(&desc, iommu); } /* diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index f78371b22529..45effc5726c0 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -207,7 +207,7 @@ int alloc_irte(struct intel_iommu *iommu, int irq, u16 count) return index; } -static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask) +static int qi_flush_iec(struct intel_iommu *iommu, int index, int mask) { struct qi_desc desc; @@ -215,7 +215,7 @@ static void qi_flush_iec(struct intel_iommu *iommu, int index, int mask) | QI_IEC_SELECTIVE; desc.high = 0; - qi_submit_sync(&desc, iommu); + return qi_submit_sync(&desc, iommu); } int map_irq_to_irte_handle(int irq, u16 *sub_handle) @@ -283,6 +283,7 @@ int clear_irte_irq(int irq, struct intel_iommu *iommu, u16 index) int modify_irte(int irq, struct irte *irte_modified) { + int rc; int index; struct irte *irte; struct intel_iommu *iommu; @@ -303,14 +304,15 @@ int modify_irte(int irq, struct irte *irte_modified) set_64bit((unsigned long *)irte, irte_modified->low | (1 << 1)); __iommu_flush_cache(iommu, irte, sizeof(*irte)); - qi_flush_iec(iommu, index, 0); - + rc = qi_flush_iec(iommu, index, 0); spin_unlock(&irq_2_ir_lock); - return 0; + + return rc; } int flush_irte(int irq) { + int rc; int index; struct intel_iommu *iommu; struct irq_2_iommu *irq_iommu; @@ -326,10 +328,10 @@ int flush_irte(int irq) index = irq_iommu->irte_index + irq_iommu->sub_handle; - qi_flush_iec(iommu, index, irq_iommu->irte_mask); + rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); spin_unlock(&irq_2_ir_lock); - return 0; + return rc; } struct intel_iommu *map_ioapic_to_ir(int apic) @@ -355,6 +357,7 @@ struct intel_iommu *map_dev_to_ir(struct pci_dev *dev) int free_irte(int irq) { + int rc = 0; int index, i; struct irte *irte; struct intel_iommu *iommu; @@ -375,7 +378,7 @@ int free_irte(int irq) if (!irq_iommu->sub_handle) { for (i = 0; i < (1 << irq_iommu->irte_mask); i++) set_64bit((unsigned long *)irte, 0); - qi_flush_iec(iommu, index, irq_iommu->irte_mask); + rc = qi_flush_iec(iommu, index, irq_iommu->irte_mask); } irq_iommu->iommu = NULL; @@ -385,7 +388,7 @@ int free_irte(int irq) spin_unlock(&irq_2_ir_lock); - return 0; + return rc; } static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c4f6c101dbcd..d2e3cbfba14f 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -194,6 +194,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) /* FSTS_REG */ #define DMA_FSTS_PPF ((u32)2) #define DMA_FSTS_PFO ((u32)1) +#define DMA_FSTS_IQE (1 << 4) #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) /* FRCD_REG, 32 bits access */ @@ -328,7 +329,7 @@ extern int qi_flush_iotlb(struct intel_iommu *iommu, u16 did, u64 addr, unsigned int size_order, u64 type, int non_present_entry_flush); -extern void qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); +extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern void *intel_alloc_coherent(struct device *, size_t, dma_addr_t *, gfp_t); extern void intel_free_coherent(struct device *, size_t, void *, dma_addr_t); From 084eb960e81505680a9963665722d1bfd94af6a7 Mon Sep 17 00:00:00 2001 From: Tony Battersby Date: Wed, 11 Feb 2009 13:24:19 -0800 Subject: [PATCH 3/3] intel-iommu: fix endless "Unknown DMAR structure type" loop I have a SuperMicro C2SBX motherboard with BIOS revision 1.0b. With vt-d enabled in the BIOS, Linux gets into an endless loop printing "DMAR:Unknown DMAR structure type" when booting. Here is the DMAR ACPI table: DMAR @ 0x7fe86dec 0000: 44 4d 41 52 98 00 00 00 01 6f 49 6e 74 65 6c 20 DMAR.....oIntel 0010: 4f 45 4d 44 4d 41 52 20 00 00 04 06 4c 4f 48 52 OEMDMAR ....LOHR 0020: 01 00 00 00 23 00 00 00 00 00 00 00 00 00 00 00 ....#........... 0030: 01 00 58 00 00 00 00 00 00 a0 e8 7f 00 00 00 00 ..X............. 0040: ff ff ef 7f 00 00 00 00 01 08 00 00 00 00 1d 00 ................ 0050: 01 08 00 00 00 00 1d 01 01 08 00 00 00 00 1d 02 ................ 0060: 01 08 00 00 00 00 1d 07 01 08 00 00 00 00 1a 00 ................ 0070: 01 08 00 00 00 00 1a 01 01 08 00 00 00 00 1a 02 ................ 0080: 01 08 00 00 00 00 1a 07 01 08 00 00 00 00 1a 07 ................ 0090: c0 00 68 00 04 10 66 60 ..h...f` Here are the messages printed by the kernel: DMAR:Host address width 36 DMAR:RMRR base: 0x000000007fe8a000 end: 0x000000007fefffff DMAR:Unknown DMAR structure type DMAR:Unknown DMAR structure type DMAR:Unknown DMAR structure type ... Although I not very familiar with ACPI, to me it looks like struct acpi_dmar_header::length == 0x0058 is incorrect, causing parse_dmar_table() to look at an invalid offset on the next loop. This offset happens to have struct acpi_dmar_header::length == 0x0000, which prevents the loop from ever terminating. This patch checks for this condition and bails out instead of looping forever. Signed-off-by: Tony Battersby Signed-off-by: Andrew Morton Signed-off-by: David Woodhouse --- drivers/pci/dmar.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index 8d3e9c261061..26c536b51c5a 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -330,6 +330,14 @@ parse_dmar_table(void) entry_header = (struct acpi_dmar_header *)(dmar + 1); while (((unsigned long)entry_header) < (((unsigned long)dmar) + dmar_tbl->length)) { + /* Avoid looping forever on bad ACPI tables */ + if (entry_header->length == 0) { + printk(KERN_WARNING PREFIX + "Invalid 0-length structure\n"); + ret = -EINVAL; + break; + } + dmar_table_print_dmar_entry(entry_header); switch (entry_header->type) {