diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 5b2196ab8168..4b3408206091 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -253,6 +253,7 @@ config SMP config X86_X2APIC bool "Support x2apic" depends on X86_LOCAL_APIC && X86_64 + select INTR_REMAP ---help--- This enables x2apic support on CPUs that have this feature. @@ -1881,7 +1882,6 @@ config DMAR_FLOPPY_WA config INTR_REMAP bool "Support for Interrupt Remapping (EXPERIMENTAL)" depends on X86_64 && X86_IO_APIC && PCI_MSI && ACPI && EXPERIMENTAL - select X86_X2APIC ---help--- Supports Interrupt remapping for IO-APIC and MSI devices. To use x2apic mode in the CPU's which support x2APIC enhancements or diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index df8a300dfe6c..42f2f8377422 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -107,6 +107,9 @@ extern u32 native_safe_apic_wait_icr_idle(void); extern void native_apic_icr_write(u32 low, u32 id); extern u64 native_apic_icr_read(void); +#define EIM_8BIT_APIC_ID 0 +#define EIM_32BIT_APIC_ID 1 + #ifdef CONFIG_X86_X2APIC /* * Make previous memory operations globally visible before diff --git a/arch/x86/include/asm/io_apic.h b/arch/x86/include/asm/io_apic.h index 373cc2bbcad2..9d826e436010 100644 --- a/arch/x86/include/asm/io_apic.h +++ b/arch/x86/include/asm/io_apic.h @@ -162,10 +162,13 @@ extern int (*ioapic_renumber_irq)(int ioapic, int irq); extern void ioapic_init_mappings(void); #ifdef CONFIG_X86_64 -extern int save_IO_APIC_setup(void); -extern void mask_IO_APIC_setup(void); -extern void restore_IO_APIC_setup(void); -extern void reinit_intr_remapped_IO_APIC(int); +extern struct IO_APIC_route_entry **alloc_ioapic_entries(void); +extern void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries); +extern int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); +extern void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); +extern int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries); +extern void reinit_intr_remapped_IO_APIC(int intr_remapping, + struct IO_APIC_route_entry **ioapic_entries); #endif extern void probe_nr_irqs_gsi(void); diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 85eb8e100818..098ec84b8c00 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1304,6 +1304,7 @@ void __init enable_IR_x2apic(void) #ifdef CONFIG_INTR_REMAP int ret; unsigned long flags; + struct IO_APIC_route_entry **ioapic_entries = NULL; if (!cpu_has_x2apic) return; @@ -1334,17 +1335,23 @@ void __init enable_IR_x2apic(void) return; } - ret = save_IO_APIC_setup(); + ioapic_entries = alloc_ioapic_entries(); + if (!ioapic_entries) { + pr_info("Allocate ioapic_entries failed: %d\n", ret); + goto end; + } + + ret = save_IO_APIC_setup(ioapic_entries); if (ret) { pr_info("Saving IO-APIC state failed: %d\n", ret); goto end; } local_irq_save(flags); - mask_IO_APIC_setup(); + mask_IO_APIC_setup(ioapic_entries); mask_8259A(); - ret = enable_intr_remapping(1); + ret = enable_intr_remapping(EIM_32BIT_APIC_ID); if (ret && x2apic_preenabled) { local_irq_restore(flags); @@ -1364,9 +1371,9 @@ end_restore: /* * IR enabling failed */ - restore_IO_APIC_setup(); + restore_IO_APIC_setup(ioapic_entries); else - reinit_intr_remapped_IO_APIC(x2apic_preenabled); + reinit_intr_remapped_IO_APIC(x2apic_preenabled, ioapic_entries); unmask_8259A(); local_irq_restore(flags); @@ -1379,6 +1386,8 @@ end: pr_info("Enabled Interrupt-remapping\n"); } else pr_err("Failed to enable Interrupt-remapping and x2apic\n"); + if (ioapic_entries) + free_ioapic_entries(ioapic_entries); #else if (!cpu_has_x2apic) return; @@ -1954,6 +1963,10 @@ static int lapic_suspend(struct sys_device *dev, pm_message_t state) local_irq_save(flags); disable_local_APIC(); +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + disable_intr_remapping(); +#endif local_irq_restore(flags); return 0; } @@ -1964,15 +1977,41 @@ static int lapic_resume(struct sys_device *dev) unsigned long flags; int maxlvt; +#ifdef CONFIG_INTR_REMAP + int ret; + struct IO_APIC_route_entry **ioapic_entries = NULL; + if (!apic_pm_state.active) return 0; - maxlvt = lapic_get_maxlvt(); + local_irq_save(flags); + if (x2apic) { + ioapic_entries = alloc_ioapic_entries(); + if (!ioapic_entries) { + WARN(1, "Alloc ioapic_entries in lapic resume failed."); + return -ENOMEM; + } + + ret = save_IO_APIC_setup(ioapic_entries); + if (ret) { + WARN(1, "Saving IO-APIC state failed: %d\n", ret); + free_ioapic_entries(ioapic_entries); + return ret; + } + + mask_IO_APIC_setup(ioapic_entries); + mask_8259A(); + enable_x2apic(); + } +#else + if (!apic_pm_state.active) + return 0; local_irq_save(flags); - if (x2apic) enable_x2apic(); +#endif + else { /* * Make sure the APICBASE points to the right address @@ -1986,6 +2025,7 @@ static int lapic_resume(struct sys_device *dev) wrmsr(MSR_IA32_APICBASE, l, h); } + maxlvt = lapic_get_maxlvt(); apic_write(APIC_LVTERR, ERROR_APIC_VECTOR | APIC_LVT_MASKED); apic_write(APIC_ID, apic_pm_state.apic_id); apic_write(APIC_DFR, apic_pm_state.apic_dfr); @@ -2009,8 +2049,20 @@ static int lapic_resume(struct sys_device *dev) apic_write(APIC_ESR, 0); apic_read(APIC_ESR); +#ifdef CONFIG_INTR_REMAP + if (intr_remapping_enabled) + reenable_intr_remapping(EIM_32BIT_APIC_ID); + + if (x2apic) { + unmask_8259A(); + restore_IO_APIC_setup(ioapic_entries); + free_ioapic_entries(ioapic_entries); + } +#endif + local_irq_restore(flags); + return 0; } @@ -2048,7 +2100,9 @@ static int __init init_lapic_sysfs(void) error = sysdev_register(&device_lapic); return error; } -device_initcall(init_lapic_sysfs); + +/* local apic needs to resume before other devices access its registers. */ +core_initcall(init_lapic_sysfs); #else /* CONFIG_PM */ diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1bb5c6cee3eb..767fe7e46d68 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -851,63 +851,74 @@ __setup("pirq=", ioapic_pirq_setup); #endif /* CONFIG_X86_32 */ #ifdef CONFIG_INTR_REMAP -/* I/O APIC RTE contents at the OS boot up */ -static struct IO_APIC_route_entry *early_ioapic_entries[MAX_IO_APICS]; +struct IO_APIC_route_entry **alloc_ioapic_entries(void) +{ + int apic; + struct IO_APIC_route_entry **ioapic_entries; + + ioapic_entries = kzalloc(sizeof(*ioapic_entries) * nr_ioapics, + GFP_ATOMIC); + if (!ioapic_entries) + return 0; + + for (apic = 0; apic < nr_ioapics; apic++) { + ioapic_entries[apic] = + kzalloc(sizeof(struct IO_APIC_route_entry) * + nr_ioapic_registers[apic], GFP_ATOMIC); + if (!ioapic_entries[apic]) + goto nomem; + } + + return ioapic_entries; + +nomem: + while (--apic >= 0) + kfree(ioapic_entries[apic]); + kfree(ioapic_entries); + + return 0; +} /* * Saves all the IO-APIC RTE's */ -int save_IO_APIC_setup(void) +int save_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) { - union IO_APIC_reg_01 reg_01; - unsigned long flags; int apic, pin; - /* - * The number of IO-APIC IRQ registers (== #pins): - */ - for (apic = 0; apic < nr_ioapics; apic++) { - spin_lock_irqsave(&ioapic_lock, flags); - reg_01.raw = io_apic_read(apic, 1); - spin_unlock_irqrestore(&ioapic_lock, flags); - nr_ioapic_registers[apic] = reg_01.bits.entries+1; - } + if (!ioapic_entries) + return -ENOMEM; for (apic = 0; apic < nr_ioapics; apic++) { - early_ioapic_entries[apic] = - kzalloc(sizeof(struct IO_APIC_route_entry) * - nr_ioapic_registers[apic], GFP_KERNEL); - if (!early_ioapic_entries[apic]) - goto nomem; - } + if (!ioapic_entries[apic]) + return -ENOMEM; - for (apic = 0; apic < nr_ioapics; apic++) for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) - early_ioapic_entries[apic][pin] = + ioapic_entries[apic][pin] = ioapic_read_entry(apic, pin); + } return 0; - -nomem: - while (apic >= 0) - kfree(early_ioapic_entries[apic--]); - memset(early_ioapic_entries, 0, - ARRAY_SIZE(early_ioapic_entries)); - - return -ENOMEM; } -void mask_IO_APIC_setup(void) +/* + * Mask all IO APIC entries. + */ +void mask_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) { int apic, pin; + if (!ioapic_entries) + return; + for (apic = 0; apic < nr_ioapics; apic++) { - if (!early_ioapic_entries[apic]) + if (!ioapic_entries[apic]) break; + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) { struct IO_APIC_route_entry entry; - entry = early_ioapic_entries[apic][pin]; + entry = ioapic_entries[apic][pin]; if (!entry.mask) { entry.mask = 1; ioapic_write_entry(apic, pin, entry); @@ -916,22 +927,30 @@ void mask_IO_APIC_setup(void) } } -void restore_IO_APIC_setup(void) +/* + * Restore IO APIC entries which was saved in ioapic_entries. + */ +int restore_IO_APIC_setup(struct IO_APIC_route_entry **ioapic_entries) { int apic, pin; + if (!ioapic_entries) + return -ENOMEM; + for (apic = 0; apic < nr_ioapics; apic++) { - if (!early_ioapic_entries[apic]) - break; + if (!ioapic_entries[apic]) + return -ENOMEM; + for (pin = 0; pin < nr_ioapic_registers[apic]; pin++) ioapic_write_entry(apic, pin, - early_ioapic_entries[apic][pin]); - kfree(early_ioapic_entries[apic]); - early_ioapic_entries[apic] = NULL; + ioapic_entries[apic][pin]); } + return 0; } -void reinit_intr_remapped_IO_APIC(int intr_remapping) +void reinit_intr_remapped_IO_APIC(int intr_remapping, + struct IO_APIC_route_entry **ioapic_entries) + { /* * for now plain restore of previous settings. @@ -940,7 +959,17 @@ void reinit_intr_remapped_IO_APIC(int intr_remapping) * table entries. for now, do a plain restore, and wait for * the setup_IO_APIC_irqs() to do proper initialization. */ - restore_IO_APIC_setup(); + restore_IO_APIC_setup(ioapic_entries); +} + +void free_ioapic_entries(struct IO_APIC_route_entry **ioapic_entries) +{ + int apic; + + for (apic = 0; apic < nr_ioapics; apic++) + kfree(ioapic_entries[apic]); + + kfree(ioapic_entries); } #endif @@ -2495,7 +2524,7 @@ static void irq_complete_move(struct irq_desc **descp) static inline void irq_complete_move(struct irq_desc **descp) {} #endif -#ifdef CONFIG_INTR_REMAP +#ifdef CONFIG_X86_X2APIC static void __eoi_ioapic_irq(unsigned int irq, struct irq_cfg *cfg) { int apic, pin; @@ -2540,7 +2569,6 @@ static void ack_x2apic_edge(unsigned int irq) { ack_x2APIC_irq(); } - #endif static void ack_apic_edge(unsigned int irq) @@ -2651,6 +2679,26 @@ static void ack_apic_level(unsigned int irq) #endif } +#ifdef CONFIG_INTR_REMAP +static void ir_ack_apic_edge(unsigned int irq) +{ +#ifdef CONFIG_X86_X2APIC + if (x2apic_enabled()) + return ack_x2apic_edge(irq); +#endif + return ack_apic_edge(irq); +} + +static void ir_ack_apic_level(unsigned int irq) +{ +#ifdef CONFIG_X86_X2APIC + if (x2apic_enabled()) + return ack_x2apic_level(irq); +#endif + return ack_apic_level(irq); +} +#endif /* CONFIG_INTR_REMAP */ + static struct irq_chip ioapic_chip __read_mostly = { .name = "IO-APIC", .startup = startup_ioapic_irq, @@ -2670,8 +2718,8 @@ static struct irq_chip ir_ioapic_chip __read_mostly = { .mask = mask_IO_APIC_irq, .unmask = unmask_IO_APIC_irq, #ifdef CONFIG_INTR_REMAP - .ack = ack_x2apic_edge, - .eoi = ack_x2apic_level, + .ack = ir_ack_apic_edge, + .eoi = ir_ack_apic_level, #ifdef CONFIG_SMP .set_affinity = set_ir_ioapic_affinity_irq, #endif @@ -3397,7 +3445,7 @@ static struct irq_chip msi_ir_chip = { .unmask = unmask_msi_irq, .mask = mask_msi_irq, #ifdef CONFIG_INTR_REMAP - .ack = ack_x2apic_edge, + .ack = ir_ack_apic_edge, #ifdef CONFIG_SMP .set_affinity = ir_set_msi_irq_affinity, #endif diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c index d313039e2fdf..25a00ce4f24d 100644 --- a/drivers/pci/dmar.c +++ b/drivers/pci/dmar.c @@ -180,6 +180,7 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) dmaru->hdr = header; drhd = (struct acpi_dmar_hardware_unit *)header; dmaru->reg_base_addr = drhd->address; + dmaru->segment = drhd->segment; dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ ret = alloc_iommu(dmaru); @@ -789,6 +790,35 @@ end: spin_unlock_irqrestore(&iommu->register_lock, flags); } +/* + * Enable queued invalidation. + */ +static void __dmar_enable_qi(struct intel_iommu *iommu) +{ + u32 cmd, sts; + unsigned long flags; + struct q_inval *qi = iommu->qi; + + qi->free_head = qi->free_tail = 0; + qi->free_cnt = QI_LENGTH; + + spin_lock_irqsave(&iommu->register_lock, flags); + + /* write zero to the tail reg */ + writel(0, iommu->reg + DMAR_IQT_REG); + + dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); + + cmd = iommu->gcmd | DMA_GCMD_QIE; + iommu->gcmd |= DMA_GCMD_QIE; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + /* Make sure hardware complete it */ + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); + + spin_unlock_irqrestore(&iommu->register_lock, flags); +} + /* * Enable Queued Invalidation interface. This is a must to support * interrupt-remapping. Also used by DMA-remapping, which replaces @@ -796,8 +826,6 @@ end: */ int dmar_enable_qi(struct intel_iommu *iommu) { - u32 cmd, sts; - unsigned long flags; struct q_inval *qi; if (!ecap_qis(iommu->ecap)) @@ -835,19 +863,7 @@ int dmar_enable_qi(struct intel_iommu *iommu) spin_lock_init(&qi->q_lock); - spin_lock_irqsave(&iommu->register_lock, flags); - /* write zero to the tail reg */ - writel(0, iommu->reg + DMAR_IQT_REG); - - dmar_writeq(iommu->reg + DMAR_IQA_REG, virt_to_phys(qi->desc)); - - cmd = iommu->gcmd | DMA_GCMD_QIE; - iommu->gcmd |= DMA_GCMD_QIE; - writel(cmd, iommu->reg + DMAR_GCMD_REG); - - /* Make sure hardware complete it */ - IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_QIES), sts); - spin_unlock_irqrestore(&iommu->register_lock, flags); + __dmar_enable_qi(iommu); return 0; } @@ -1102,3 +1118,28 @@ int __init enable_drhd_fault_handling(void) return 0; } + +/* + * Re-enable Queued Invalidation interface. + */ +int dmar_reenable_qi(struct intel_iommu *iommu) +{ + if (!ecap_qis(iommu->ecap)) + return -ENOENT; + + if (!iommu->qi) + return -ENOENT; + + /* + * First disable queued invalidation. + */ + dmar_disable_qi(iommu); + /* + * Then enable queued invalidation again. Since there is no pending + * invalidation requests now, it's safe to re-enable queued + * invalidation. + */ + __dmar_enable_qi(iommu); + + return 0; +} diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c index 23e56a564e05..dcda5212f3bb 100644 --- a/drivers/pci/intel-iommu.c +++ b/drivers/pci/intel-iommu.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include "pci.h" @@ -247,7 +248,8 @@ struct dmar_domain { struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ - u8 bus; /* PCI bus numer */ + int segment; /* PCI domain */ + u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ struct pci_dev *dev; /* it's NULL for PCIE-to-PCI bridge */ struct dmar_domain *domain; /* pointer to domain */ @@ -467,7 +469,7 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) domain_update_iommu_snooping(domain); } -static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn) +static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) { struct dmar_drhd_unit *drhd = NULL; int i; @@ -475,12 +477,20 @@ static struct intel_iommu *device_to_iommu(u8 bus, u8 devfn) for_each_drhd_unit(drhd) { if (drhd->ignored) continue; + if (segment != drhd->segment) + continue; - for (i = 0; i < drhd->devices_cnt; i++) + for (i = 0; i < drhd->devices_cnt; i++) { if (drhd->devices[i] && drhd->devices[i]->bus->number == bus && drhd->devices[i]->devfn == devfn) return drhd->iommu; + if (drhd->devices[i] && + drhd->devices[i]->subordinate && + drhd->devices[i]->subordinate->number <= bus && + drhd->devices[i]->subordinate->subordinate >= bus) + return drhd->iommu; + } if (drhd->include_all) return drhd->iommu; @@ -1312,7 +1322,7 @@ static void domain_exit(struct dmar_domain *domain) } static int domain_context_mapping_one(struct dmar_domain *domain, - u8 bus, u8 devfn) + int segment, u8 bus, u8 devfn) { struct context_entry *context; unsigned long flags; @@ -1327,7 +1337,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, bus, PCI_SLOT(devfn), PCI_FUNC(devfn)); BUG_ON(!domain->pgd); - iommu = device_to_iommu(bus, devfn); + iommu = device_to_iommu(segment, bus, devfn); if (!iommu) return -ENODEV; @@ -1417,8 +1427,8 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) int ret; struct pci_dev *tmp, *parent; - ret = domain_context_mapping_one(domain, pdev->bus->number, - pdev->devfn); + ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), + pdev->bus->number, pdev->devfn); if (ret) return ret; @@ -1429,18 +1439,23 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev) /* Secondary interface's bus number and devfn 0 */ parent = pdev->bus->self; while (parent != tmp) { - ret = domain_context_mapping_one(domain, parent->bus->number, - parent->devfn); + ret = domain_context_mapping_one(domain, + pci_domain_nr(parent->bus), + parent->bus->number, + parent->devfn); if (ret) return ret; parent = parent->bus->self; } if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ return domain_context_mapping_one(domain, - tmp->subordinate->number, 0); + pci_domain_nr(tmp->subordinate), + tmp->subordinate->number, 0); else /* this is a legacy PCI bridge */ return domain_context_mapping_one(domain, - tmp->bus->number, tmp->devfn); + pci_domain_nr(tmp->bus), + tmp->bus->number, + tmp->devfn); } static int domain_context_mapped(struct pci_dev *pdev) @@ -1449,12 +1464,12 @@ static int domain_context_mapped(struct pci_dev *pdev) struct pci_dev *tmp, *parent; struct intel_iommu *iommu; - iommu = device_to_iommu(pdev->bus->number, pdev->devfn); + iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, + pdev->devfn); if (!iommu) return -ENODEV; - ret = device_context_mapped(iommu, - pdev->bus->number, pdev->devfn); + ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn); if (!ret) return ret; /* dependent device mapping */ @@ -1465,17 +1480,17 @@ static int domain_context_mapped(struct pci_dev *pdev) parent = pdev->bus->self; while (parent != tmp) { ret = device_context_mapped(iommu, parent->bus->number, - parent->devfn); + parent->devfn); if (!ret) return ret; parent = parent->bus->self; } if (tmp->is_pcie) - return device_context_mapped(iommu, - tmp->subordinate->number, 0); + return device_context_mapped(iommu, tmp->subordinate->number, + 0); else - return device_context_mapped(iommu, - tmp->bus->number, tmp->devfn); + return device_context_mapped(iommu, tmp->bus->number, + tmp->devfn); } static int @@ -1542,7 +1557,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain) info->dev->dev.archdata.iommu = NULL; spin_unlock_irqrestore(&device_domain_lock, flags); - iommu = device_to_iommu(info->bus, info->devfn); + iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); free_devinfo_mem(info); @@ -1577,11 +1592,14 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) struct pci_dev *dev_tmp; unsigned long flags; int bus = 0, devfn = 0; + int segment; domain = find_domain(pdev); if (domain) return domain; + segment = pci_domain_nr(pdev->bus); + dev_tmp = pci_find_upstream_pcie_bridge(pdev); if (dev_tmp) { if (dev_tmp->is_pcie) { @@ -1593,7 +1611,8 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) } spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &device_domain_list, global) { - if (info->bus == bus && info->devfn == devfn) { + if (info->segment == segment && + info->bus == bus && info->devfn == devfn) { found = info->domain; break; } @@ -1631,6 +1650,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) domain_exit(domain); goto error; } + info->segment = segment; info->bus = bus; info->devfn = devfn; info->dev = NULL; @@ -1642,7 +1662,8 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) found = NULL; spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(tmp, &device_domain_list, global) { - if (tmp->bus == bus && tmp->devfn == devfn) { + if (tmp->segment == segment && + tmp->bus == bus && tmp->devfn == devfn) { found = tmp->domain; break; } @@ -1662,6 +1683,7 @@ found_domain: info = alloc_devinfo_mem(); if (!info) goto error; + info->segment = segment; info->bus = pdev->bus->number; info->devfn = pdev->devfn; info->dev = pdev; @@ -1946,6 +1968,15 @@ static int __init init_dmars(void) } } +#ifdef CONFIG_INTR_REMAP + if (!intr_remapping_enabled) { + ret = enable_intr_remapping(0); + if (ret) + printk(KERN_ERR + "IOMMU: enable interrupt remapping failed\n"); + } +#endif + /* * For each rmrr * for each dev attached to rmrr @@ -2597,6 +2628,150 @@ static void __init init_no_remapping_devices(void) } } +#ifdef CONFIG_SUSPEND +static int init_iommu_hw(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu = NULL; + + for_each_active_iommu(iommu, drhd) + if (iommu->qi) + dmar_reenable_qi(iommu); + + for_each_active_iommu(iommu, drhd) { + iommu_flush_write_buffer(iommu); + + iommu_set_root_entry(iommu); + + iommu->flush.flush_context(iommu, 0, 0, 0, + DMA_CCMD_GLOBAL_INVL, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, + DMA_TLB_GLOBAL_FLUSH, 0); + iommu_disable_protect_mem_regions(iommu); + iommu_enable_translation(iommu); + } + + return 0; +} + +static void iommu_flush_all(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + for_each_active_iommu(iommu, drhd) { + iommu->flush.flush_context(iommu, 0, 0, 0, + DMA_CCMD_GLOBAL_INVL, 0); + iommu->flush.flush_iotlb(iommu, 0, 0, 0, + DMA_TLB_GLOBAL_FLUSH, 0); + } +} + +static int iommu_suspend(struct sys_device *dev, pm_message_t state) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu = NULL; + unsigned long flag; + + for_each_active_iommu(iommu, drhd) { + iommu->iommu_state = kzalloc(sizeof(u32) * MAX_SR_DMAR_REGS, + GFP_ATOMIC); + if (!iommu->iommu_state) + goto nomem; + } + + iommu_flush_all(); + + for_each_active_iommu(iommu, drhd) { + iommu_disable_translation(iommu); + + spin_lock_irqsave(&iommu->register_lock, flag); + + iommu->iommu_state[SR_DMAR_FECTL_REG] = + readl(iommu->reg + DMAR_FECTL_REG); + iommu->iommu_state[SR_DMAR_FEDATA_REG] = + readl(iommu->reg + DMAR_FEDATA_REG); + iommu->iommu_state[SR_DMAR_FEADDR_REG] = + readl(iommu->reg + DMAR_FEADDR_REG); + iommu->iommu_state[SR_DMAR_FEUADDR_REG] = + readl(iommu->reg + DMAR_FEUADDR_REG); + + spin_unlock_irqrestore(&iommu->register_lock, flag); + } + return 0; + +nomem: + for_each_active_iommu(iommu, drhd) + kfree(iommu->iommu_state); + + return -ENOMEM; +} + +static int iommu_resume(struct sys_device *dev) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu = NULL; + unsigned long flag; + + if (init_iommu_hw()) { + WARN(1, "IOMMU setup failed, DMAR can not resume!\n"); + return -EIO; + } + + for_each_active_iommu(iommu, drhd) { + + spin_lock_irqsave(&iommu->register_lock, flag); + + writel(iommu->iommu_state[SR_DMAR_FECTL_REG], + iommu->reg + DMAR_FECTL_REG); + writel(iommu->iommu_state[SR_DMAR_FEDATA_REG], + iommu->reg + DMAR_FEDATA_REG); + writel(iommu->iommu_state[SR_DMAR_FEADDR_REG], + iommu->reg + DMAR_FEADDR_REG); + writel(iommu->iommu_state[SR_DMAR_FEUADDR_REG], + iommu->reg + DMAR_FEUADDR_REG); + + spin_unlock_irqrestore(&iommu->register_lock, flag); + } + + for_each_active_iommu(iommu, drhd) + kfree(iommu->iommu_state); + + return 0; +} + +static struct sysdev_class iommu_sysclass = { + .name = "iommu", + .resume = iommu_resume, + .suspend = iommu_suspend, +}; + +static struct sys_device device_iommu = { + .cls = &iommu_sysclass, +}; + +static int __init init_iommu_sysfs(void) +{ + int error; + + error = sysdev_class_register(&iommu_sysclass); + if (error) + return error; + + error = sysdev_register(&device_iommu); + if (error) + sysdev_class_unregister(&iommu_sysclass); + + return error; +} + +#else +static int __init init_iommu_sysfs(void) +{ + return 0; +} +#endif /* CONFIG_PM */ + int __init intel_iommu_init(void) { int ret = 0; @@ -2632,6 +2807,7 @@ int __init intel_iommu_init(void) init_timer(&unmap_timer); force_iommu = 1; dma_ops = &intel_dma_ops; + init_iommu_sysfs(); register_iommu(&intel_iommu_ops); @@ -2648,6 +2824,7 @@ static int vm_domain_add_dev_info(struct dmar_domain *domain, if (!info) return -ENOMEM; + info->segment = pci_domain_nr(pdev->bus); info->bus = pdev->bus->number; info->devfn = pdev->devfn; info->dev = pdev; @@ -2677,15 +2854,15 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu, parent = pdev->bus->self; while (parent != tmp) { iommu_detach_dev(iommu, parent->bus->number, - parent->devfn); + parent->devfn); parent = parent->bus->self; } if (tmp->is_pcie) /* this is a PCIE-to-PCI bridge */ iommu_detach_dev(iommu, tmp->subordinate->number, 0); else /* this is a legacy PCI bridge */ - iommu_detach_dev(iommu, - tmp->bus->number, tmp->devfn); + iommu_detach_dev(iommu, tmp->bus->number, + tmp->devfn); } } @@ -2698,13 +2875,15 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, int found = 0; struct list_head *entry, *tmp; - iommu = device_to_iommu(pdev->bus->number, pdev->devfn); + iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, + pdev->devfn); if (!iommu) return; spin_lock_irqsave(&device_domain_lock, flags); list_for_each_safe(entry, tmp, &domain->devices) { info = list_entry(entry, struct device_domain_info, link); + /* No need to compare PCI domain; it has to be the same */ if (info->bus == pdev->bus->number && info->devfn == pdev->devfn) { list_del(&info->link); @@ -2729,7 +2908,8 @@ static void vm_domain_remove_one_dev_info(struct dmar_domain *domain, * owned by this domain, clear this iommu in iommu_bmp * update iommu count and coherency */ - if (device_to_iommu(info->bus, info->devfn) == iommu) + if (iommu == device_to_iommu(info->segment, info->bus, + info->devfn)) found = 1; } @@ -2762,7 +2942,7 @@ static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags1); - iommu = device_to_iommu(info->bus, info->devfn); + iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); iommu_detach_dependent_devices(iommu, info->dev); @@ -2950,7 +3130,8 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, } } - iommu = device_to_iommu(pdev->bus->number, pdev->devfn); + iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, + pdev->devfn); if (!iommu) return -ENODEV; diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c index b041a409f4a7..f5e0ea724a6f 100644 --- a/drivers/pci/intr_remapping.c +++ b/drivers/pci/intr_remapping.c @@ -9,6 +9,7 @@ #include #include #include "intr_remapping.h" +#include static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static int ir_ioapic_num; @@ -415,12 +416,27 @@ static void iommu_set_intr_remapping(struct intel_iommu *iommu, int mode) /* Set interrupt-remapping table pointer */ cmd = iommu->gcmd | DMA_GCMD_SIRTP; + iommu->gcmd |= DMA_GCMD_SIRTP; writel(cmd, iommu->reg + DMAR_GCMD_REG); IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, readl, (sts & DMA_GSTS_IRTPS), sts); spin_unlock_irqrestore(&iommu->register_lock, flags); + if (mode == 0) { + spin_lock_irqsave(&iommu->register_lock, flags); + + /* enable comaptiblity format interrupt pass through */ + cmd = iommu->gcmd | DMA_GCMD_CFI; + iommu->gcmd |= DMA_GCMD_CFI; + writel(cmd, iommu->reg + DMAR_GCMD_REG); + + IOMMU_WAIT_OP(iommu, DMAR_GSTS_REG, + readl, (sts & DMA_GSTS_CFIS), sts); + + spin_unlock_irqrestore(&iommu->register_lock, flags); + } + /* * global invalidation of interrupt entry cache before enabling * interrupt-remapping. @@ -470,7 +486,7 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode) /* * Disable Interrupt Remapping. */ -static void disable_intr_remapping(struct intel_iommu *iommu) +static void iommu_disable_intr_remapping(struct intel_iommu *iommu) { unsigned long flags; u32 sts; @@ -478,6 +494,12 @@ static void disable_intr_remapping(struct intel_iommu *iommu) if (!ecap_ir_support(iommu->ecap)) return; + /* + * global invalidation of interrupt entry cache before disabling + * interrupt-remapping. + */ + qi_global_iec(iommu); + spin_lock_irqsave(&iommu->register_lock, flags); sts = dmar_readq(iommu->reg + DMAR_GSTS_REG); @@ -502,6 +524,13 @@ int __init enable_intr_remapping(int eim) for_each_drhd_unit(drhd) { struct intel_iommu *iommu = drhd->iommu; + /* + * If the queued invalidation is already initialized, + * shouldn't disable it. + */ + if (iommu->qi) + continue; + /* * Clear previous faults. */ @@ -511,7 +540,7 @@ int __init enable_intr_remapping(int eim) * Disable intr remapping and queued invalidation, if already * enabled prior to OS handover. */ - disable_intr_remapping(iommu); + iommu_disable_intr_remapping(iommu); dmar_disable_qi(iommu); } @@ -639,3 +668,54 @@ int __init parse_ioapics_under_ir(void) return ir_supported; } + +void disable_intr_remapping(void) +{ + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu = NULL; + + /* + * Disable Interrupt-remapping for all the DRHD's now. + */ + for_each_iommu(iommu, drhd) { + if (!ecap_ir_support(iommu->ecap)) + continue; + + iommu_disable_intr_remapping(iommu); + } +} + +int reenable_intr_remapping(int eim) +{ + struct dmar_drhd_unit *drhd; + int setup = 0; + struct intel_iommu *iommu = NULL; + + for_each_iommu(iommu, drhd) + if (iommu->qi) + dmar_reenable_qi(iommu); + + /* + * Setup Interrupt-remapping for all the DRHD's now. + */ + for_each_iommu(iommu, drhd) { + if (!ecap_ir_support(iommu->ecap)) + continue; + + /* Set up interrupt remapping for iommu.*/ + iommu_set_intr_remapping(iommu, eim); + setup = 1; + } + + if (!setup) + goto error; + + return 0; + +error: + /* + * handle error condition gracefully here! + */ + return -1; +} + diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 2f3427468956..e397dc342cda 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -34,6 +34,7 @@ struct dmar_drhd_unit { u64 reg_base_addr; /* register base address*/ struct pci_dev **devices; /* target device array */ int devices_cnt; /* target device count */ + u16 segment; /* PCI domain */ u8 ignored:1; /* ignore drhd */ u8 include_all:1; struct intel_iommu *iommu; @@ -44,6 +45,14 @@ extern struct list_head dmar_drhd_units; #define for_each_drhd_unit(drhd) \ list_for_each_entry(drhd, &dmar_drhd_units, list) +#define for_each_active_iommu(i, drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) \ + if (i=drhd->iommu, drhd->ignored) {} else + +#define for_each_iommu(i, drhd) \ + list_for_each_entry(drhd, &dmar_drhd_units, list) \ + if (i=drhd->iommu, 0) {} else + extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); @@ -100,6 +109,8 @@ struct irte { #ifdef CONFIG_INTR_REMAP extern int intr_remapping_enabled; extern int enable_intr_remapping(int); +extern void disable_intr_remapping(void); +extern int reenable_intr_remapping(int); extern int get_irte(int irq, struct irte *entry); extern int modify_irte(int irq, struct irte *irte_modified); diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 77214ead1a36..aa8c53171233 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -164,6 +164,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GCMD_QIE (((u32)1) << 26) #define DMA_GCMD_SIRTP (((u32)1) << 24) #define DMA_GCMD_IRE (((u32) 1) << 25) +#define DMA_GCMD_CFI (((u32) 1) << 23) /* GSTS_REG */ #define DMA_GSTS_TES (((u32)1) << 31) @@ -174,6 +175,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define DMA_GSTS_QIES (((u32)1) << 26) #define DMA_GSTS_IRTPS (((u32)1) << 24) #define DMA_GSTS_IRES (((u32)1) << 25) +#define DMA_GSTS_CFIS (((u32)1) << 23) /* CCMD_REG */ #define DMA_CCMD_ICC (((u64)1) << 63) @@ -284,6 +286,14 @@ struct iommu_flush { unsigned int size_order, u64 type, int non_present_entry_flush); }; +enum { + SR_DMAR_FECTL_REG, + SR_DMAR_FEDATA_REG, + SR_DMAR_FEADDR_REG, + SR_DMAR_FEUADDR_REG, + MAX_SR_DMAR_REGS +}; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 cap; @@ -304,6 +314,8 @@ struct intel_iommu { struct iommu_flush flush; #endif struct q_inval *qi; /* Queued invalidation info */ + u32 *iommu_state; /* Store iommu states between suspend and resume.*/ + #ifdef CONFIG_INTR_REMAP struct ir_table *ir_table; /* Interrupt remapping info */ #endif @@ -322,6 +334,7 @@ extern int alloc_iommu(struct dmar_drhd_unit *drhd); extern void free_iommu(struct intel_iommu *iommu); extern int dmar_enable_qi(struct intel_iommu *iommu); extern void dmar_disable_qi(struct intel_iommu *iommu); +extern int dmar_reenable_qi(struct intel_iommu *iommu); extern void qi_global_iec(struct intel_iommu *iommu); extern int qi_flush_context(struct intel_iommu *iommu, u16 did, u16 sid,