From ee34b32d8c2950f66038c8975747ef9aec855289 Mon Sep 17 00:00:00 2001
From: Suresh Siddha
Date: Fri, 2 Oct 2009 11:01:21 -0700
Subject: [PATCH 01/12] dmar: support for parsing Remapping Hardware Static Affinity structure

Add support for parsing the Remapping Hardware Static Affinity (RHSA)
structure. This identifies the association between each remapping
hardware unit and its proximity domain, so that translation structures
can be allocated close to the remapping hardware unit that uses them.

Signed-off-by: Suresh Siddha
Signed-off-by: David Woodhouse
---
 drivers/pci/dmar.c          | 24 +++++++++++++++++++++++-
 include/linux/intel-iommu.h |  1 +
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 22b02c6df854..577956566a0b 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -348,6 +348,26 @@ found:
 }
 #endif
 
+static int __init
+dmar_parse_one_rhsa(struct acpi_dmar_header *header)
+{
+	struct acpi_dmar_rhsa *rhsa;
+	struct dmar_drhd_unit *drhd;
+
+	rhsa = (struct acpi_dmar_rhsa *)header;
+	for_each_drhd_unit(drhd)
+		if (drhd->reg_base_addr == rhsa->base_address) {
+			int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
+
+			if (!node_online(node))
+				node = -1;
+			drhd->iommu->node = node;
+			return 0;
+		}
+
+	return -ENODEV;
+}
+
 static void __init
 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
 {
@@ -467,7 +487,7 @@ parse_dmar_table(void)
 #endif
 			break;
 		case ACPI_DMAR_HARDWARE_AFFINITY:
-			/* We don't do anything with RHSA (yet?) */
+			ret = dmar_parse_one_rhsa(entry_header);
 			break;
 		default:
 			printk(KERN_WARNING PREFIX
@@ -677,6 +697,8 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	iommu->agaw = agaw;
 	iommu->msagaw = msagaw;
 
+	iommu->node = -1;
+
 	/* the registers might be more than one page */
 	map_size = max_t(int, ecap_max_iotlb_offset(iommu->ecap),
 		cap_max_fault_reg_offset(iommu->cap));
diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h
index 4f0a72a9740c..9310c699a37d 100644
--- a/include/linux/intel-iommu.h
+++ b/include/linux/intel-iommu.h
@@ -332,6 +332,7 @@ struct intel_iommu {
 #ifdef CONFIG_INTR_REMAP
 	struct ir_table *ir_table;	/* Interrupt remapping info */
 #endif
+	int		node;
 };
 
 static inline void __iommu_flush_cache(

From 751cafe3aeceb9ff887c97237f6daaf596c9e547 Mon Sep 17 00:00:00 2001
From: Suresh Siddha
Date: Fri, 2 Oct 2009 11:01:22 -0700
Subject: [PATCH 02/12] dmar: Allocate queued invalidation structure using numa locality info

Allocate the queued invalidation descriptor structures using numa
locality info.
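The pattern this and the following patches apply is always the same:
take the node recorded on the struct intel_iommu during RHSA parsing
(-1 when no affinity is known) and feed it to a node-aware allocator.
Roughly, as an illustrative sketch rather than the literal hunk below:

	struct page *desc_page;

	/* iommu->node was set from the RHSA entry; -1 means no known affinity */
	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
	if (!desc_page)
		return -ENOMEM;
	qi->desc = page_address(desc_page);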
Signed-off-by: Suresh Siddha
Signed-off-by: David Woodhouse
---
 drivers/pci/dmar.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 577956566a0b..63aa52973d4d 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -1040,6 +1040,7 @@ static void __dmar_enable_qi(struct intel_iommu *iommu)
 int dmar_enable_qi(struct intel_iommu *iommu)
 {
 	struct q_inval *qi;
+	struct page *desc_page;
 
 	if (!ecap_qis(iommu->ecap))
 		return -ENOENT;
@@ -1056,13 +1057,16 @@ int dmar_enable_qi(struct intel_iommu *iommu)
 
 	qi = iommu->qi;
 
-	qi->desc = (void *)(get_zeroed_page(GFP_ATOMIC));
-	if (!qi->desc) {
+
+	desc_page = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO, 0);
+	if (!desc_page) {
 		kfree(qi);
 		iommu->qi = 0;
 		return -ENOMEM;
 	}
 
+	qi->desc = page_address(desc_page);
+
 	qi->desc_status = kmalloc(QI_LENGTH * sizeof(int), GFP_ATOMIC);
 	if (!qi->desc_status) {
 		free_page((unsigned long) qi->desc);

From 824cd75bf30cfc52c4b468f3cabf6932fd012654 Mon Sep 17 00:00:00 2001
From: Suresh Siddha
Date: Fri, 2 Oct 2009 11:01:23 -0700
Subject: [PATCH 03/12] intr_remap: Allocate intr-remapping table using numa locality info

Allocate the interrupt-remapping table using numa locality info. On
platforms where remapping hardware units span different nodes, this
gives each remapping hardware unit node-local access to its
interrupt-remapping table entries.

Signed-off-by: Suresh Siddha
Signed-off-by: David Woodhouse
---
 drivers/pci/intr_remapping.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/pci/intr_remapping.c b/drivers/pci/intr_remapping.c
index 0ed78a764ded..fccf0e2fcba3 100644
--- a/drivers/pci/intr_remapping.c
+++ b/drivers/pci/intr_remapping.c
@@ -548,7 +548,8 @@ static int setup_intr_remapping(struct intel_iommu *iommu, int mode)
 	if (!iommu->ir_table)
 		return -ENOMEM;
 
-	pages = alloc_pages(GFP_ATOMIC | __GFP_ZERO, INTR_REMAP_PAGE_ORDER);
+	pages = alloc_pages_node(iommu->node, GFP_ATOMIC | __GFP_ZERO,
+				 INTR_REMAP_PAGE_ORDER);
 
 	if (!pages) {
 		printk(KERN_ERR "failed to allocate pages of order %d\n",

From 4c923d4714821cf32ff115bb9c91867dff711972 Mon Sep 17 00:00:00 2001
From: Suresh Siddha
Date: Fri, 2 Oct 2009 11:01:24 -0700
Subject: [PATCH 04/12] iommu: Allocate dma-remapping structures using numa locality info

Allocate the dma-remapping structures using numa locality info. On
platforms where remapping hardware units span different nodes, this
gives each remapping hardware unit node-local access to its
dma-remapping translation structures.
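The central change can be pictured as threading a node id through the
page-table allocator (simplified sketch; the PF_MEMALLOC juggling that
the real alloc_pgtable_page() still carries at this point in the series
is omitted here):

	static inline void *alloc_pgtable_page(int node)
	{
		struct page *page;

		/* node is -1 for domains that have no NUMA affinity yet */
		page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
		return page ? page_address(page) : NULL;
	}

Callers pass iommu->node for per-iommu structures (root and context
tables) and domain->nid for per-domain page tables, so each structure
ends up on the node of the hardware that walks it.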
Signed-off-by: Suresh Siddha
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index b1e97e682500..c83113646223 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -277,6 +277,7 @@ static int hw_pass_through = 1;
 
 struct dmar_domain {
 	int	id;			/* domain id */
+	int	nid;			/* node id */
 	unsigned long iommu_bmp;	/* bitmap of iommus this domain uses*/
 
 	struct list_head devices; 	/* all devices' list */
@@ -400,15 +401,18 @@ static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
 }
 
 
-static inline void *alloc_pgtable_page(void)
+static inline void *alloc_pgtable_page(int node)
 {
 	unsigned int flags;
-	void *vaddr;
+	struct page *page;
+	void *vaddr = NULL;
 
 	/* trying to avoid low memory issues */
 	flags = current->flags & PF_MEMALLOC;
 	current->flags |= PF_MEMALLOC;
-	vaddr = (void *)get_zeroed_page(GFP_ATOMIC);
+	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
+	if (page)
+		vaddr = page_address(page);
 	current->flags &= (~PF_MEMALLOC | flags);
 	return vaddr;
 }
@@ -589,7 +593,8 @@ static struct context_entry * device_to_context_entry(struct intel_iommu *iommu,
 	root = &iommu->root_entry[bus];
 	context = get_context_addr_from_root(root);
 	if (!context) {
-		context = (struct context_entry *)alloc_pgtable_page();
+		context = (struct context_entry *)
+				alloc_pgtable_page(iommu->node);
 		if (!context) {
 			spin_unlock_irqrestore(&iommu->lock, flags);
 			return NULL;
@@ -732,7 +737,7 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain,
 		if (!dma_pte_present(pte)) {
 			uint64_t pteval;
 
-			tmp_page = alloc_pgtable_page();
+			tmp_page = alloc_pgtable_page(domain->nid);
 
 			if (!tmp_page)
 				return NULL;
@@ -868,7 +873,7 @@ static int iommu_alloc_root_entry(struct intel_iommu *iommu)
 	struct root_entry *root;
 	unsigned long flags;
 
-	root = (struct root_entry *)alloc_pgtable_page();
+	root = (struct root_entry *)alloc_pgtable_page(iommu->node);
 	if (!root)
 		return -ENOMEM;
 
@@ -1263,6 +1268,7 @@ static struct dmar_domain *alloc_domain(void)
 	if (!domain)
 		return NULL;
 
+	domain->nid = -1;
 	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
 	domain->flags = 0;
 
@@ -1420,9 +1426,10 @@ static int domain_init(struct dmar_domain *domain, int guest_width)
 		domain->iommu_snooping = 0;
 
 	domain->iommu_count = 1;
+	domain->nid = iommu->node;
 
 	/* always allocate the top pgd */
-	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
+	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
 	if (!domain->pgd)
 		return -ENOMEM;
 	__iommu_flush_cache(iommu, domain->pgd, PAGE_SIZE);
@@ -1577,6 +1584,8 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
 	spin_lock_irqsave(&domain->iommu_lock, flags);
 	if (!test_and_set_bit(iommu->seq_id, &domain->iommu_bmp)) {
 		domain->iommu_count++;
+		if (domain->iommu_count == 1)
+			domain->nid = iommu->node;
 		domain_update_iommu_cap(domain);
 	}
 	spin_unlock_irqrestore(&domain->iommu_lock, flags);
@@ -3416,6 +3425,7 @@ static struct dmar_domain *iommu_alloc_vm_domain(void)
 		return NULL;
 
 	domain->id = vm_domid++;
+	domain->nid = -1;
 	memset(&domain->iommu_bmp, 0, sizeof(unsigned long));
 	domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE;
 
@@ -3442,9 +3452,10 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width)
 	domain->iommu_coherency = 0;
 	domain->iommu_snooping = 0;
 	domain->max_addr = 0;
+	domain->nid = -1;
 
 	/* always allocate the top pgd */
-	domain->pgd = (struct dma_pte *)alloc_pgtable_page();
+	domain->pgd = (struct dma_pte *)alloc_pgtable_page(domain->nid);
 	if (!domain->pgd)
 		return -ENOMEM;
 	domain_flush_cache(domain, domain->pgd, PAGE_SIZE);

From aa697079ee66315c4b9747a5eb3e48487fb1b8be Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Wed, 7 Oct 2009 12:18:00 +0100
Subject: [PATCH 05/12] dmar: Fix build failure without NUMA, warn on bogus RHSA tables and don't abort

Signed-off-by: David Woodhouse
---
 drivers/pci/dmar.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 63aa52973d4d..0d064d1e840a 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -348,6 +348,7 @@ found:
 }
 #endif
 
+#ifdef CONFIG_ACPI_NUMA
 static int __init
 dmar_parse_one_rhsa(struct acpi_dmar_header *header)
 {
@@ -355,18 +356,26 @@ dmar_parse_one_rhsa(struct acpi_dmar_header *header)
 	struct dmar_drhd_unit *drhd;
 
 	rhsa = (struct acpi_dmar_rhsa *)header;
-	for_each_drhd_unit(drhd)
+	for_each_drhd_unit(drhd) {
 		if (drhd->reg_base_addr == rhsa->base_address) {
 			int node = acpi_map_pxm_to_node(rhsa->proximity_domain);
 
 			if (!node_online(node))
 				node = -1;
 			drhd->iommu->node = node;
-			return 0;
+			return 0;
+		}
 	}
+	WARN(1, "Your BIOS is broken; RHSA refers to non-existent DMAR unit at %llx\n"
+	     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+	     drhd->reg_base_addr,
+	     dmi_get_system_info(DMI_BIOS_VENDOR),
+	     dmi_get_system_info(DMI_BIOS_VERSION),
+	     dmi_get_system_info(DMI_PRODUCT_VERSION));
 
-	return -ENODEV;
+	return 0;
 }
+#endif
 
 static void __init
 dmar_table_print_dmar_entry(struct acpi_dmar_header *header)
@@ -487,7 +496,9 @@ parse_dmar_table(void)
 #endif
 			break;
 		case ACPI_DMAR_HARDWARE_AFFINITY:
+#ifdef CONFIG_ACPI_NUMA
 			ret = dmar_parse_one_rhsa(entry_header);
+#endif
 			break;
 		default:
 			printk(KERN_WARNING PREFIX

From 2c99220810c1c79322034628b993573b088ff2da Mon Sep 17 00:00:00 2001
From: Chris Wright
Date: Wed, 2 Dec 2009 09:17:13 +0000
Subject: [PATCH 06/12] intel-iommu: Detect DMAR in hyperspace at probe time.

Many BIOSes will lie to us about the existence of an IOMMU, and claim
that there is one at an address which actually returns all 0xFF. We
need to detect this early, so that we know we don't have a viable IOMMU
and can set up swiotlb before it's too late.
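For illustration, the detection amounts to mapping the claimed register
page early and checking whether the capability registers read back as
all ones (sketch only; the real hunk below also handles the
address-zero case and reports the DMI firmware strings):

	void __iomem *addr;
	u64 cap, ecap;

	addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
	if (!addr)
		return 0;	/* can't validate; treat the unit as broken */
	cap  = dmar_readq(addr + DMAR_CAP_REG);
	ecap = dmar_readq(addr + DMAR_ECAP_REG);
	early_iounmap(addr, VTD_PAGE_SIZE);

	/* nonexistent MMIO reads back as all 0xFF: no IOMMU is really there */
	if (cap == (uint64_t)-1 && ecap == (uint64_t)-1)
		return 0;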
Cc: stable@kernel.org
Signed-off-by: Chris Wright
Signed-off-by: David Woodhouse
---
 drivers/pci/dmar.c | 34 +++++++++++++++++++++++++++++-----
 1 file changed, 29 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 525a32487abd..56883fc1c7be 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -632,6 +632,9 @@ int __init check_zero_address(void)
 		}
 
 		if (entry_header->type == ACPI_DMAR_TYPE_HARDWARE_UNIT) {
+			void __iomem *addr;
+			u64 cap, ecap;
+
 			drhd = (void *)entry_header;
 			if (!drhd->address) {
 				/* Promote an attitude of violence to a BIOS engineer today */
@@ -640,17 +643,38 @@ int __init check_zero_address(void)
 				     dmi_get_system_info(DMI_BIOS_VENDOR),
 				     dmi_get_system_info(DMI_BIOS_VERSION),
 				     dmi_get_system_info(DMI_PRODUCT_VERSION));
-#ifdef CONFIG_DMAR
-				dmar_disabled = 1;
-#endif
-				return 0;
+				goto failed;
+			}
+
+			addr = early_ioremap(drhd->address, VTD_PAGE_SIZE);
+			if (!addr ) {
+				printk("IOMMU: can't validate: %llx\n", drhd->address);
+				goto failed;
+			}
+			cap = dmar_readq(addr + DMAR_CAP_REG);
+			ecap = dmar_readq(addr + DMAR_ECAP_REG);
+			early_iounmap(addr, VTD_PAGE_SIZE);
+			if (cap == (uint64_t)-1 && ecap == (uint64_t)-1) {
+				/* Promote an attitude of violence to a BIOS engineer today */
+				WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
+				     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+				     drhd->address,
+				     dmi_get_system_info(DMI_BIOS_VENDOR),
+				     dmi_get_system_info(DMI_BIOS_VERSION),
+				     dmi_get_system_info(DMI_PRODUCT_VERSION));
+				goto failed;
 			}
-			break;
 		}
 
 		entry_header = ((void *)entry_header +
 				entry_header->length);
 	}
 	return 1;
+
+failed:
+#ifdef CONFIG_DMAR
+	dmar_disabled = 1;
+#endif
+	return 0;
 }

From 6ecbf01c7ce4c0f4c3bdfa0e64ac6258328fda6c Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Wed, 2 Dec 2009 09:20:27 +0000
Subject: [PATCH 07/12] intel-iommu: Apply BIOS sanity checks for interrupt remapping too.

The BIOS errors where an IOMMU is reported either at zero or a bogus
address are causing problems even when the IOMMU is disabled -- because
interrupt remapping uses the same hardware. Ensure that the checks get
applied for the interrupt remapping initialisation too.
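The shape of the fix is a warn-once-then-reject pattern (illustrative
sketch; bios_warned is the new file-scope flag added below, and the DMI
reporting in the real code is trimmed here):

	static int bios_warned;

	if (!drhd->reg_base_addr) {
		if (!bios_warned) {
			WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n");
			bios_warned = 1;
		}
		return -EINVAL;	/* refuse to set up this remapping unit */
	}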
Cc: stable@kernel.org
Signed-off-by: David Woodhouse
---
 drivers/pci/dmar.c | 33 ++++++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 7 deletions(-)

diff --git a/drivers/pci/dmar.c b/drivers/pci/dmar.c
index 56883fc1c7be..beeaef84e151 100644
--- a/drivers/pci/dmar.c
+++ b/drivers/pci/dmar.c
@@ -613,6 +613,8 @@ int __init dmar_table_init(void)
 	return 0;
 }
 
+static int bios_warned;
+
 int __init check_zero_address(void)
 {
 	struct acpi_table_dmar *dmar;
@@ -643,6 +645,7 @@ int __init check_zero_address(void)
 				     dmi_get_system_info(DMI_BIOS_VENDOR),
 				     dmi_get_system_info(DMI_BIOS_VERSION),
 				     dmi_get_system_info(DMI_PRODUCT_VERSION));
+				bios_warned = 1;
 				goto failed;
 			}
 
@@ -662,6 +665,7 @@ int __init check_zero_address(void)
 				     dmi_get_system_info(DMI_PRODUCT_VERSION));
+				bios_warned = 1;
 				goto failed;
 			}
 		}
 
@@ -722,6 +726,18 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	int agaw = 0;
 	int msagaw = 0;
 
+	if (!drhd->reg_base_addr) {
+		if (!bios_warned) {
+			WARN(1, "Your BIOS is broken; DMAR reported at address zero!\n"
+			     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+			     dmi_get_system_info(DMI_BIOS_VENDOR),
+			     dmi_get_system_info(DMI_BIOS_VERSION),
+			     dmi_get_system_info(DMI_PRODUCT_VERSION));
+			bios_warned = 1;
+		}
+		return -EINVAL;
+	}
+
 	iommu = kzalloc(sizeof(*iommu), GFP_KERNEL);
 	if (!iommu)
 		return -ENOMEM;
@@ -738,13 +754,16 @@ int alloc_iommu(struct dmar_drhd_unit *drhd)
 	iommu->ecap = dmar_readq(iommu->reg + DMAR_ECAP_REG);
 
 	if (iommu->cap == (uint64_t)-1 && iommu->ecap == (uint64_t)-1) {
-		/* Promote an attitude of violence to a BIOS engineer today */
-		WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
-		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
-		     drhd->reg_base_addr,
-		     dmi_get_system_info(DMI_BIOS_VENDOR),
-		     dmi_get_system_info(DMI_BIOS_VERSION),
-		     dmi_get_system_info(DMI_PRODUCT_VERSION));
+		if (!bios_warned) {
+			/* Promote an attitude of violence to a BIOS engineer today */
+			WARN(1, "Your BIOS is broken; DMAR reported at address %llx returns all ones!\n"
+			     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+			     drhd->reg_base_addr,
+			     dmi_get_system_info(DMI_BIOS_VENDOR),
+			     dmi_get_system_info(DMI_BIOS_VERSION),
+			     dmi_get_system_info(DMI_PRODUCT_VERSION));
+			bios_warned = 1;
+		}
 		goto err_unmap;
 	}

From 5595b528b49a702c0428c0762bab60999648254c Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Wed, 2 Dec 2009 09:21:55 +0000
Subject: [PATCH 08/12] intel-iommu: Check for an RMRR which ends before it starts.

Some HP BIOSes report an RMRR region (a region which needs a 1:1
mapping in the IOMMU for a given device) which has an end address lower
than its start address. Detect that and warn, rather than triggering
the BUG() in dma_pte_clear_range().
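The check itself is a short guard added before the identity map is
built (sketch; ret and the error label are the surrounding function's
existing error path, and the DMI reporting is trimmed):

	if (end < start) {
		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n");
		ret = -EIO;
		goto error;
	}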
Cc: stable@kernel.org
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index cb5cae3e0205..83cabdc118db 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -2000,6 +2000,16 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev,
 	       "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n",
 	       pci_name(pdev), start, end);
 
+	if (end < start) {
+		WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n"
+			"BIOS vendor: %s; Ver: %s; Product Version: %s\n",
+			dmi_get_system_info(DMI_BIOS_VENDOR),
+			dmi_get_system_info(DMI_BIOS_VERSION),
+			dmi_get_system_info(DMI_PRODUCT_VERSION));
+		ret = -EIO;
+		goto error;
+	}
+
 	if (end >> agaw_to_width(domain->agaw)) {
 		WARN(1, "Your BIOS is broken; RMRR exceeds permitted address width (%d bits)\n"
 		     "BIOS vendor: %s; Ver: %s; Product Version: %s\n",

From 44cd613c0e4cd93079ea2a93aa06649d8ca0830a Mon Sep 17 00:00:00 2001
From: David Woodhouse
Date: Wed, 2 Dec 2009 10:18:30 +0000
Subject: [PATCH 09/12] intel-iommu: Fix oops with intel_iommu=igfx_off

The hotplug notifier will call find_domain() to see if the device in
question has been assigned an IOMMU domain. However, this should never
be called for devices with a "dummy" domain, such as graphics devices
when intel_iommu=igfx_off is set and the corresponding IOMMU isn't even
initialised. If you do that, it'll oops as it dereferences the (-1)
pointer.

The notifier function should check iommu_no_mapping() for the device
before doing anything else.

Cc: stable@kernel.org
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 83cabdc118db..e3e84cac0e98 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -3247,6 +3247,9 @@ static int device_notifier(struct notifier_block *nb,
 	struct pci_dev *pdev = to_pci_dev(dev);
 	struct dmar_domain *domain;
 
+	if (iommu_no_mapping(dev))
+		return 0;
+
 	domain = find_domain(pdev);
 	if (!domain)
 		return 0;

From 1672af1164d3d50ba8908014fd34cc0b58afdc1e Mon Sep 17 00:00:00 2001
From: Chris Wright
Date: Wed, 2 Dec 2009 12:06:34 -0800
Subject: [PATCH 10/12] intel-iommu: ignore page table validation in pass through mode

We are seeing a bug when booting w/ iommu=pt with current upstream
(bisect blames 19943b0e30b05d42e494ae6fef78156ebc8c637e "intel-iommu:
Unify hardware and software passthrough support").

The issue is specific to this loop during identity map initialization
of each device:

domain_context_mapping_one(si_domain, ..., CONTEXT_TT_PASS_THROUGH)
...
	/* Skip top levels of page tables for
	 * iommu which has less agaw than default.
	 */
	for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
		pgd = phys_to_virt(dma_pte_addr(pgd));
		if (!dma_pte_present(pgd)) {      <------ failing here
			spin_unlock_irqrestore(&iommu->lock, flags);
			return -ENOMEM;
		}

This box has 2 iommu's in it. The catchall iommu has MGAW == 48, and
SAGAW == 4. The other iommu has MGAW == 39, SAGAW == 2.

The device that's failing the above pgd test is the only device
connected to the non-catchall iommu, which has a smaller address width
than the domain default.

This test is not necessary since the context is in PT mode and the ASR
is ignored.

Thanks to Don Dutile for discovering and debugging this one.
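The fix simply fences the page-table walk off for pass-through
contexts, since the address space root is never consulted in that mode
(sketch of the resulting structure; locking and the outer conditional
are elided here):

	if (translation != CONTEXT_TT_PASS_THROUGH) {
		/* Skip top levels of page tables for iommus with a smaller agaw */
		for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
			pgd = phys_to_virt(dma_pte_addr(pgd));
			if (!dma_pte_present(pgd))
				return -ENOMEM;
		}
	}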
Cc: stable@kernel.org
Signed-off-by: Chris Wright
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 13 ++++++++-----
 1 file changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index e3e84cac0e98..466079535330 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -1530,12 +1530,15 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment,
 
 		/* Skip top levels of page tables for
 		 * iommu which has less agaw than default.
+		 * Unnecessary for PT mode.
 		 */
-		for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
-			pgd = phys_to_virt(dma_pte_addr(pgd));
-			if (!dma_pte_present(pgd)) {
-				spin_unlock_irqrestore(&iommu->lock, flags);
-				return -ENOMEM;
+		if (translation != CONTEXT_TT_PASS_THROUGH) {
+			for (agaw = domain->agaw; agaw != iommu->agaw; agaw--) {
+				pgd = phys_to_virt(dma_pte_addr(pgd));
+				if (!dma_pte_present(pgd)) {
+					spin_unlock_irqrestore(&iommu->lock, flags);
+					return -ENOMEM;
+				}
 			}
 		}
 	}

From 354bb65e6e0df0aaae0e5b1ea33948d8e0b61418 Mon Sep 17 00:00:00 2001
From: KOSAKI Motohiro
Date: Tue, 17 Nov 2009 16:21:09 +0900
Subject: [PATCH 11/12] Revert "Intel IOMMU: Avoid memory allocation failures in dma map api calls"

commit eb3fa7cb51 said:

    Intel IOMMU

    Intel IOMMU driver needs memory during DMA map calls to setup its
    internal page tables and for other data structures. As we all know
    that these DMA map calls are mostly called in the interrupt context
    or with the spinlock held by the upper level drivers
    (network/storage drivers), so in order to avoid any memory
    allocation failure due to low memory issues, this patch makes
    memory allocation by temporarily setting PF_MEMALLOC flags for the
    current task before making memory allocation calls.

    We evaluated mempools as a backup when kmem_cache_alloc() fails and
    found that mempools are really not useful here because
     1) We don't know for sure how much to reserve in advance
     2) And mempools are not useful for GFP_ATOMIC case (as we call
        memory alloc functions with GFP_ATOMIC)

    (akpm: point 2 is wrong...)

The above description doesn't justify wasting the system's emergency
memory at all. Non-MM subsystems must not use PF_MEMALLOC. Memory
reclaim needs a little memory of its own, and nothing must be allowed
to steal it; otherwise the system can end up in a mysterious hang-up
and/or an OOM-killer invocation. Plus, akpm already pointed out what we
should do instead. So, revert it.
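After the revert, the allocation helpers reduce to plain GFP_ATOMIC
allocations with no task-flag games (sketch of the resulting shape of
one of them):

	static inline void *alloc_domain_mem(void)
	{
		/* GFP_ATOMIC because DMA map paths run in atomic context */
		return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
	}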
Cc: Keshavamurthy Anil S
Signed-off-by: KOSAKI Motohiro
Signed-off-by: David Woodhouse
---
 drivers/pci/intel-iommu.c | 25 +++----------------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/drivers/pci/intel-iommu.c b/drivers/pci/intel-iommu.c
index 466079535330..4e1dd40f18e3 100644
--- a/drivers/pci/intel-iommu.c
+++ b/drivers/pci/intel-iommu.c
@@ -387,33 +387,14 @@ static struct kmem_cache *iommu_domain_cache;
 static struct kmem_cache *iommu_devinfo_cache;
 static struct kmem_cache *iommu_iova_cache;
 
-static inline void *iommu_kmem_cache_alloc(struct kmem_cache *cachep)
-{
-	unsigned int flags;
-	void *vaddr;
-
-	/* trying to avoid low memory issues */
-	flags = current->flags & PF_MEMALLOC;
-	current->flags |= PF_MEMALLOC;
-	vaddr = kmem_cache_alloc(cachep, GFP_ATOMIC);
-	current->flags &= (~PF_MEMALLOC | flags);
-	return vaddr;
-}
-
-
 static inline void *alloc_pgtable_page(int node)
 {
-	unsigned int flags;
 	struct page *page;
 	void *vaddr = NULL;
 
-	/* trying to avoid low memory issues */
-	flags = current->flags & PF_MEMALLOC;
-	current->flags |= PF_MEMALLOC;
 	page = alloc_pages_node(node, GFP_ATOMIC | __GFP_ZERO, 0);
 	if (page)
 		vaddr = page_address(page);
-	current->flags &= (~PF_MEMALLOC | flags);
 	return vaddr;
 }
 
@@ -424,7 +405,7 @@ static inline void free_pgtable_page(void *vaddr)
 
 static inline void *alloc_domain_mem(void)
 {
-	return iommu_kmem_cache_alloc(iommu_domain_cache);
+	return kmem_cache_alloc(iommu_domain_cache, GFP_ATOMIC);
 }
 
 static void free_domain_mem(void *vaddr)
@@ -434,7 +415,7 @@ static void free_domain_mem(void *vaddr)
 
 static inline void * alloc_devinfo_mem(void)
 {
-	return iommu_kmem_cache_alloc(iommu_devinfo_cache);
+	return kmem_cache_alloc(iommu_devinfo_cache, GFP_ATOMIC);
 }
 
 static inline void free_devinfo_mem(void *vaddr)
@@ -444,7 +425,7 @@ static inline void free_devinfo_mem(void *vaddr)
 
 struct iova *alloc_iova_mem(void)
 {
-	return iommu_kmem_cache_alloc(iommu_iova_cache);
+	return kmem_cache_alloc(iommu_iova_cache, GFP_ATOMIC);
 }
 
 void free_iova_mem(struct iova *iova)

From cd7bcf32d42b15891620b3f1387a00178b54291a Mon Sep 17 00:00:00 2001
From: "Luck, Tony"
Date: Mon, 14 Dec 2009 20:00:36 +0000
Subject: [PATCH 12/12] implement early_io{re,un}map for ia64

Needed for commit 2c992208 ("intel-iommu: Detect DMAR in hyperspace at
probe time.") to build on IA64.
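On ia64 the uncached identity mapping is always available, so the
"early" variants can be trivial wrappers with no tear-down work. As a
sketch of the idea behind the patch below:

	void __iomem *
	early_ioremap (unsigned long phys_addr, unsigned long size)
	{
		/* the __IA64_UNCACHED_OFFSET mapping needs no setup or fixmap slots */
		return __ioremap(phys_addr);
	}

	void
	early_iounmap (volatile void __iomem *addr, unsigned long size)
	{
		/* nothing to undo */
	}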
Signed-off-by: Tony Luck
Signed-off-by: David Woodhouse
---
 arch/ia64/include/asm/io.h |  2 ++
 arch/ia64/mm/ioremap.c     | 11 +++++++++++
 2 files changed, 13 insertions(+)

diff --git a/arch/ia64/include/asm/io.h b/arch/ia64/include/asm/io.h
index 0d9d16e2d949..cc8335eb3110 100644
--- a/arch/ia64/include/asm/io.h
+++ b/arch/ia64/include/asm/io.h
@@ -424,6 +424,8 @@ __writeq (unsigned long val, volatile void __iomem *addr)
 extern void __iomem * ioremap(unsigned long offset, unsigned long size);
 extern void __iomem * ioremap_nocache (unsigned long offset, unsigned long size);
 extern void iounmap (volatile void __iomem *addr);
+extern void __iomem * early_ioremap (unsigned long phys_addr, unsigned long size);
+extern void early_iounmap (volatile void __iomem *addr, unsigned long size);
 
 /*
  * String version of IO memory access ops:
diff --git a/arch/ia64/mm/ioremap.c b/arch/ia64/mm/ioremap.c
index 2a140627dfd6..3dccdd8eb275 100644
--- a/arch/ia64/mm/ioremap.c
+++ b/arch/ia64/mm/ioremap.c
@@ -21,6 +21,12 @@ __ioremap (unsigned long phys_addr)
 	return (void __iomem *) (__IA64_UNCACHED_OFFSET | phys_addr);
 }
 
+void __iomem *
+early_ioremap (unsigned long phys_addr, unsigned long size)
+{
+	return __ioremap(phys_addr);
+}
+
 void __iomem *
 ioremap (unsigned long phys_addr, unsigned long size)
 {
@@ -101,6 +107,11 @@ ioremap_nocache (unsigned long phys_addr, unsigned long size)
 }
 EXPORT_SYMBOL(ioremap_nocache);
 
+void
+early_iounmap (volatile void __iomem *addr, unsigned long size)
+{
+}
+
 void
 iounmap (volatile void __iomem *addr)
 {