From 636e97b0fb0a063dc89773b13064647f5da632cb Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 30 Jan 2014 18:18:08 +0000 Subject: [PATCH 01/75] iommu/arm-smmu: set MAX_MASTER_STREAMIDS to MAX_PHANDLE_ARGS The DT parsing code that determines stream IDs uses of_parse_phandle_with_args and thus MAX_MASTER_STREAMIDS is always bound by MAX_PHANDLE_ARGS. Signed-off-by: Andreas Herrmann Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 1d9ab39af29f..a3ce60887efa 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -48,7 +48,7 @@ #include /* Maximum number of stream IDs assigned to a single device */ -#define MAX_MASTER_STREAMIDS 8 +#define MAX_MASTER_STREAMIDS MAX_PHANDLE_ARGS /* Maximum number of context banks per SMMU */ #define ARM_SMMU_MAX_CBS 128 From 3a5df8ff35a28bfc0a3662a5906af2c98d4807d8 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 30 Jan 2014 18:18:04 +0000 Subject: [PATCH 02/75] iommu/arm-smmu: support buggy implementations with secure cfg accesses In such a case we have to use secure aliases of some non-secure registers. This handling is switched on by DT property "calxeda,smmu-secure-config-access" for an SMMU node. Signed-off-by: Andreas Herrmann [will: merged with driver option handling patch] Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 58 +++++++++++++++++++++++++++++++++------- 1 file changed, 48 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index a3ce60887efa..e04fdcb4b9ba 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -60,6 +60,16 @@ #define ARM_SMMU_GR0(smmu) ((smmu)->base) #define ARM_SMMU_GR1(smmu) ((smmu)->base + (smmu)->pagesize) +/* + * SMMU global address space with conditional offset to access secure + * aliases of non-secure registers (e.g. nsCR0: 0x400, nsGFSR: 0x448, + * nsGFSYNR0: 0x450) + */ +#define ARM_SMMU_GR0_NS(smmu) \ + ((smmu)->base + \ + ((smmu->options & ARM_SMMU_OPT_SECURE_CFG_ACCESS) \ + ? 0x400 : 0)) + /* Page table bits */ #define ARM_SMMU_PTE_XN (((pteval_t)3) << 53) #define ARM_SMMU_PTE_CONT (((pteval_t)1) << 52) @@ -351,6 +361,9 @@ struct arm_smmu_device { #define ARM_SMMU_FEAT_TRANS_S2 (1 << 3) #define ARM_SMMU_FEAT_TRANS_NESTED (1 << 4) u32 features; + +#define ARM_SMMU_OPT_SECURE_CFG_ACCESS (1 << 0) + u32 options; int version; u32 num_context_banks; @@ -401,6 +414,29 @@ struct arm_smmu_domain { static DEFINE_SPINLOCK(arm_smmu_devices_lock); static LIST_HEAD(arm_smmu_devices); +struct arm_smmu_option_prop { + u32 opt; + const char *prop; +}; + +static struct arm_smmu_option_prop arm_smmu_options [] = { + { ARM_SMMU_OPT_SECURE_CFG_ACCESS, "calxeda,smmu-secure-config-access" }, + { 0, NULL}, +}; + +static void parse_driver_options(struct arm_smmu_device *smmu) +{ + int i = 0; + do { + if (of_property_read_bool(smmu->dev->of_node, + arm_smmu_options[i].prop)) { + smmu->options |= arm_smmu_options[i].opt; + dev_notice(smmu->dev, "option %s\n", + arm_smmu_options[i].prop); + } + } while (arm_smmu_options[++i].opt); +} + static struct arm_smmu_master *find_smmu_master(struct arm_smmu_device *smmu, struct device_node *dev_node) { @@ -614,16 +650,16 @@ static irqreturn_t arm_smmu_global_fault(int irq, void *dev) { u32 gfsr, gfsynr0, gfsynr1, gfsynr2; struct arm_smmu_device *smmu = dev; - void __iomem *gr0_base = ARM_SMMU_GR0(smmu); + void __iomem *gr0_base = ARM_SMMU_GR0_NS(smmu); gfsr = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR); - if (!gfsr) - return IRQ_NONE; - gfsynr0 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR0); gfsynr1 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR1); gfsynr2 = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSYNR2); + if (!gfsr) + return IRQ_NONE; + dev_err_ratelimited(smmu->dev, "Unexpected global fault, this could be serious\n"); dev_err_ratelimited(smmu->dev, @@ -1597,9 +1633,9 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) int i = 0; u32 reg; - /* Clear Global FSR */ - reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sGFSR); - writel(reg, gr0_base + ARM_SMMU_GR0_sGFSR); + /* clear global FSR */ + reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR); + writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sGFSR); /* Mark all SMRn as invalid and all S2CRn as bypass */ for (i = 0; i < smmu->num_mapping_groups; ++i) { @@ -1619,7 +1655,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLH); writel_relaxed(0, gr0_base + ARM_SMMU_GR0_TLBIALLNSNH); - reg = readl_relaxed(gr0_base + ARM_SMMU_GR0_sCR0); + reg = readl_relaxed(ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); /* Enable fault reporting */ reg |= (sCR0_GFRE | sCR0_GFIE | sCR0_GCFGFRE | sCR0_GCFGFIE); @@ -1638,7 +1674,7 @@ static void arm_smmu_device_reset(struct arm_smmu_device *smmu) /* Push the button */ arm_smmu_tlb_sync(smmu); - writel_relaxed(reg, gr0_base + ARM_SMMU_GR0_sCR0); + writel(reg, ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); } static int arm_smmu_id_size_to_bits(int size) @@ -1885,6 +1921,8 @@ static int arm_smmu_device_dt_probe(struct platform_device *pdev) if (err) goto out_put_parent; + parse_driver_options(smmu); + if (smmu->version > 1 && smmu->num_context_banks != smmu->num_context_irqs) { dev_err(dev, @@ -1969,7 +2007,7 @@ static int arm_smmu_device_remove(struct platform_device *pdev) free_irq(smmu->irqs[i], smmu); /* Turn the thing off */ - writel_relaxed(sCR0_CLIENTPD, ARM_SMMU_GR0(smmu) + ARM_SMMU_GR0_sCR0); + writel(sCR0_CLIENTPD,ARM_SMMU_GR0_NS(smmu) + ARM_SMMU_GR0_sCR0); return 0; } From 0c9d70d72453036195682e64efce77872e9f268b Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Thu, 30 Jan 2014 20:17:52 +0000 Subject: [PATCH 03/75] documentation/iommu: update description of ARM System MMU binding This patch adds descriptions for new properties of the device tree binding for the ARM SMMU architecture. These properties control arm-smmu driver options. Cc: Grant Likely Acked-by: Rob Herring Signed-off-by: Andreas Herrmann [will: removed device isolation property, as this has been dropped and fixed up spacing in documentation] Signed-off-by: Will Deacon --- Documentation/devicetree/bindings/iommu/arm,smmu.txt | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/Documentation/devicetree/bindings/iommu/arm,smmu.txt b/Documentation/devicetree/bindings/iommu/arm,smmu.txt index e34c6cdd8ba8..f284b99402bc 100644 --- a/Documentation/devicetree/bindings/iommu/arm,smmu.txt +++ b/Documentation/devicetree/bindings/iommu/arm,smmu.txt @@ -48,6 +48,12 @@ conditions. from the mmu-masters towards memory) node for this SMMU. +- calxeda,smmu-secure-config-access : Enable proper handling of buggy + implementations that always use secure access to + SMMU configuration registers. In this case non-secure + aliases of secure registers have to be used during + SMMU configuration. + Example: smmu { From b410aed93288d0bd7650c4d17fd0f306b5082d6f Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 20 Feb 2014 16:31:06 +0000 Subject: [PATCH 04/75] iommu/arm-smmu: clean up use of `flags' in page table handling code Commit 972157cac528 ("arm/smmu: Use irqsafe spinlock for domain lock") fixed our page table locks to be the irq{save,restore} variants, since the DMA mapping API can be invoked from interrupt context. This patch cleans up our use of the flags variable so we can distinguish between IRQ flags (now `flags') and pte protection bits (now `prot'). Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 38 +++++++++++++++++++------------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index e04fdcb4b9ba..83297fe0878d 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1254,7 +1254,7 @@ static bool arm_smmu_pte_is_contiguous_range(unsigned long addr, static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, unsigned long addr, unsigned long end, - unsigned long pfn, int flags, int stage) + unsigned long pfn, int prot, int stage) { pte_t *pte, *start; pteval_t pteval = ARM_SMMU_PTE_PAGE | ARM_SMMU_PTE_AF | ARM_SMMU_PTE_XN; @@ -1276,28 +1276,28 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, if (stage == 1) { pteval |= ARM_SMMU_PTE_AP_UNPRIV | ARM_SMMU_PTE_nG; - if (!(flags & IOMMU_WRITE) && (flags & IOMMU_READ)) + if (!(prot & IOMMU_WRITE) && (prot & IOMMU_READ)) pteval |= ARM_SMMU_PTE_AP_RDONLY; - if (flags & IOMMU_CACHE) + if (prot & IOMMU_CACHE) pteval |= (MAIR_ATTR_IDX_CACHE << ARM_SMMU_PTE_ATTRINDX_SHIFT); } else { pteval |= ARM_SMMU_PTE_HAP_FAULT; - if (flags & IOMMU_READ) + if (prot & IOMMU_READ) pteval |= ARM_SMMU_PTE_HAP_READ; - if (flags & IOMMU_WRITE) + if (prot & IOMMU_WRITE) pteval |= ARM_SMMU_PTE_HAP_WRITE; - if (flags & IOMMU_CACHE) + if (prot & IOMMU_CACHE) pteval |= ARM_SMMU_PTE_MEMATTR_OIWB; else pteval |= ARM_SMMU_PTE_MEMATTR_NC; } /* If no access, create a faulting entry to avoid TLB fills */ - if (flags & IOMMU_EXEC) + if (prot & IOMMU_EXEC) pteval &= ~ARM_SMMU_PTE_XN; - else if (!(flags & (IOMMU_READ | IOMMU_WRITE))) + else if (!(prot & (IOMMU_READ | IOMMU_WRITE))) pteval &= ~ARM_SMMU_PTE_PAGE; pteval |= ARM_SMMU_PTE_SH_IS; @@ -1359,7 +1359,7 @@ static int arm_smmu_alloc_init_pte(struct arm_smmu_device *smmu, pmd_t *pmd, static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud, unsigned long addr, unsigned long end, - phys_addr_t phys, int flags, int stage) + phys_addr_t phys, int prot, int stage) { int ret; pmd_t *pmd; @@ -1383,7 +1383,7 @@ static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud, do { next = pmd_addr_end(addr, end); ret = arm_smmu_alloc_init_pte(smmu, pmd, addr, end, pfn, - flags, stage); + prot, stage); phys += next - addr; } while (pmd++, addr = next, addr < end); @@ -1392,7 +1392,7 @@ static int arm_smmu_alloc_init_pmd(struct arm_smmu_device *smmu, pud_t *pud, static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd, unsigned long addr, unsigned long end, - phys_addr_t phys, int flags, int stage) + phys_addr_t phys, int prot, int stage) { int ret = 0; pud_t *pud; @@ -1416,7 +1416,7 @@ static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd, do { next = pud_addr_end(addr, end); ret = arm_smmu_alloc_init_pmd(smmu, pud, addr, next, phys, - flags, stage); + prot, stage); phys += next - addr; } while (pud++, addr = next, addr < end); @@ -1425,7 +1425,7 @@ static int arm_smmu_alloc_init_pud(struct arm_smmu_device *smmu, pgd_t *pgd, static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain, unsigned long iova, phys_addr_t paddr, - size_t size, int flags) + size_t size, int prot) { int ret, stage; unsigned long end; @@ -1433,7 +1433,7 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain, struct arm_smmu_cfg *root_cfg = &smmu_domain->root_cfg; pgd_t *pgd = root_cfg->pgd; struct arm_smmu_device *smmu = root_cfg->smmu; - unsigned long irqflags; + unsigned long flags; if (root_cfg->cbar == CBAR_TYPE_S2_TRANS) { stage = 2; @@ -1456,14 +1456,14 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain, if (paddr & ~output_mask) return -ERANGE; - spin_lock_irqsave(&smmu_domain->lock, irqflags); + spin_lock_irqsave(&smmu_domain->lock, flags); pgd += pgd_index(iova); end = iova + size; do { unsigned long next = pgd_addr_end(iova, end); ret = arm_smmu_alloc_init_pud(smmu, pgd, iova, next, paddr, - flags, stage); + prot, stage); if (ret) goto out_unlock; @@ -1472,13 +1472,13 @@ static int arm_smmu_handle_mapping(struct arm_smmu_domain *smmu_domain, } while (pgd++, iova != end); out_unlock: - spin_unlock_irqrestore(&smmu_domain->lock, irqflags); + spin_unlock_irqrestore(&smmu_domain->lock, flags); return ret; } static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, - phys_addr_t paddr, size_t size, int flags) + phys_addr_t paddr, size_t size, int prot) { struct arm_smmu_domain *smmu_domain = domain->priv; @@ -1489,7 +1489,7 @@ static int arm_smmu_map(struct iommu_domain *domain, unsigned long iova, if ((phys_addr_t)iova & ~smmu_domain->output_mask) return -ERANGE; - return arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, flags); + return arm_smmu_handle_mapping(smmu_domain, iova, paddr, size, prot); } static size_t arm_smmu_unmap(struct iommu_domain *domain, unsigned long iova, From 3aa80ea4c90d46ffbe200d05b9ceb997001b36df Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 5 Feb 2014 23:35:47 +0000 Subject: [PATCH 05/75] iommu/arm-smmu: provide option to dsb macro when publishing tables On coherent systems, publishing new page tables to the SMMU walker is achieved with a dsb instruction. In fact, this can be a dsb(ishst) which also provides the mandatory barrier option for arm64. Acked-by: Catalin Marinas Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 83297fe0878d..1da5b41afc31 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -678,7 +678,7 @@ static void arm_smmu_flush_pgtable(struct arm_smmu_device *smmu, void *addr, /* Ensure new page tables are visible to the hardware walker */ if (smmu->features & ARM_SMMU_FEAT_COHERENT_WALK) { - dsb(); + dsb(ishst); } else { /* * If the SMMU can't walk tables in the CPU caches, treat them From 34fb4b37b7b6da7dc34797d1abf234dd30b091d8 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Wed, 26 Feb 2014 11:14:37 +0000 Subject: [PATCH 06/75] iommu/arm-smmu: fix incorrect comment regarding TLB invalidation Commit 1463fe44fd0f ("iommu/arm-smmu: Don't use VMIDs for stage-1 translations") moved our TLB invalidation from context creation time to context destruction time, but forgot to update an associated comment. This patch fixes the broken comment. Signed-off-by: Will Deacon --- drivers/iommu/arm-smmu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/arm-smmu.c b/drivers/iommu/arm-smmu.c index 1da5b41afc31..8b89e33a89fe 100644 --- a/drivers/iommu/arm-smmu.c +++ b/drivers/iommu/arm-smmu.c @@ -1026,9 +1026,8 @@ static void arm_smmu_free_pgtables(struct arm_smmu_domain *smmu_domain) /* * Recursively free the page tables for this domain. We don't - * care about speculative TLB filling, because the TLB will be - * nuked next time this context bank is re-allocated and no devices - * currently map to these tables. + * care about speculative TLB filling because the tables should + * not be active in any context bank at this point (SCTLR.M is 0). */ pgd = pgd_base; for (i = 0; i < PTRS_PER_PGD; ++i) { From e8d2d82d4a73f37b3270e4fd19ba83e48b589656 Mon Sep 17 00:00:00 2001 From: Jay Cornwall Date: Wed, 26 Feb 2014 15:49:31 -0600 Subject: [PATCH 07/75] iommu/amd: Fix PASID format in INVALIDATE_IOTLB_PAGES command This patch corrects the PASID format in the INVALIDATE_IOTLB_PAGES command, which was caused by incorrect information in the AMD IOMMU Architectural Specification v2.01 document. Incorrect format: cmd->data[0][16:23] = PASID[7:0] cmd->data[1][16:27] = PASID[19:8] Correct format: cmd->data[0][16:23] = PASID[15:8] cmd->data[1][16:23] = PASID[7:0] However, this does not affect the IOMMUv2 hardware implementation, and has been corrected since version 2.02 of the specification (available through AMD NDA). Signed-off-by: Jay Cornwall Reviewed-by: Suravee Suthikulpanit Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index faf0da4bb3a2..1dd9f818fc84 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -982,10 +982,10 @@ static void build_inv_iotlb_pasid(struct iommu_cmd *cmd, u16 devid, int pasid, address &= ~(0xfffULL); cmd->data[0] = devid; - cmd->data[0] |= (pasid & 0xff) << 16; + cmd->data[0] |= ((pasid >> 8) & 0xff) << 16; cmd->data[0] |= (qdep & 0xff) << 24; cmd->data[1] = devid; - cmd->data[1] |= ((pasid >> 8) & 0xfff) << 16; + cmd->data[1] |= (pasid & 0xff) << 16; cmd->data[2] = lower_32_bits(address); cmd->data[2] |= CMD_INV_IOMMU_PAGES_GN_MASK; cmd->data[3] = upper_32_bits(address); From b83544390797ce488ccc0f9e40aa2f96661f842b Mon Sep 17 00:00:00 2001 From: Paul Bolle Date: Sat, 8 Feb 2014 22:21:54 +0100 Subject: [PATCH 08/75] iommu/shmobile: Depend on ARCH_SHMOBILE Commit 78a2e12f51d9 ("iommu: shmobile: Enable driver compilation with COMPILE_TEST") added an optional dependency on SH_MOBILE. But that Kconfig symbol doesn't exist. It seems ARCH_SHMOBILE was intended. Use that. Signed-off-by: Paul Bolle Acked-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/Kconfig b/drivers/iommu/Kconfig index 79bbc21c1d01..df56e4c74a7e 100644 --- a/drivers/iommu/Kconfig +++ b/drivers/iommu/Kconfig @@ -207,7 +207,7 @@ config SHMOBILE_IOMMU bool "IOMMU for Renesas IPMMU/IPMMUI" default n depends on ARM - depends on SH_MOBILE || COMPILE_TEST + depends on ARCH_SHMOBILE || COMPILE_TEST select IOMMU_API select ARM_DMA_USE_IOMMU select SHMOBILE_IPMMU From f129b3dfb5517c91295da9fe0d2e584d8da25518 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 28 Feb 2014 14:42:32 -0600 Subject: [PATCH 09/75] iommu/omap: Convert to devm_* interfaces Use the various devm_ interfaces to simplify the cleanup in probe and remove functions. Signed-off-by: Florian Vaussard Signed-off-by: Suman Anna Acked-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 52 ++++++++------------------------------ 1 file changed, 10 insertions(+), 42 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index bcd78a720630..fff2ffdc26f4 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -941,7 +941,7 @@ static int omap_iommu_probe(struct platform_device *pdev) struct resource *res; struct iommu_platform_data *pdata = pdev->dev.platform_data; - obj = kzalloc(sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); + obj = devm_kzalloc(&pdev->dev, sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); if (!obj) return -ENOMEM; @@ -958,33 +958,18 @@ static int omap_iommu_probe(struct platform_device *pdev) INIT_LIST_HEAD(&obj->mmap); res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) { - err = -ENODEV; - goto err_mem; - } - - res = request_mem_region(res->start, resource_size(res), - dev_name(&pdev->dev)); - if (!res) { - err = -EIO; - goto err_mem; - } - - obj->regbase = ioremap(res->start, resource_size(res)); - if (!obj->regbase) { - err = -ENOMEM; - goto err_ioremap; - } + obj->regbase = devm_ioremap_resource(obj->dev, res); + if (IS_ERR(obj->regbase)) + return PTR_ERR(obj->regbase); irq = platform_get_irq(pdev, 0); - if (irq < 0) { - err = -ENODEV; - goto err_irq; - } - err = request_irq(irq, iommu_fault_handler, IRQF_SHARED, - dev_name(&pdev->dev), obj); + if (irq < 0) + return -ENODEV; + + err = devm_request_irq(obj->dev, irq, iommu_fault_handler, IRQF_SHARED, + dev_name(obj->dev), obj); if (err < 0) - goto err_irq; + return err; platform_set_drvdata(pdev, obj); pm_runtime_irq_safe(obj->dev); @@ -992,34 +977,17 @@ static int omap_iommu_probe(struct platform_device *pdev) dev_info(&pdev->dev, "%s registered\n", obj->name); return 0; - -err_irq: - iounmap(obj->regbase); -err_ioremap: - release_mem_region(res->start, resource_size(res)); -err_mem: - kfree(obj); - return err; } static int omap_iommu_remove(struct platform_device *pdev) { - int irq; - struct resource *res; struct omap_iommu *obj = platform_get_drvdata(pdev); iopgtable_clear_entry_all(obj); - irq = platform_get_irq(pdev, 0); - free_irq(irq, obj); - res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - release_mem_region(res->start, resource_size(res)); - iounmap(obj->regbase); - pm_runtime_disable(obj->dev); dev_info(&pdev->dev, "%s removed\n", obj->name); - kfree(obj); return 0; } From 7ee08b9ef2e942a5477f02a71947b933eb4101d2 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 28 Feb 2014 14:42:33 -0600 Subject: [PATCH 10/75] iommu/omap: Fix error return paths in omap_iommu_attach() There are couple of issues with the error return paths in omap_iommu_attach(): 1. omap_iommu_attach() returns NULL or ERR_PTR in case of error, but omap_iommu_attach_dev() only checks for IS_ERR. Thus a NULL return value (in case driver_find_device fails) will cause the kernel to panic when omap_iommu_attach_dev() dereferences the pointer. 2. A try_module_get() failure returns a valid success value as returned from iommu_enable(). Both the above issues have been fixed up to return the proper ERR_PTR. Signed-off-by: Florian Vaussard Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index fff2ffdc26f4..647e4ba0df95 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -863,7 +863,7 @@ static int device_match_by_alias(struct device *dev, void *data) **/ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) { - int err = -ENOMEM; + int err; struct device *dev; struct omap_iommu *obj; @@ -871,7 +871,7 @@ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) (void *)name, device_match_by_alias); if (!dev) - return NULL; + return ERR_PTR(-ENODEV); obj = to_iommu(dev); @@ -890,8 +890,10 @@ static struct omap_iommu *omap_iommu_attach(const char *name, u32 *iopgd) goto err_enable; flush_iotlb_all(obj); - if (!try_module_get(obj->owner)) + if (!try_module_get(obj->owner)) { + err = -ENODEV; goto err_module; + } spin_unlock(&obj->iommu_lock); From 90e569c4ca727b59ba8f82a9cd5e2f43462bbc3b Mon Sep 17 00:00:00 2001 From: Florian Vaussard Date: Fri, 28 Feb 2014 14:42:34 -0600 Subject: [PATCH 11/75] iommu/omap: Allow enable/disable even without pdata When booting with a devicetree, no platform data is provided. Do not prematurely exit iommu_enable() and iommu_disable() in such a case. Note: As OMAP do not yet has a proper reset controller driver, IOMMUs requiring a reset signal should use pdata-quirks as a transitional solution. Signed-off-by: Florian Vaussard Signed-off-by: Suman Anna Acked-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 647e4ba0df95..217952b993e1 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -146,13 +146,10 @@ static int iommu_enable(struct omap_iommu *obj) struct platform_device *pdev = to_platform_device(obj->dev); struct iommu_platform_data *pdata = pdev->dev.platform_data; - if (!pdata) - return -EINVAL; - if (!arch_iommu) return -ENODEV; - if (pdata->deassert_reset) { + if (pdata && pdata->deassert_reset) { err = pdata->deassert_reset(pdev, pdata->reset_name); if (err) { dev_err(obj->dev, "deassert_reset failed: %d\n", err); @@ -172,14 +169,11 @@ static void iommu_disable(struct omap_iommu *obj) struct platform_device *pdev = to_platform_device(obj->dev); struct iommu_platform_data *pdata = pdev->dev.platform_data; - if (!pdata) - return; - arch_iommu->disable(obj); pm_runtime_put_sync(obj->dev); - if (pdata->assert_reset) + if (pdata && pdata->assert_reset) pdata->assert_reset(pdev, pdata->reset_name); } From a8689f65de5a8c3e47bf29c2550dfebe0744e077 Mon Sep 17 00:00:00 2001 From: Florian Vaussard Date: Fri, 28 Feb 2014 14:42:35 -0600 Subject: [PATCH 12/75] Documentation: dt: add OMAP iommu bindings This patch adds the iommu bindings for all OMAP2+ SoCs. Apart from the standard bindings used by OMAP peripherals, this patch uses a 'dma-window' (already used by Tegra SMMU) and adds two OMAP custom bindings - 'ti,#tlb-entries' and 'ti,iommu-bus-err-back'. Signed-off-by: Florian Vaussard [s-anna@ti.com: split bindings document, add dra7 and bus error back] Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- .../bindings/iommu/ti,omap-iommu.txt | 26 +++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt diff --git a/Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt b/Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt new file mode 100644 index 000000000000..42531dc387aa --- /dev/null +++ b/Documentation/devicetree/bindings/iommu/ti,omap-iommu.txt @@ -0,0 +1,26 @@ +OMAP2+ IOMMU + +Required properties: +- compatible : Should be one of, + "ti,omap2-iommu" for OMAP2/OMAP3 IOMMU instances + "ti,omap4-iommu" for OMAP4/OMAP5 IOMMU instances + "ti,dra7-iommu" for DRA7xx IOMMU instances +- ti,hwmods : Name of the hwmod associated with the IOMMU instance +- reg : Address space for the configuration registers +- interrupts : Interrupt specifier for the IOMMU instance + +Optional properties: +- ti,#tlb-entries : Number of entries in the translation look-aside buffer. + Should be either 8 or 32 (default: 32) +- ti,iommu-bus-err-back : Indicates the IOMMU instance supports throwing + back a bus error response on MMU faults. + +Example: + /* OMAP3 ISP MMU */ + mmu_isp: mmu@480bd400 { + compatible = "ti,omap2-iommu"; + reg = <0x480bd400 0x80>; + interrupts = <24>; + ti,hwmods = "mmu_isp"; + ti,#tlb-entries = <8>; + }; From 3c92748df9941ff8f5be4655a04812ff4d0c7eeb Mon Sep 17 00:00:00 2001 From: Florian Vaussard Date: Fri, 28 Feb 2014 14:42:36 -0600 Subject: [PATCH 13/75] iommu/omap: Add devicetree support As OMAP2+ is moving to a full DT boot for all SoC families, commit 7ce93f3 "ARM: OMAP2+: Fix more missing data for omap3.dtsi file" adds basic DT bits for OMAP3. But the driver is not yet converted, so this will not work and driver will not be probed. Convert it! The legacy boot mode is still supported until OMAP3 is converted to DT-boot only. Signed-off-by: Florian Vaussard [s-anna@ti.com: dev_name adaptation and improved error checking] Signed-off-by: Suman Anna [tony@atomide.com: Ack for arch/arm/*omap* parts] Acked-by: Tony Lindgren Signed-off-by: Joerg Roedel --- arch/arm/mach-omap2/omap-iommu.c | 5 ++++ drivers/iommu/omap-iommu.c | 42 +++++++++++++++++++++++++++++--- 2 files changed, 43 insertions(+), 4 deletions(-) diff --git a/arch/arm/mach-omap2/omap-iommu.c b/arch/arm/mach-omap2/omap-iommu.c index f6daae821ebb..f1fab5684a24 100644 --- a/arch/arm/mach-omap2/omap-iommu.c +++ b/arch/arm/mach-omap2/omap-iommu.c @@ -10,6 +10,7 @@ * published by the Free Software Foundation. */ +#include #include #include #include @@ -58,6 +59,10 @@ static int __init omap_iommu_dev_init(struct omap_hwmod *oh, void *unused) static int __init omap_iommu_init(void) { + /* If dtb is there, the devices will be created dynamically */ + if (of_have_populated_dt()) + return -ENODEV; + return omap_hwmod_for_each_by_class("mmu", omap_iommu_dev_init, NULL); } /* must be ready before omap3isp is probed */ diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 217952b993e1..eb73ef2b6fa9 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -23,6 +23,9 @@ #include #include #include +#include +#include +#include #include @@ -936,17 +939,39 @@ static int omap_iommu_probe(struct platform_device *pdev) struct omap_iommu *obj; struct resource *res; struct iommu_platform_data *pdata = pdev->dev.platform_data; + struct device_node *of = pdev->dev.of_node; obj = devm_kzalloc(&pdev->dev, sizeof(*obj) + MMU_REG_SIZE, GFP_KERNEL); if (!obj) return -ENOMEM; - obj->nr_tlb_entries = pdata->nr_tlb_entries; - obj->name = pdata->name; + if (of) { + obj->name = dev_name(&pdev->dev); + obj->nr_tlb_entries = 32; + err = of_property_read_u32(of, "ti,#tlb-entries", + &obj->nr_tlb_entries); + if (err && err != -EINVAL) + return err; + if (obj->nr_tlb_entries != 32 && obj->nr_tlb_entries != 8) + return -EINVAL; + /* + * da_start and da_end are needed for omap-iovmm, so hardcode + * these values as used by OMAP3 ISP - the only user for + * omap-iovmm + */ + obj->da_start = 0; + obj->da_end = 0xfffff000; + } else { + obj->nr_tlb_entries = pdata->nr_tlb_entries; + obj->name = pdata->name; + obj->da_start = pdata->da_start; + obj->da_end = pdata->da_end; + } + if (obj->da_end <= obj->da_start) + return -EINVAL; + obj->dev = &pdev->dev; obj->ctx = (void *)obj + sizeof(*obj); - obj->da_start = pdata->da_start; - obj->da_end = pdata->da_end; spin_lock_init(&obj->iommu_lock); mutex_init(&obj->mmap_lock); @@ -987,11 +1012,20 @@ static int omap_iommu_remove(struct platform_device *pdev) return 0; } +static struct of_device_id omap_iommu_of_match[] = { + { .compatible = "ti,omap2-iommu" }, + { .compatible = "ti,omap4-iommu" }, + { .compatible = "ti,dra7-iommu" }, + {}, +}; +MODULE_DEVICE_TABLE(of, omap_iommu_of_match); + static struct platform_driver omap_iommu_driver = { .probe = omap_iommu_probe, .remove = omap_iommu_remove, .driver = { .name = "omap-iommu", + .of_match_table = of_match_ptr(omap_iommu_of_match), }, }; From b148d5fb2ef7bd1441e95402effe22b7b34f9a73 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Fri, 28 Feb 2014 14:42:37 -0600 Subject: [PATCH 14/75] iommu/omap: Enable bus-error back on supported iommus The remoteproc MMUs in OMAP4+ SoCs have some additional debug registers that can give out the PC value in addition to the MMU fault address. The PC value can be extracted properly only on the DSP cores, and is not available on the ARM processors within the IPU sub-systems. Instead, the MMUs have been enhanced to throw a bus-error response back to the IPU processors. This functionality is programmable through the MMU_GP_REG register. The cores are simply stalled if the MMU_GP_REG.BUS_ERR_BACK_EN bit is not set. When set, a bus-error exception is raised allowing the processor to handle it as a bus fault and provide additional debug information. This feature is turned on by default by the driver on iommus supporting it. Signed-off-by: Subramaniam Chanderashekarapuram Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 2 ++ drivers/iommu/omap-iommu.h | 5 +++++ drivers/iommu/omap-iommu2.c | 3 +++ 3 files changed, 10 insertions(+) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index eb73ef2b6fa9..28bc63143279 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -961,6 +961,8 @@ static int omap_iommu_probe(struct platform_device *pdev) */ obj->da_start = 0; obj->da_end = 0xfffff000; + if (of_find_property(of, "ti,iommu-bus-err-back", NULL)) + obj->has_bus_err_back = MMU_GP_REG_BUS_ERR_BACK_EN; } else { obj->nr_tlb_entries = pdata->nr_tlb_entries; obj->name = pdata->name; diff --git a/drivers/iommu/omap-iommu.h b/drivers/iommu/omap-iommu.h index 120084206602..ea920c3e94ff 100644 --- a/drivers/iommu/omap-iommu.h +++ b/drivers/iommu/omap-iommu.h @@ -52,6 +52,8 @@ struct omap_iommu { void *ctx; /* iommu context: registres saved area */ u32 da_start; u32 da_end; + + int has_bus_err_back; }; struct cr_regs { @@ -130,6 +132,7 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) #define MMU_READ_CAM 0x68 #define MMU_READ_RAM 0x6c #define MMU_EMU_FAULT_AD 0x70 +#define MMU_GP_REG 0x88 #define MMU_REG_SIZE 256 @@ -163,6 +166,8 @@ static inline struct omap_iommu *dev_to_omap_iommu(struct device *dev) #define MMU_RAM_MIXED_MASK (1 << MMU_RAM_MIXED_SHIFT) #define MMU_RAM_MIXED MMU_RAM_MIXED_MASK +#define MMU_GP_REG_BUS_ERR_BACK_EN 0x1 + /* * utilities for super page(16MB, 1MB, 64KB and 4KB) */ diff --git a/drivers/iommu/omap-iommu2.c b/drivers/iommu/omap-iommu2.c index d745094a69dd..5e1ea3b0bf16 100644 --- a/drivers/iommu/omap-iommu2.c +++ b/drivers/iommu/omap-iommu2.c @@ -98,6 +98,9 @@ static int omap2_iommu_enable(struct omap_iommu *obj) iommu_write_reg(obj, pa, MMU_TTB); + if (obj->has_bus_err_back) + iommu_write_reg(obj, MMU_GP_REG_BUS_ERR_BACK_EN, MMU_GP_REG); + __iommu_set_twl(obj, true); return 0; From 07a0203021149140f83e64a9ac5fe7ed09a74afc Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Fri, 28 Feb 2014 14:42:38 -0600 Subject: [PATCH 15/75] iommu/omap: Allocate archdata on the fly for DT-based devices The OMAP IOMMU driver locates the IOMMU associated to a device using the IOMMU name stored in the device archdata iommu field. That field is expected to be populated by platform code and is left unset for DT-based devices. This results in a crash when the IOMMU driver attaches a domain to a device. Fix this by allocating the archdata iommu structure when devices are added and freeing when they are removed. Devices without an OF node, and devices without an iommus property in their OF node are ignored. The iommu name is initialized from the IOMMU device node name. This should be simplified when removing non-DT support completely from the IOMMU users as the IOMMU name won't be needed anymore, and the IOMMU device pointer could then be stored in the archdata iommu field directly. Signed-off-by: Laurent Pinchart [s-anna@ti.com: updated to use device name instead of OF name] Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 45 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 28bc63143279..8acea87cbc0e 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -1253,6 +1253,49 @@ static int omap_iommu_domain_has_cap(struct iommu_domain *domain, return 0; } +static int omap_iommu_add_device(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data; + struct device_node *np; + + /* + * Allocate the archdata iommu structure for DT-based devices. + * + * TODO: Simplify this when removing non-DT support completely from the + * IOMMU users. + */ + if (!dev->of_node) + return 0; + + np = of_parse_phandle(dev->of_node, "iommus", 0); + if (!np) + return 0; + + arch_data = kzalloc(sizeof(*arch_data), GFP_KERNEL); + if (!arch_data) { + of_node_put(np); + return -ENOMEM; + } + + arch_data->name = kstrdup(dev_name(dev), GFP_KERNEL); + dev->archdata.iommu = arch_data; + + of_node_put(np); + + return 0; +} + +static void omap_iommu_remove_device(struct device *dev) +{ + struct omap_iommu_arch_data *arch_data = dev->archdata.iommu; + + if (!dev->of_node || !arch_data) + return; + + kfree(arch_data->name); + kfree(arch_data); +} + static struct iommu_ops omap_iommu_ops = { .domain_init = omap_iommu_domain_init, .domain_destroy = omap_iommu_domain_destroy, @@ -1262,6 +1305,8 @@ static struct iommu_ops omap_iommu_ops = { .unmap = omap_iommu_unmap, .iova_to_phys = omap_iommu_iova_to_phys, .domain_has_cap = omap_iommu_domain_has_cap, + .add_device = omap_iommu_add_device, + .remove_device = omap_iommu_remove_device, .pgsize_bitmap = OMAP_IOMMU_PGSIZES, }; From 989d51fc99e9df4fb47f34bccf162c59aa386e8c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:21 +0800 Subject: [PATCH 16/75] iommu/vt-d: Avoid double free of g_iommus on error recovery path Array 'g_iommus' may be freed twice on error recovery path in function init_dmars() and free_dmar_iommu(), thus cause random system crash as below. [ 6.774301] IOMMU: dmar init failed [ 6.778310] PCI-DMA: Using software bounce buffering for IO (SWIOTLB) [ 6.785615] software IO TLB [mem 0x76bcf000-0x7abcf000] (64MB) mapped at [ffff880076bcf000-ffff88007abcefff] [ 6.796887] general protection fault: 0000 [#1] SMP DEBUG_PAGEALLOC [ 6.804173] Modules linked in: [ 6.807731] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.14.0-rc1+ #108 [ 6.815122] Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRIVTIN1.86B.0047.R00.1402050741 02/05/2014 [ 6.836000] task: ffff880455a80000 ti: ffff880455a88000 task.ti: ffff880455a88000 [ 6.844487] RIP: 0010:[] [] memcpy+0x6/0x110 [ 6.853039] RSP: 0000:ffff880455a89cc8 EFLAGS: 00010293 [ 6.859064] RAX: ffff006568636163 RBX: ffff00656863616a RCX: 0000000000000005 [ 6.867134] RDX: 0000000000000005 RSI: ffffffff81cdc439 RDI: ffff006568636163 [ 6.875205] RBP: ffff880455a89d30 R08: 000000000001bc3b R09: 0000000000000000 [ 6.883275] R10: 0000000000000000 R11: ffffffff81cdc43e R12: ffff880455a89da8 [ 6.891338] R13: ffff006568636163 R14: 0000000000000005 R15: ffffffff81cdc439 [ 6.899408] FS: 0000000000000000(0000) GS:ffff88045b800000(0000) knlGS:0000000000000000 [ 6.908575] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6.915088] CR2: ffff88047e1ff000 CR3: 0000000001e0e000 CR4: 00000000001407f0 [ 6.923160] Stack: [ 6.925487] ffffffff8143c904 ffff88045b407e00 ffff006568636163 ffff006568636163 [ 6.934113] ffffffff8120a1a9 ffffffff81cdc43e 0000000000000007 0000000000000000 [ 6.942747] ffff880455a89da8 ffff006568636163 0000000000000007 ffffffff81cdc439 [ 6.951382] Call Trace: [ 6.954197] [] ? vsnprintf+0x124/0x6f0 [ 6.960323] [] ? __kmalloc_track_caller+0x169/0x360 [ 6.967716] [] kvasprintf+0x6b/0x80 [ 6.973552] [] kobject_set_name_vargs+0x21/0x70 [ 6.980552] [] kobject_init_and_add+0x4d/0x90 [ 6.987364] [] ? __kmalloc+0x169/0x370 [ 6.993492] [] ? cache_add_dev+0x17c/0x4f0 [ 7.000005] [] cache_add_dev+0x3ba/0x4f0 [ 7.006327] [] ? i8237A_init_ops+0x14/0x14 [ 7.012842] [] cache_sysfs_init+0x2e/0x61 [ 7.019260] [] do_one_initcall+0xf2/0x220 [ 7.025679] [] ? parse_args+0x2c9/0x450 [ 7.031903] [] kernel_init_freeable+0x1c9/0x25b [ 7.038904] [] ? do_early_param+0x8a/0x8a [ 7.045322] [] ? rest_init+0x150/0x150 [ 7.051447] [] kernel_init+0xe/0x100 [ 7.057380] [] ret_from_fork+0x7c/0xb0 [ 7.063503] [] ? rest_init+0x150/0x150 [ 7.069628] Code: 89 e5 53 48 89 fb 75 16 80 7f 3c 00 75 05 e8 d2 f9 ff ff 48 8b 43 58 48 2b 43 50 88 43 4e 5b 5d c3 90 90 90 90 48 89 f8 48 89 d1 a4 c3 03 83 e2 07 f3 48 a5 89 d1 f3 a4 c3 20 4c 8b 06 4c 8b [ 7.094960] RIP [] memcpy+0x6/0x110 [ 7.100856] RSP [ 7.104864] ---[ end trace b5d3fdc6c6c28083 ]--- [ 7.110142] Kernel panic - not syncing: Attempted to kill init! exitcode=0x0000000b [ 7.110142] [ 7.120540] Kernel Offset: 0x0 from 0xffffffff81000000 (relocation range: 0xffffffff80000000-0xffffffff9fffffff) Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a22c86c867fa..52be7555b0df 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1298,15 +1298,6 @@ static void free_dmar_iommu(struct intel_iommu *iommu) g_iommus[iommu->seq_id] = NULL; - /* if all iommus are freed, free g_iommus */ - for (i = 0; i < g_num_of_iommus; i++) { - if (g_iommus[i]) - break; - } - - if (i == g_num_of_iommus) - kfree(g_iommus); - /* free context mapping */ free_context_table(iommu); } @@ -2461,7 +2452,7 @@ static int __init init_dmars(void) sizeof(struct deferred_flush_tables), GFP_KERNEL); if (!deferred_flush) { ret = -ENOMEM; - goto error; + goto free_g_iommus; } for_each_active_iommu(iommu, drhd) { @@ -2469,7 +2460,7 @@ static int __init init_dmars(void) ret = iommu_init_domains(iommu); if (ret) - goto error; + goto free_iommu; /* * TBD: @@ -2479,7 +2470,7 @@ static int __init init_dmars(void) ret = iommu_alloc_root_entry(iommu); if (ret) { printk(KERN_ERR "IOMMU: allocate root entry failed\n"); - goto error; + goto free_iommu; } if (!ecap_pass_through(iommu->ecap)) hw_pass_through = 0; @@ -2548,7 +2539,7 @@ static int __init init_dmars(void) ret = iommu_prepare_static_identity_mapping(hw_pass_through); if (ret) { printk(KERN_CRIT "Failed to setup IOMMU pass-through\n"); - goto error; + goto free_iommu; } } /* @@ -2606,7 +2597,7 @@ static int __init init_dmars(void) ret = dmar_set_interrupt(iommu); if (ret) - goto error; + goto free_iommu; iommu_set_root_entry(iommu); @@ -2615,17 +2606,20 @@ static int __init init_dmars(void) ret = iommu_enable_translation(iommu); if (ret) - goto error; + goto free_iommu; iommu_disable_protect_mem_regions(iommu); } return 0; -error: + +free_iommu: for_each_active_iommu(iommu, drhd) free_dmar_iommu(iommu); kfree(deferred_flush); +free_g_iommus: kfree(g_iommus); +error: return ret; } From 816997d03bca9fabcee65f3481eb0297103eceb7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:22 +0800 Subject: [PATCH 17/75] iommu/vt-d: Avoid caching stale domain_device_info and fix memory leak Function device_notifier() in intel-iommu.c fails to remove device_domain_info data structures for PCI devices if they are associated with si_domain because iommu_no_mapping() returns true for those PCI devices. This will cause memory leak and caching of stale information in domain->devices list. So fix the issue by not calling iommu_no_mapping() and skipping check of iommu_pass_through. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 52be7555b0df..f75d3aef6de6 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3600,14 +3600,14 @@ static int device_notifier(struct notifier_block *nb, struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; - if (iommu_no_mapping(dev)) + if (iommu_dummy(pdev)) return 0; domain = find_domain(pdev); if (!domain) return 0; - if (action == BUS_NOTIFY_UNBOUND_DRIVER && !iommu_pass_through) { + if (action == BUS_NOTIFY_UNBOUND_DRIVER) { domain_remove_one_dev_info(domain, pdev); if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && From 7e7dfab71af851a7350ab47b1c01a2f84deacc12 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:23 +0800 Subject: [PATCH 18/75] iommu/vt-d: Avoid caching stale domain_device_info when hot-removing PCI device Function device_notifier() in intel-iommu.c only remove domain_device_info data structure associated with a PCI device when handling PCI device driver unbinding events. If a PCI device has never been bound to a PCI device driver, there won't be BUS_NOTIFY_UNBOUND_DRIVER event when hot-removing the PCI device. So associated domain_device_info data structure may get lost. On the other hand, if iommu_pass_through is enabled, function iommu_prepare_static_indentify_mapping() will create domain_device_info data structure for each PCIe to PCIe bridge and PCIe endpoint, no matter whether there are drivers associated with those PCIe devices or not. So those domain_device_info data structures will get lost when hot-removing the assocated PCIe devices if they have never bound to any PCI device driver. To be even worse, it's not only an memory leak issue, but also an caching of stale information bug because the memory are kept in device_domain_list and domain->devices lists. Fix the bug by trying to remove domain_device_info data structure when handling BUS_NOTIFY_DEL_DEVICE event. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f75d3aef6de6..5a411e83433c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3603,18 +3603,19 @@ static int device_notifier(struct notifier_block *nb, if (iommu_dummy(pdev)) return 0; + if (action != BUS_NOTIFY_UNBOUND_DRIVER && + action != BUS_NOTIFY_DEL_DEVICE) + return 0; + domain = find_domain(pdev); if (!domain) return 0; - if (action == BUS_NOTIFY_UNBOUND_DRIVER) { - domain_remove_one_dev_info(domain, pdev); - - if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && - !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && - list_empty(&domain->devices)) - domain_exit(domain); - } + domain_remove_one_dev_info(domain, pdev); + if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && + !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && + list_empty(&domain->devices)) + domain_exit(domain); return 0; } From bb3a6b7845851d23cb826040b4c3c9c294e7cfb4 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:24 +0800 Subject: [PATCH 19/75] iommu/vt-d: Factor out dmar_alloc_dev_scope() for later reuse Factor out function dmar_alloc_dev_scope() from dmar_parse_dev_scope() for later reuse. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 28 ++++++++++++++++------------ include/linux/dmar.h | 1 + 2 files changed, 17 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 158156543410..58dde75c5229 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -117,13 +117,9 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, return 0; } -int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, - struct pci_dev ***devices, u16 segment) +void *dmar_alloc_dev_scope(void *start, void *end, int *cnt) { struct acpi_dmar_device_scope *scope; - void * tmp = start; - int index; - int ret; *cnt = 0; while (start < end) { @@ -138,15 +134,24 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, start += scope->length; } if (*cnt == 0) - return 0; + return NULL; - *devices = kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL); - if (!*devices) + return kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL); +} + +int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, + struct pci_dev ***devices, u16 segment) +{ + struct acpi_dmar_device_scope *scope; + int index, ret; + + *devices = dmar_alloc_dev_scope(start, end, cnt); + if (*cnt == 0) + return 0; + else if (!*devices) return -ENOMEM; - start = tmp; - index = 0; - while (start < end) { + for (index = 0; start < end; start += scope->length) { scope = start; if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) { @@ -158,7 +163,6 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, } index ++; } - start += scope->length; } return 0; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index eccb0c0c6cf6..1b08ce80bfbf 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -69,6 +69,7 @@ extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, struct pci_dev ***devices, u16 segment); +extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); extern void dmar_free_dev_scope(struct pci_dev ***devices, int *cnt); /* Intel IOMMU detection */ From b94e4117f8c4ffb591b1e462364d725e3a1c63c4 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:25 +0800 Subject: [PATCH 20/75] iommu/vt-d: Move private structures and variables into intel-iommu.c Move private structures and variables into intel-iommu.c, which will help to simplify locking policy for hotplug. Also delete redundant declarations. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 31 +++++++++++++++++++++++++------ include/linux/dmar.h | 23 +---------------------- 2 files changed, 26 insertions(+), 28 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5a411e83433c..fffe3d166662 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -380,6 +380,29 @@ struct device_domain_info { struct dmar_domain *domain; /* pointer to domain */ }; +struct dmar_rmrr_unit { + struct list_head list; /* list of rmrr units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + u64 base_address; /* reserved base address*/ + u64 end_address; /* reserved end address */ + struct pci_dev **devices; /* target devices */ + int devices_cnt; /* target device count */ +}; + +struct dmar_atsr_unit { + struct list_head list; /* list of ATSR units */ + struct acpi_dmar_header *hdr; /* ACPI header */ + struct pci_dev **devices; /* target devices */ + int devices_cnt; /* target device count */ + u8 include_all:1; /* include all ports */ +}; + +static LIST_HEAD(dmar_atsr_units); +static LIST_HEAD(dmar_rmrr_units); + +#define for_each_rmrr_units(rmrr) \ + list_for_each_entry(rmrr, &dmar_rmrr_units, list) + static void flush_unmaps_timeout(unsigned long data); static DEFINE_TIMER(unmap_timer, flush_unmaps_timeout, 0, 0); @@ -403,6 +426,8 @@ static int timer_on; static long list_size; static void domain_remove_dev_info(struct dmar_domain *domain); +static void domain_remove_one_dev_info(struct dmar_domain *domain, + struct pci_dev *pdev); #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON int dmar_disabled = 0; @@ -2243,8 +2268,6 @@ static int __init si_domain_init(int hw) return 0; } -static void domain_remove_one_dev_info(struct dmar_domain *domain, - struct pci_dev *pdev); static int identity_mapping(struct pci_dev *pdev) { struct device_domain_info *info; @@ -3432,8 +3455,6 @@ static void __init init_iommu_pm_ops(void) static inline void init_iommu_pm_ops(void) {} #endif /* CONFIG_PM */ -LIST_HEAD(dmar_rmrr_units); - static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) { list_add(&rmrr->list, &dmar_rmrr_units); @@ -3470,8 +3491,6 @@ rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) rmrr->segment); } -static LIST_HEAD(dmar_atsr_units); - int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) { struct acpi_dmar_atsr *atsr; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 1b08ce80bfbf..ea599d4ca9e0 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -139,28 +139,7 @@ extern int arch_setup_dmar_msi(unsigned int irq); #ifdef CONFIG_INTEL_IOMMU extern int iommu_detected, no_iommu; -extern struct list_head dmar_rmrr_units; -struct dmar_rmrr_unit { - struct list_head list; /* list of rmrr units */ - struct acpi_dmar_header *hdr; /* ACPI header */ - u64 base_address; /* reserved base address*/ - u64 end_address; /* reserved end address */ - struct pci_dev **devices; /* target devices */ - int devices_cnt; /* target device count */ -}; - -#define for_each_rmrr_units(rmrr) \ - list_for_each_entry(rmrr, &dmar_rmrr_units, list) - -struct dmar_atsr_unit { - struct list_head list; /* list of ATSR units */ - struct acpi_dmar_header *hdr; /* ACPI header */ - struct pci_dev **devices; /* target devices */ - int devices_cnt; /* target device count */ - u8 include_all:1; /* include all ports */ -}; - -int dmar_parse_rmrr_atsr_dev(void); +extern int dmar_parse_rmrr_atsr_dev(void); extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); extern int intel_iommu_init(void); From 745f2586e78efc5c226a60cfc8e7fbd735b1c856 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:26 +0800 Subject: [PATCH 21/75] iommu/vt-d: Simplify function get_domain_for_dev() Function get_domain_for_dev() is a little complex, simplify it by factoring out dmar_search_domain_by_dev_info() and dmar_insert_dev_info(). Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 142 +++++++++++++++++------------------- 1 file changed, 66 insertions(+), 76 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index fffe3d166662..67b114e784bc 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1957,18 +1957,71 @@ find_domain(struct pci_dev *pdev) return NULL; } +static inline struct dmar_domain * +dmar_search_domain_by_dev_info(int segment, int bus, int devfn) +{ + struct device_domain_info *info; + + list_for_each_entry(info, &device_domain_list, global) + if (info->segment == segment && info->bus == bus && + info->devfn == devfn) + return info->domain; + + return NULL; +} + +static int dmar_insert_dev_info(int segment, int bus, int devfn, + struct pci_dev *dev, struct dmar_domain **domp) +{ + struct dmar_domain *found, *domain = *domp; + struct device_domain_info *info; + unsigned long flags; + + info = alloc_devinfo_mem(); + if (!info) + return -ENOMEM; + + info->segment = segment; + info->bus = bus; + info->devfn = devfn; + info->dev = dev; + info->domain = domain; + if (!dev) + domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; + + spin_lock_irqsave(&device_domain_lock, flags); + if (dev) + found = find_domain(dev); + else + found = dmar_search_domain_by_dev_info(segment, bus, devfn); + if (found) { + spin_unlock_irqrestore(&device_domain_lock, flags); + free_devinfo_mem(info); + if (found != domain) { + domain_exit(domain); + *domp = found; + } + } else { + list_add(&info->link, &domain->devices); + list_add(&info->global, &device_domain_list); + if (dev) + dev->dev.archdata.iommu = info; + spin_unlock_irqrestore(&device_domain_lock, flags); + } + + return 0; +} + /* domain is initialized */ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) { - struct dmar_domain *domain, *found = NULL; + struct dmar_domain *domain; struct intel_iommu *iommu; struct dmar_drhd_unit *drhd; - struct device_domain_info *info, *tmp; struct pci_dev *dev_tmp; unsigned long flags; int bus = 0, devfn = 0; int segment; - int ret; domain = find_domain(pdev); if (domain) @@ -1986,41 +2039,29 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) devfn = dev_tmp->devfn; } spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry(info, &device_domain_list, global) { - if (info->segment == segment && - info->bus == bus && info->devfn == devfn) { - found = info->domain; - break; - } - } + domain = dmar_search_domain_by_dev_info(segment, bus, devfn); spin_unlock_irqrestore(&device_domain_lock, flags); /* pcie-pci bridge already has a domain, uses it */ - if (found) { - domain = found; + if (domain) goto found_domain; - } } - domain = alloc_domain(); - if (!domain) - goto error; - - /* Allocate new domain for the device */ drhd = dmar_find_matched_drhd_unit(pdev); if (!drhd) { printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", pci_name(pdev)); - free_domain_mem(domain); return NULL; } iommu = drhd->iommu; - ret = iommu_attach_domain(domain, iommu); - if (ret) { + /* Allocate and intialize new domain for the device */ + domain = alloc_domain(); + if (!domain) + goto error; + if (iommu_attach_domain(domain, iommu)) { free_domain_mem(domain); goto error; } - if (domain_init(domain, gaw)) { domain_exit(domain); goto error; @@ -2028,67 +2069,16 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) /* register pcie-to-pci device */ if (dev_tmp) { - info = alloc_devinfo_mem(); - if (!info) { + if (dmar_insert_dev_info(segment, bus, devfn, NULL, &domain)) { domain_exit(domain); goto error; } - info->segment = segment; - info->bus = bus; - info->devfn = devfn; - info->dev = NULL; - info->domain = domain; - /* This domain is shared by devices under p2p bridge */ - domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; - - /* pcie-to-pci bridge already has a domain, uses it */ - found = NULL; - spin_lock_irqsave(&device_domain_lock, flags); - list_for_each_entry(tmp, &device_domain_list, global) { - if (tmp->segment == segment && - tmp->bus == bus && tmp->devfn == devfn) { - found = tmp->domain; - break; - } - } - if (found) { - spin_unlock_irqrestore(&device_domain_lock, flags); - free_devinfo_mem(info); - domain_exit(domain); - domain = found; - } else { - list_add(&info->link, &domain->devices); - list_add(&info->global, &device_domain_list); - spin_unlock_irqrestore(&device_domain_lock, flags); - } } found_domain: - info = alloc_devinfo_mem(); - if (!info) - goto error; - info->segment = segment; - info->bus = pdev->bus->number; - info->devfn = pdev->devfn; - info->dev = pdev; - info->domain = domain; - spin_lock_irqsave(&device_domain_lock, flags); - /* somebody is fast */ - found = find_domain(pdev); - if (found != NULL) { - spin_unlock_irqrestore(&device_domain_lock, flags); - if (found != domain) { - domain_exit(domain); - domain = found; - } - free_devinfo_mem(info); + if (dmar_insert_dev_info(segment, pdev->bus->number, pdev->devfn, + pdev, &domain) == 0) return domain; - } - list_add(&info->link, &domain->devices); - list_add(&info->global, &device_domain_list); - pdev->dev.archdata.iommu = info; - spin_unlock_irqrestore(&device_domain_lock, flags); - return domain; error: /* recheck it here, maybe others set it */ return find_domain(pdev); From e85bb5d4d1d5ef55cf4fbb4da5dda67d2bbacc5c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:27 +0800 Subject: [PATCH 22/75] iommu/vt-d: Free resources if failed to create domain for PCIe endpoint Enhance function get_domain_for_dev() to release allocated resources if failed to create domain for PCIe endpoint, otherwise the allocated resources will get lost. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 67b114e784bc..503cc739d4b1 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2015,7 +2015,7 @@ static int dmar_insert_dev_info(int segment, int bus, int devfn, /* domain is initialized */ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) { - struct dmar_domain *domain; + struct dmar_domain *domain, *free = NULL; struct intel_iommu *iommu; struct dmar_drhd_unit *drhd; struct pci_dev *dev_tmp; @@ -2062,17 +2062,16 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) free_domain_mem(domain); goto error; } - if (domain_init(domain, gaw)) { - domain_exit(domain); + free = domain; + if (domain_init(domain, gaw)) goto error; - } /* register pcie-to-pci device */ if (dev_tmp) { - if (dmar_insert_dev_info(segment, bus, devfn, NULL, &domain)) { - domain_exit(domain); + if (dmar_insert_dev_info(segment, bus, devfn, NULL, &domain)) goto error; - } + else + free = NULL; } found_domain: @@ -2080,6 +2079,8 @@ found_domain: pdev, &domain) == 0) return domain; error: + if (free) + domain_exit(free); /* recheck it here, maybe others set it */ return find_domain(pdev); } From 92d03cc8d0c9bedfa1ab0ac6ee713a1b1f41a901 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:28 +0800 Subject: [PATCH 23/75] iommu/vt-d: Reduce duplicated code to handle virtual machine domains Reduce duplicated code to handle virtual machine domains, there's no functionality changes. It also improves code readability. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 177 +++++++++--------------------------- 1 file changed, 45 insertions(+), 132 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 503cc739d4b1..672477241791 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -425,9 +425,12 @@ static LIST_HEAD(unmaps_to_do); static int timer_on; static long list_size; +static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void domain_remove_one_dev_info(struct dmar_domain *domain, struct pci_dev *pdev); +static void iommu_detach_dependent_devices(struct intel_iommu *iommu, + struct pci_dev *pdev); #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON int dmar_disabled = 0; @@ -1286,10 +1289,6 @@ static int iommu_init_domains(struct intel_iommu *iommu) return 0; } - -static void domain_exit(struct dmar_domain *domain); -static void vm_domain_exit(struct dmar_domain *domain); - static void free_dmar_iommu(struct intel_iommu *iommu) { struct dmar_domain *domain; @@ -1304,12 +1303,8 @@ static void free_dmar_iommu(struct intel_iommu *iommu) spin_lock_irqsave(&domain->iommu_lock, flags); count = --domain->iommu_count; spin_unlock_irqrestore(&domain->iommu_lock, flags); - if (count == 0) { - if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) - vm_domain_exit(domain); - else - domain_exit(domain); - } + if (count == 0) + domain_exit(domain); } } @@ -1327,8 +1322,10 @@ static void free_dmar_iommu(struct intel_iommu *iommu) free_context_table(iommu); } -static struct dmar_domain *alloc_domain(void) +static struct dmar_domain *alloc_domain(bool vm) { + /* domain id for virtual machine, it won't be set in context */ + static atomic_t vm_domid = ATOMIC_INIT(0); struct dmar_domain *domain; domain = alloc_domain_mem(); @@ -1336,8 +1333,15 @@ static struct dmar_domain *alloc_domain(void) return NULL; domain->nid = -1; + domain->iommu_count = 0; memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp)); domain->flags = 0; + spin_lock_init(&domain->iommu_lock); + INIT_LIST_HEAD(&domain->devices); + if (vm) { + domain->id = atomic_inc_return(&vm_domid); + domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; + } return domain; } @@ -1374,22 +1378,16 @@ static void iommu_detach_domain(struct dmar_domain *domain, { unsigned long flags; int num, ndomains; - int found = 0; spin_lock_irqsave(&iommu->lock, flags); ndomains = cap_ndoms(iommu->cap); for_each_set_bit(num, iommu->domain_ids, ndomains) { if (iommu->domains[num] == domain) { - found = 1; + clear_bit(num, iommu->domain_ids); + iommu->domains[num] = NULL; break; } } - - if (found) { - clear_bit(num, iommu->domain_ids); - clear_bit(iommu->seq_id, domain->iommu_bmp); - iommu->domains[num] = NULL; - } spin_unlock_irqrestore(&iommu->lock, flags); } @@ -1461,8 +1459,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width) unsigned long sagaw; init_iova_domain(&domain->iovad, DMA_32BIT_PFN); - spin_lock_init(&domain->iommu_lock); - domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -1481,7 +1477,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width) return -ENODEV; } domain->agaw = agaw; - INIT_LIST_HEAD(&domain->devices); if (ecap_coherent(iommu->ecap)) domain->iommu_coherency = 1; @@ -1518,7 +1513,9 @@ static void domain_exit(struct dmar_domain *domain) if (!intel_iommu_strict) flush_unmaps_timeout(0); + /* remove associated devices */ domain_remove_dev_info(domain); + /* destroy iovas */ put_iova_domain(&domain->iovad); @@ -1528,8 +1525,10 @@ static void domain_exit(struct dmar_domain *domain) /* free page tables */ dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + /* clear attached or cached domains */ for_each_active_iommu(iommu, drhd) - if (test_bit(iommu->seq_id, domain->iommu_bmp)) + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || + test_bit(iommu->seq_id, domain->iommu_bmp)) iommu_detach_domain(domain, iommu); free_domain_mem(domain); @@ -1921,7 +1920,7 @@ static inline void unlink_domain_info(struct device_domain_info *info) static void domain_remove_dev_info(struct dmar_domain *domain) { struct device_domain_info *info; - unsigned long flags; + unsigned long flags, flags2; struct intel_iommu *iommu; spin_lock_irqsave(&device_domain_lock, flags); @@ -1934,8 +1933,22 @@ static void domain_remove_dev_info(struct dmar_domain *domain) iommu_disable_dev_iotlb(info); iommu = device_to_iommu(info->segment, info->bus, info->devfn); iommu_detach_dev(iommu, info->bus, info->devfn); - free_devinfo_mem(info); + if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { + iommu_detach_dependent_devices(iommu, info->dev); + /* clear this iommu in iommu_bmp, update iommu count + * and capabilities + */ + spin_lock_irqsave(&domain->iommu_lock, flags2); + if (test_and_clear_bit(iommu->seq_id, + domain->iommu_bmp)) { + domain->iommu_count--; + domain_update_iommu_cap(domain); + } + spin_unlock_irqrestore(&domain->iommu_lock, flags2); + } + + free_devinfo_mem(info); spin_lock_irqsave(&device_domain_lock, flags); } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -2055,7 +2068,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) iommu = drhd->iommu; /* Allocate and intialize new domain for the device */ - domain = alloc_domain(); + domain = alloc_domain(false); if (!domain) goto error; if (iommu_attach_domain(domain, iommu)) { @@ -2220,10 +2233,12 @@ static int __init si_domain_init(int hw) struct intel_iommu *iommu; int nid, ret = 0; - si_domain = alloc_domain(); + si_domain = alloc_domain(false); if (!si_domain) return -EFAULT; + si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; + for_each_active_iommu(iommu, drhd) { ret = iommu_attach_domain(si_domain, iommu); if (ret) { @@ -2237,7 +2252,6 @@ static int __init si_domain_init(int hw) return -EFAULT; } - si_domain->flags = DOMAIN_FLAG_STATIC_IDENTITY; pr_debug("IOMMU: identity mapping domain is domain %d\n", si_domain->id); @@ -3810,67 +3824,11 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, } } -static void vm_domain_remove_all_dev_info(struct dmar_domain *domain) -{ - struct device_domain_info *info; - struct intel_iommu *iommu; - unsigned long flags1, flags2; - - spin_lock_irqsave(&device_domain_lock, flags1); - while (!list_empty(&domain->devices)) { - info = list_entry(domain->devices.next, - struct device_domain_info, link); - unlink_domain_info(info); - spin_unlock_irqrestore(&device_domain_lock, flags1); - - iommu_disable_dev_iotlb(info); - iommu = device_to_iommu(info->segment, info->bus, info->devfn); - iommu_detach_dev(iommu, info->bus, info->devfn); - iommu_detach_dependent_devices(iommu, info->dev); - - /* clear this iommu in iommu_bmp, update iommu count - * and capabilities - */ - spin_lock_irqsave(&domain->iommu_lock, flags2); - if (test_and_clear_bit(iommu->seq_id, - domain->iommu_bmp)) { - domain->iommu_count--; - domain_update_iommu_cap(domain); - } - spin_unlock_irqrestore(&domain->iommu_lock, flags2); - - free_devinfo_mem(info); - spin_lock_irqsave(&device_domain_lock, flags1); - } - spin_unlock_irqrestore(&device_domain_lock, flags1); -} - -/* domain id for virtual machine, it won't be set in context */ -static atomic_t vm_domid = ATOMIC_INIT(0); - -static struct dmar_domain *iommu_alloc_vm_domain(void) -{ - struct dmar_domain *domain; - - domain = alloc_domain_mem(); - if (!domain) - return NULL; - - domain->id = atomic_inc_return(&vm_domid); - domain->nid = -1; - memset(domain->iommu_bmp, 0, sizeof(domain->iommu_bmp)); - domain->flags = DOMAIN_FLAG_VIRTUAL_MACHINE; - - return domain; -} - static int md_domain_init(struct dmar_domain *domain, int guest_width) { int adjust_width; init_iova_domain(&domain->iovad, DMA_32BIT_PFN); - spin_lock_init(&domain->iommu_lock); - domain_reserve_special_ranges(domain); /* calculate AGAW */ @@ -3878,9 +3836,6 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) adjust_width = guestwidth_to_adjustwidth(guest_width); domain->agaw = width_to_agaw(adjust_width); - INIT_LIST_HEAD(&domain->devices); - - domain->iommu_count = 0; domain->iommu_coherency = 0; domain->iommu_snooping = 0; domain->iommu_superpage = 0; @@ -3895,53 +3850,11 @@ static int md_domain_init(struct dmar_domain *domain, int guest_width) return 0; } -static void iommu_free_vm_domain(struct dmar_domain *domain) -{ - unsigned long flags; - struct dmar_drhd_unit *drhd; - struct intel_iommu *iommu; - unsigned long i; - unsigned long ndomains; - - for_each_active_iommu(iommu, drhd) { - ndomains = cap_ndoms(iommu->cap); - for_each_set_bit(i, iommu->domain_ids, ndomains) { - if (iommu->domains[i] == domain) { - spin_lock_irqsave(&iommu->lock, flags); - clear_bit(i, iommu->domain_ids); - iommu->domains[i] = NULL; - spin_unlock_irqrestore(&iommu->lock, flags); - break; - } - } - } -} - -static void vm_domain_exit(struct dmar_domain *domain) -{ - /* Domain 0 is reserved, so dont process it */ - if (!domain) - return; - - vm_domain_remove_all_dev_info(domain); - /* destroy iovas */ - put_iova_domain(&domain->iovad); - - /* clear ptes */ - dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - - /* free page tables */ - dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - - iommu_free_vm_domain(domain); - free_domain_mem(domain); -} - static int intel_iommu_domain_init(struct iommu_domain *domain) { struct dmar_domain *dmar_domain; - dmar_domain = iommu_alloc_vm_domain(); + dmar_domain = alloc_domain(true); if (!dmar_domain) { printk(KERN_ERR "intel_iommu_domain_init: dmar_domain == NULL\n"); @@ -3950,7 +3863,7 @@ static int intel_iommu_domain_init(struct iommu_domain *domain) if (md_domain_init(dmar_domain, DEFAULT_DOMAIN_ADDRESS_WIDTH)) { printk(KERN_ERR "intel_iommu_domain_init() failed\n"); - vm_domain_exit(dmar_domain); + domain_exit(dmar_domain); return -ENOMEM; } domain_update_iommu_cap(dmar_domain); @@ -3968,7 +3881,7 @@ static void intel_iommu_domain_destroy(struct iommu_domain *domain) struct dmar_domain *dmar_domain = domain->priv; domain->priv = NULL; - vm_domain_exit(dmar_domain); + domain_exit(dmar_domain); } static int intel_iommu_attach_device(struct iommu_domain *domain, From 9ebd682e5a7a0b61bedc0aaff609bec2135198e3 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:29 +0800 Subject: [PATCH 24/75] iommu/vt-d: Fix incorrect iommu_count for si_domain The iommu_count field in si_domain(static identity domain) is initialized to zero and never increases. It will underflow when tearing down iommu unit in function free_dmar_iommu() and leak memory. So refine code to correctly manage si_domain->iommu_count. Warning message caused by si_domain memory leak: [ 14.609681] IOMMU: Setting RMRR: [ 14.613496] Ignoring identity map for HW passthrough device 0000:00:1a.0 [0xbdcfd000 - 0xbdd1dfff] [ 14.623809] Ignoring identity map for HW passthrough device 0000:00:1d.0 [0xbdcfd000 - 0xbdd1dfff] [ 14.634162] IOMMU: Prepare 0-16MiB unity mapping for LPC [ 14.640329] Ignoring identity map for HW passthrough device 0000:00:1f.0 [0x0 - 0xffffff] [ 14.673360] IOMMU: dmar init failed [ 14.678157] kmem_cache_destroy iommu_devinfo: Slab cache still has objects [ 14.686076] CPU: 12 PID: 1 Comm: swapper/0 Not tainted 3.13.0-rc1-gerry+ #59 [ 14.694176] Hardware name: Intel Corporation LH Pass ........../SVRBD-ROW_T, BIOS SE5C600.86B.99.99.x059.091020121352 09/10/2012 [ 14.707412] 0000000000000000 ffff88042dd33db0 ffffffff8156223d ffff880c2cc37c00 [ 14.716407] ffff88042dd33dc8 ffffffff811790b1 ffff880c2d3533b8 ffff88042dd33e00 [ 14.725468] ffffffff81dc7a6a ffffffff81b1e8e0 ffffffff81f84058 ffffffff81d8a711 [ 14.734464] Call Trace: [ 14.737453] [] dump_stack+0x4d/0x66 [ 14.743430] [] kmem_cache_destroy+0xf1/0x100 [ 14.750279] [] intel_iommu_init+0x122/0x56a [ 14.757035] [] ? iommu_setup+0x27d/0x27d [ 14.763491] [] pci_iommu_init+0x28/0x52 [ 14.769846] [] do_one_initcall+0x122/0x180 [ 14.776506] [] ? parse_args+0x1e8/0x320 [ 14.782866] [] kernel_init_freeable+0x1e1/0x26c [ 14.789994] [] ? do_early_param+0x88/0x88 [ 14.796556] [] ? rest_init+0xd0/0xd0 [ 14.802626] [] kernel_init+0xe/0x130 [ 14.808698] [] ret_from_fork+0x7c/0xb0 [ 14.814963] [] ? rest_init+0xd0/0xd0 [ 14.821640] kmem_cache_destroy iommu_domain: Slab cache still has objects [ 14.829456] CPU: 12 PID: 1 Comm: swapper/0 Not tainted 3.13.0-rc1-gerry+ #59 [ 14.837562] Hardware name: Intel Corporation LH Pass ........../SVRBD-ROW_T, BIOS SE5C600.86B.99.99.x059.091020121352 09/10/2012 [ 14.850803] 0000000000000000 ffff88042dd33db0 ffffffff8156223d ffff88102c1ee3c0 [ 14.861222] ffff88042dd33dc8 ffffffff811790b1 ffff880c2d3533b8 ffff88042dd33e00 [ 14.870284] ffffffff81dc7a76 ffffffff81b1e8e0 ffffffff81f84058 ffffffff81d8a711 [ 14.879271] Call Trace: [ 14.882227] [] dump_stack+0x4d/0x66 [ 14.888197] [] kmem_cache_destroy+0xf1/0x100 [ 14.895034] [] intel_iommu_init+0x12e/0x56a [ 14.901781] [] ? iommu_setup+0x27d/0x27d [ 14.908238] [] pci_iommu_init+0x28/0x52 [ 14.914594] [] do_one_initcall+0x122/0x180 [ 14.921244] [] ? parse_args+0x1e8/0x320 [ 14.927598] [] kernel_init_freeable+0x1e1/0x26c [ 14.934738] [] ? do_early_param+0x88/0x88 [ 14.941309] [] ? rest_init+0xd0/0xd0 [ 14.947380] [] kernel_init+0xe/0x130 [ 14.953430] [] ret_from_fork+0x7c/0xb0 [ 14.959689] [] ? rest_init+0xd0/0xd0 [ 14.966299] kmem_cache_destroy iommu_iova: Slab cache still has objects [ 14.973923] CPU: 12 PID: 1 Comm: swapper/0 Not tainted 3.13.0-rc1-gerry+ #59 [ 14.982020] Hardware name: Intel Corporation LH Pass ........../SVRBD-ROW_T, BIOS SE5C600.86B.99.99.x059.091020121352 09/10/2012 [ 14.995263] 0000000000000000 ffff88042dd33db0 ffffffff8156223d ffff88042cb5c980 [ 15.004265] ffff88042dd33dc8 ffffffff811790b1 ffff880c2d3533b8 ffff88042dd33e00 [ 15.013322] ffffffff81dc7a82 ffffffff81b1e8e0 ffffffff81f84058 ffffffff81d8a711 [ 15.022318] Call Trace: [ 15.025238] [] dump_stack+0x4d/0x66 [ 15.031202] [] kmem_cache_destroy+0xf1/0x100 [ 15.038038] [] intel_iommu_init+0x13a/0x56a [ 15.044786] [] ? iommu_setup+0x27d/0x27d [ 15.051242] [] pci_iommu_init+0x28/0x52 [ 15.057601] [] do_one_initcall+0x122/0x180 [ 15.064254] [] ? parse_args+0x1e8/0x320 [ 15.070608] [] kernel_init_freeable+0x1e1/0x26c [ 15.077747] [] ? do_early_param+0x88/0x88 [ 15.084300] [] ? rest_init+0xd0/0xd0 [ 15.090362] [] kernel_init+0xe/0x130 [ 15.096431] [] ret_from_fork+0x7c/0xb0 [ 15.102693] [] ? rest_init+0xd0/0xd0 [ 15.189273] PCI-DMA: Using software bounce buffering for IO (SWIOTLB) Signed-off-by: Jiang Liu Cc: Alex Williamson Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 672477241791..a27d3eb14e87 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1365,6 +1365,7 @@ static int iommu_attach_domain(struct dmar_domain *domain, } domain->id = num; + domain->iommu_count++; set_bit(num, iommu->domain_ids); set_bit(iommu->seq_id, domain->iommu_bmp); iommu->domains[num] = domain; @@ -1489,7 +1490,6 @@ static int domain_init(struct dmar_domain *domain, int guest_width) domain->iommu_snooping = 0; domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); - domain->iommu_count = 1; domain->nid = iommu->node; /* always allocate the top pgd */ From a4eaa86c0cf55d8a4c9955b20e42b6e2f4e6c11c Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:30 +0800 Subject: [PATCH 25/75] iommu/vt-d: Check for NULL pointer when freeing IOMMU data structure Domain id 0 will be assigned to invalid translation without allocating domain data structure if DMAR unit supports caching mode. So in function free_dmar_iommu(), we should check whether the domain pointer is NULL, otherwise it will cause system crash as below: [ 6.790519] BUG: unable to handle kernel NULL pointer dereference at 00000000000000c8 [ 6.799520] IP: [] __lock_acquire+0x11f8/0x1430 [ 6.806493] PGD 0 [ 6.817972] Oops: 0000 [#1] SMP DEBUG_PAGEALLOC [ 6.823303] Modules linked in: [ 6.826862] CPU: 0 PID: 1 Comm: swapper/0 Not tainted 3.14.0-rc1+ #126 [ 6.834252] Hardware name: Intel Corporation BRICKLAND/BRICKLAND, BIOS BRIVTIN1.86B.0047.R00.1402050741 02/05/2014 [ 6.845951] task: ffff880455a80000 ti: ffff880455a88000 task.ti: ffff880455a88000 [ 6.854437] RIP: 0010:[] [] __lock_acquire+0x11f8/0x1430 [ 6.864154] RSP: 0000:ffff880455a89ce0 EFLAGS: 00010046 [ 6.870179] RAX: 0000000000000046 RBX: 0000000000000002 RCX: 0000000000000000 [ 6.878249] RDX: 0000000000000000 RSI: 0000000000000000 RDI: 00000000000000c8 [ 6.886318] RBP: ffff880455a89d40 R08: 0000000000000002 R09: 0000000000000001 [ 6.894387] R10: 0000000000000000 R11: 0000000000000001 R12: ffff880455a80000 [ 6.902458] R13: 0000000000000000 R14: 00000000000000c8 R15: 0000000000000000 [ 6.910520] FS: 0000000000000000(0000) GS:ffff88045b800000(0000) knlGS:0000000000000000 [ 6.919687] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 6.926198] CR2: 00000000000000c8 CR3: 0000000001e0e000 CR4: 00000000001407f0 [ 6.934269] Stack: [ 6.936588] ffffffffffffff10 ffffffff810f59db 0000000000000010 0000000000000246 [ 6.945219] ffff880455a89d10 0000000000000000 ffffffff82bcb980 0000000000000046 [ 6.953850] 0000000000000000 0000000000000000 0000000000000002 0000000000000000 [ 6.962482] Call Trace: [ 6.965300] [] ? vprintk_emit+0x4fb/0x5a0 [ 6.971716] [] lock_acquire+0x185/0x200 [ 6.977941] [] ? init_dmars+0x839/0xa1d [ 6.984167] [] _raw_spin_lock_irqsave+0x56/0x90 [ 6.991158] [] ? init_dmars+0x839/0xa1d [ 6.997380] [] init_dmars+0x839/0xa1d [ 7.003410] [] ? pci_get_dev_by_id+0x75/0xd0 [ 7.010119] [] intel_iommu_init+0x2f0/0x502 [ 7.016735] [] ? iommu_setup+0x27d/0x27d [ 7.023056] [] pci_iommu_init+0x28/0x52 [ 7.029282] [] do_one_initcall+0xf2/0x220 [ 7.035702] [] ? parse_args+0x2c9/0x450 [ 7.041919] [] kernel_init_freeable+0x1c9/0x25b [ 7.048919] [] ? do_early_param+0x8a/0x8a [ 7.055336] [] ? rest_init+0x150/0x150 [ 7.061461] [] kernel_init+0xe/0x100 [ 7.067393] [] ret_from_fork+0x7c/0xb0 [ 7.073518] [] ? rest_init+0x150/0x150 [ 7.079642] Code: 01 76 18 89 05 46 04 36 01 41 be 01 00 00 00 e9 2f 02 00 00 0f 1f 80 00 00 00 00 41 be 01 00 00 00 e9 1d 02 00 00 0f 1f 44 00 00 <49> 81 3e c0 31 34 82 b8 01 00 00 00 0f 44 d8 41 83 ff 01 0f 87 [ 7.104944] RIP [] __lock_acquire+0x11f8/0x1430 [ 7.112008] RSP [ 7.115988] CR2: 00000000000000c8 [ 7.119784] ---[ end trace 13d756f0f462c538 ]--- [ 7.125034] note: swapper/0[1] exited with preempt_count 1 [ 7.131285] Kernel panic - not syncing: Attempted to kill init! exitcode=0x00000009 [ 7.131285] Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a27d3eb14e87..afbc8d6f6f85 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1297,6 +1297,13 @@ static void free_dmar_iommu(struct intel_iommu *iommu) if ((iommu->domains) && (iommu->domain_ids)) { for_each_set_bit(i, iommu->domain_ids, cap_ndoms(iommu->cap)) { + /* + * Domain id 0 is reserved for invalid translation + * if hardware supports caching mode. + */ + if (cap_caching_mode(iommu->cap) && i == 0) + continue; + domain = iommu->domains[i]; clear_bit(i, iommu->domain_ids); From b5f82ddf22725146d926c56ea4cb80df9b34b6b9 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:31 +0800 Subject: [PATCH 26/75] iommu/vt-d: Fix error in detect ATS capability Current Intel IOMMU driver only matches a PCIe root port with the first DRHD unit with the samge segment number. It will report false result if there are multiple DRHD units with the same segment number, thus fail to detect ATS capability for some PCIe devices. This patch refines function dmar_find_matched_atsr_unit() to search all DRHD units with the same segment number. An example DMAR table entries as below: [1D0h 0464 2] Subtable Type : 0002 [1D2h 0466 2] Length : 0028 [1D4h 0468 1] Flags : 00 [1D5h 0469 1] Reserved : 00 [1D6h 0470 2] PCI Segment Number : 0000 [1D8h 0472 1] Device Scope Entry Type : 02 [1D9h 0473 1] Entry Length : 08 [1DAh 0474 2] Reserved : 0000 [1DCh 0476 1] Enumeration ID : 00 [1DDh 0477 1] PCI Bus Number : 00 [1DEh 0478 2] PCI Path : [02, 00] [1E0h 0480 1] Device Scope Entry Type : 02 [1E1h 0481 1] Entry Length : 08 [1E2h 0482 2] Reserved : 0000 [1E4h 0484 1] Enumeration ID : 00 [1E5h 0485 1] PCI Bus Number : 00 [1E6h 0486 2] PCI Path : [03, 00] [1E8h 0488 1] Device Scope Entry Type : 02 [1E9h 0489 1] Entry Length : 08 [1EAh 0490 2] Reserved : 0000 [1ECh 0492 1] Enumeration ID : 00 [1EDh 0493 1] PCI Bus Number : 00 [1EEh 0494 2] PCI Path : [03, 02] [1F0h 0496 1] Device Scope Entry Type : 02 [1F1h 0497 1] Entry Length : 08 [1F2h 0498 2] Reserved : 0000 [1F4h 0500 1] Enumeration ID : 00 [1F5h 0501 1] PCI Bus Number : 00 [1F6h 0502 2] PCI Path : [03, 03] [1F8h 0504 2] Subtable Type : 0002 [1FAh 0506 2] Length : 0020 [1FCh 0508 1] Flags : 00 [1FDh 0509 1] Reserved : 00 [1FEh 0510 2] PCI Segment Number : 0000 [200h 0512 1] Device Scope Entry Type : 02 [201h 0513 1] Entry Length : 08 [202h 0514 2] Reserved : 0000 [204h 0516 1] Enumeration ID : 00 [205h 0517 1] PCI Bus Number : 40 [206h 0518 2] PCI Path : [02, 00] [208h 0520 1] Device Scope Entry Type : 02 [209h 0521 1] Entry Length : 08 [20Ah 0522 2] Reserved : 0000 [20Ch 0524 1] Enumeration ID : 00 [20Dh 0525 1] PCI Bus Number : 40 [20Eh 0526 2] PCI Path : [02, 02] [210h 0528 1] Device Scope Entry Type : 02 [211h 0529 1] Entry Length : 08 [212h 0530 2] Reserved : 0000 [214h 0532 1] Enumeration ID : 00 [215h 0533 1] PCI Bus Number : 40 [216h 0534 2] PCI Path : [03, 00] [218h 0536 2] Subtable Type : 0002 [21Ah 0538 2] Length : 0020 [21Ch 0540 1] Flags : 00 [21Dh 0541 1] Reserved : 00 [21Eh 0542 2] PCI Segment Number : 0000 [220h 0544 1] Device Scope Entry Type : 02 [221h 0545 1] Entry Length : 08 [222h 0546 2] Reserved : 0000 [224h 0548 1] Enumeration ID : 00 [225h 0549 1] PCI Bus Number : 80 [226h 0550 2] PCI Path : [02, 00] [228h 0552 1] Device Scope Entry Type : 02 [229h 0553 1] Entry Length : 08 [22Ah 0554 2] Reserved : 0000 [22Ch 0556 1] Enumeration ID : 00 [22Dh 0557 1] PCI Bus Number : 80 [22Eh 0558 2] PCI Path : [02, 02] [230h 0560 1] Device Scope Entry Type : 02 [231h 0561 1] Entry Length : 08 [232h 0562 2] Reserved : 0000 [234h 0564 1] Enumeration ID : 00 [235h 0565 1] PCI Bus Number : 80 [236h 0566 2] PCI Path : [03, 00] [238h 0568 2] Subtable Type : 0002 [23Ah 0570 2] Length : 0020 [23Ch 0572 1] Flags : 00 [23Dh 0573 1] Reserved : 00 [23Eh 0574 2] PCI Segment Number : 0000 [240h 0576 1] Device Scope Entry Type : 02 [241h 0577 1] Entry Length : 08 [242h 0578 2] Reserved : 0000 [244h 0580 1] Enumeration ID : 00 [245h 0581 1] PCI Bus Number : C0 [246h 0582 2] PCI Path : [02, 00] [248h 0584 1] Device Scope Entry Type : 02 [249h 0585 1] Entry Length : 08 [24Ah 0586 2] Reserved : 0000 [24Ch 0588 1] Enumeration ID : 00 [24Dh 0589 1] PCI Bus Number : C0 [24Eh 0590 2] PCI Path : [02, 02] [250h 0592 1] Device Scope Entry Type : 02 [251h 0593 1] Entry Length : 08 [252h 0594 2] Reserved : 0000 [254h 0596 1] Enumeration ID : 00 [255h 0597 1] PCI Bus Number : C0 [256h 0598 2] PCI Path : [03, 00] Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 37 +++++++++++++++++-------------------- 1 file changed, 17 insertions(+), 20 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index afbc8d6f6f85..7732c43871fa 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3562,37 +3562,34 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) { int i; struct pci_bus *bus; + struct pci_dev *bridge = NULL; struct acpi_dmar_atsr *atsr; struct dmar_atsr_unit *atsru; dev = pci_physfn(dev); - - list_for_each_entry(atsru, &dmar_atsr_units, list) { - atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); - if (atsr->segment == pci_domain_nr(dev->bus)) - goto found; - } - - return 0; - -found: for (bus = dev->bus; bus; bus = bus->parent) { - struct pci_dev *bridge = bus->self; - + bridge = bus->self; if (!bridge || !pci_is_pcie(bridge) || pci_pcie_type(bridge) == PCI_EXP_TYPE_PCI_BRIDGE) return 0; - - if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) { - for (i = 0; i < atsru->devices_cnt; i++) - if (atsru->devices[i] == bridge) - return 1; + if (pci_pcie_type(bridge) == PCI_EXP_TYPE_ROOT_PORT) break; - } } + if (!bridge) + return 0; - if (atsru->include_all) - return 1; + list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + if (atsr->segment != pci_domain_nr(dev->bus)) + continue; + + for (i = 0; i < atsru->devices_cnt; i++) + if (atsru->devices[i] == bridge) + return 1; + + if (atsru->include_all) + return 1; + } return 0; } From b683b230a244c3b2b3f6f3292e59d4a63298528b Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:32 +0800 Subject: [PATCH 27/75] iommu/vt-d: Introduce macro for_each_dev_scope() to walk device scope entries Introduce for_each_dev_scope()/for_each_active_dev_scope() to walk {active} device scope entries. This will help following RCU lock related patches. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 14 +++-- drivers/iommu/intel-iommu.c | 100 ++++++++++++++++++------------------ include/linux/dmar.h | 6 +++ 3 files changed, 64 insertions(+), 56 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 58dde75c5229..4ae6df27ad5d 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -170,9 +170,12 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, void dmar_free_dev_scope(struct pci_dev ***devices, int *cnt) { + int i; + struct pci_dev *tmp_dev; + if (*devices && *cnt) { - while (--*cnt >= 0) - pci_dev_put((*devices)[*cnt]); + for_each_active_dev_scope(*devices, *cnt, i, tmp_dev) + pci_dev_put(tmp_dev); kfree(*devices); *devices = NULL; *cnt = 0; @@ -402,10 +405,11 @@ static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, struct pci_dev *dev) { int index; + struct pci_dev *tmp; while (dev) { - for (index = 0; index < cnt; index++) - if (dev == devices[index]) + for_each_active_dev_scope(devices, cnt, index, tmp) + if (dev == tmp) return 1; /* Check our parent */ @@ -452,7 +456,7 @@ int __init dmar_dev_scope_init(void) if (list_empty(&dmar_drhd_units)) goto fail; - list_for_each_entry(drhd, &dmar_drhd_units, list) { + for_each_drhd_unit(drhd) { ret = dmar_parse_dev(drhd); if (ret) goto fail; diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7732c43871fa..bb98e37f2cf7 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -654,29 +654,31 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) { struct dmar_drhd_unit *drhd = NULL; + struct intel_iommu *iommu; + struct pci_dev *dev; int i; - for_each_active_drhd_unit(drhd) { + for_each_active_iommu(iommu, drhd) { if (segment != drhd->segment) continue; - for (i = 0; i < drhd->devices_cnt; i++) { - if (drhd->devices[i] && - drhd->devices[i]->bus->number == bus && - drhd->devices[i]->devfn == devfn) - return drhd->iommu; - if (drhd->devices[i] && - drhd->devices[i]->subordinate && - drhd->devices[i]->subordinate->number <= bus && - drhd->devices[i]->subordinate->busn_res.end >= bus) - return drhd->iommu; + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) { + if (dev->bus->number == bus && dev->devfn == devfn) + goto out; + if (dev->subordinate && + dev->subordinate->number <= bus && + dev->subordinate->busn_res.end >= bus) + goto out; } if (drhd->include_all) - return drhd->iommu; + goto out; } + iommu = NULL; +out: - return NULL; + return iommu; } static void domain_flush_cache(struct dmar_domain *domain, @@ -2333,17 +2335,19 @@ static int domain_add_dev_info(struct dmar_domain *domain, static bool device_has_rmrr(struct pci_dev *dev) { struct dmar_rmrr_unit *rmrr; + struct pci_dev *tmp; int i; for_each_rmrr_units(rmrr) { - for (i = 0; i < rmrr->devices_cnt; i++) { - /* - * Return TRUE if this RMRR contains the device that - * is passed in. - */ - if (rmrr->devices[i] == dev) + /* + * Return TRUE if this RMRR contains the device that + * is passed in. + */ + for_each_active_dev_scope(rmrr->devices, + rmrr->devices_cnt, i, tmp) + if (tmp == dev) { return true; - } + } } return false; } @@ -2593,14 +2597,9 @@ static int __init init_dmars(void) */ printk(KERN_INFO "IOMMU: Setting RMRR:\n"); for_each_rmrr_units(rmrr) { - for (i = 0; i < rmrr->devices_cnt; i++) { - pdev = rmrr->devices[i]; - /* - * some BIOS lists non-exist devices in DMAR - * table. - */ - if (!pdev) - continue; + /* some BIOS lists non-exist devices in DMAR table. */ + for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, + i, pdev) { ret = iommu_prepare_rmrr_dev(rmrr, pdev); if (ret) printk(KERN_ERR @@ -3288,13 +3287,14 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quir static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; + struct pci_dev *dev; + int i; for_each_drhd_unit(drhd) { if (!drhd->include_all) { - int i; - for (i = 0; i < drhd->devices_cnt; i++) - if (drhd->devices[i] != NULL) - break; + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) + break; /* ignore DMAR unit if no pci devices exist */ if (i == drhd->devices_cnt) drhd->ignored = 1; @@ -3302,15 +3302,13 @@ static void __init init_no_remapping_devices(void) } for_each_active_drhd_unit(drhd) { - int i; if (drhd->include_all) continue; - for (i = 0; i < drhd->devices_cnt; i++) - if (drhd->devices[i] && - !IS_GFX_DEVICE(drhd->devices[i])) + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) + if (!IS_GFX_DEVICE(dev)) break; - if (i < drhd->devices_cnt) continue; @@ -3320,11 +3318,9 @@ static void __init init_no_remapping_devices(void) intel_iommu_gfx_mapped = 1; } else { drhd->ignored = 1; - for (i = 0; i < drhd->devices_cnt; i++) { - if (!drhd->devices[i]) - continue; - drhd->devices[i]->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; - } + for_each_active_dev_scope(drhd->devices, + drhd->devices_cnt, i, dev) + dev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; } } } @@ -3560,9 +3556,9 @@ static void intel_iommu_free_dmars(void) int dmar_find_matched_atsr_unit(struct pci_dev *dev) { - int i; + int i, ret = 1; struct pci_bus *bus; - struct pci_dev *bridge = NULL; + struct pci_dev *bridge = NULL, *tmp; struct acpi_dmar_atsr *atsr; struct dmar_atsr_unit *atsru; @@ -3583,22 +3579,24 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) if (atsr->segment != pci_domain_nr(dev->bus)) continue; - for (i = 0; i < atsru->devices_cnt; i++) - if (atsru->devices[i] == bridge) - return 1; + for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) + if (tmp == bridge) + goto out; if (atsru->include_all) - return 1; + goto out; } + ret = 0; +out: - return 0; + return ret; } int __init dmar_parse_rmrr_atsr_dev(void) { struct dmar_rmrr_unit *rmrr; struct dmar_atsr_unit *atsr; - int ret = 0; + int ret; list_for_each_entry(rmrr, &dmar_rmrr_units, list) { ret = rmrr_parse_dev(rmrr); @@ -3612,7 +3610,7 @@ int __init dmar_parse_rmrr_atsr_dev(void) return ret; } - return ret; + return 0; } /* diff --git a/include/linux/dmar.h b/include/linux/dmar.h index ea599d4ca9e0..4b77fd8fde76 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -65,6 +65,12 @@ extern struct list_head dmar_drhd_units; list_for_each_entry(drhd, &dmar_drhd_units, list) \ if (i=drhd->iommu, 0) {} else +#define for_each_dev_scope(a, c, p, d) \ + for ((p) = 0; ((d) = (p) < (c) ? (a)[(p)] : NULL, (p) < (c)); (p)++) + +#define for_each_active_dev_scope(a, c, p, d) \ + for_each_dev_scope((a), (c), (p), (d)) if (!(d)) { continue; } else + extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, From 3a5670e8ac932c10a3e50d9dc0ab1da4cc3041d7 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:33 +0800 Subject: [PATCH 28/75] iommu/vt-d: Introduce a rwsem to protect global data structures Introduce a global rwsem dmar_global_lock, which will be used to protect DMAR related global data structures from DMAR/PCI/memory device hotplug operations in process context. DMA and interrupt remapping related data structures are read most, and only change when memory/PCI/DMAR hotplug event happens. So a global rwsem solution is adopted for balance between simplicity and performance. For interrupt remapping driver, function intel_irq_remapping_supported(), dmar_table_init(), intel_enable_irq_remapping(), disable_irq_remapping(), reenable_irq_remapping() and enable_drhd_fault_handling() etc are called during booting, suspending and resuming with interrupt disabled, so no need to take the global lock. For interrupt remapping entry allocation, the locking model is: down_read(&dmar_global_lock); /* Find corresponding iommu */ iommu = map_hpet_to_ir(id); if (iommu) /* * Allocate remapping entry and mark entry busy, * the IOMMU won't be hot-removed until the * allocated entry has been released. */ index = alloc_irte(iommu, irq, 1); up_read(&dmar_global_lock); For DMA remmaping driver, we only uses the dmar_global_lock rwsem to protect functions which are only called in process context. For any function which may be called in interrupt context, we will use RCU to protect them in following patches. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 19 ++++- drivers/iommu/intel-iommu.c | 22 +++--- drivers/iommu/intel_irq_remapping.c | 108 ++++++++++++++++++---------- include/linux/dmar.h | 2 + 4 files changed, 103 insertions(+), 48 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 4ae6df27ad5d..c9aca8841fa0 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -43,10 +43,19 @@ #include "irq_remapping.h" -/* No locks are needed as DMA remapping hardware unit - * list is constructed at boot time and hotplug of - * these units are not supported by the architecture. +/* + * Assumptions: + * 1) The hotplug framework guarentees that DMAR unit will be hot-added + * before IO devices managed by that unit. + * 2) The hotplug framework guarantees that DMAR unit will be hot-removed + * after IO devices managed by that unit. + * 3) Hotplug events are rare. + * + * Locking rules for DMA and interrupt remapping related global data structures: + * 1) Use dmar_global_lock in process context + * 2) Use RCU in interrupt context */ +DECLARE_RWSEM(dmar_global_lock); LIST_HEAD(dmar_drhd_units); struct acpi_table_header * __initdata dmar_tbl; @@ -565,6 +574,7 @@ int __init detect_intel_iommu(void) { int ret; + down_write(&dmar_global_lock); ret = dmar_table_detect(); if (ret) ret = check_zero_address(); @@ -582,6 +592,7 @@ int __init detect_intel_iommu(void) } early_acpi_os_unmap_memory((void __iomem *)dmar_tbl, dmar_tbl_size); dmar_tbl = NULL; + up_write(&dmar_global_lock); return ret ? 1 : -ENODEV; } @@ -1394,10 +1405,12 @@ static int __init dmar_free_unused_resources(void) if (irq_remapping_enabled || intel_iommu_enabled) return 0; + down_write(&dmar_global_lock); list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) { list_del(&dmaru->list); dmar_free_drhd(dmaru); } + up_write(&dmar_global_lock); return 0; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index bb98e37f2cf7..50d639a2df88 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3637,11 +3637,13 @@ static int device_notifier(struct notifier_block *nb, if (!domain) return 0; + down_read(&dmar_global_lock); domain_remove_one_dev_info(domain, pdev); if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && list_empty(&domain->devices)) domain_exit(domain); + up_read(&dmar_global_lock); return 0; } @@ -3659,6 +3661,13 @@ int __init intel_iommu_init(void) /* VT-d is required for a TXT/tboot launch, so enforce that */ force_on = tboot_force_iommu(); + if (iommu_init_mempool()) { + if (force_on) + panic("tboot: Failed to initialize iommu memory\n"); + return -ENOMEM; + } + + down_write(&dmar_global_lock); if (dmar_table_init()) { if (force_on) panic("tboot: Failed to initialize DMAR table\n"); @@ -3681,12 +3690,6 @@ int __init intel_iommu_init(void) if (no_iommu || dmar_disabled) goto out_free_dmar; - if (iommu_init_mempool()) { - if (force_on) - panic("tboot: Failed to initialize iommu memory\n"); - goto out_free_dmar; - } - if (list_empty(&dmar_rmrr_units)) printk(KERN_INFO "DMAR: No RMRR found\n"); @@ -3696,7 +3699,7 @@ int __init intel_iommu_init(void) if (dmar_init_reserved_ranges()) { if (force_on) panic("tboot: Failed to reserve iommu ranges\n"); - goto out_free_mempool; + goto out_free_reserved_range; } init_no_remapping_devices(); @@ -3708,6 +3711,7 @@ int __init intel_iommu_init(void) printk(KERN_ERR "IOMMU: dmar init failed\n"); goto out_free_reserved_range; } + up_write(&dmar_global_lock); printk(KERN_INFO "PCI-DMA: Intel(R) Virtualization Technology for Directed I/O\n"); @@ -3729,10 +3733,10 @@ int __init intel_iommu_init(void) out_free_reserved_range: put_iova_domain(&reserved_iova_list); -out_free_mempool: - iommu_exit_mempool(); out_free_dmar: intel_iommu_free_dmars(); + up_write(&dmar_global_lock); + iommu_exit_mempool(); return ret; } diff --git a/drivers/iommu/intel_irq_remapping.c b/drivers/iommu/intel_irq_remapping.c index ef5f65dbafe9..9b174893f0f5 100644 --- a/drivers/iommu/intel_irq_remapping.c +++ b/drivers/iommu/intel_irq_remapping.c @@ -38,6 +38,17 @@ static struct ioapic_scope ir_ioapic[MAX_IO_APICS]; static struct hpet_scope ir_hpet[MAX_HPET_TBS]; static int ir_ioapic_num, ir_hpet_num; +/* + * Lock ordering: + * ->dmar_global_lock + * ->irq_2_ir_lock + * ->qi->q_lock + * ->iommu->register_lock + * Note: + * intel_irq_remap_ops.{supported,prepare,enable,disable,reenable} are called + * in single-threaded environment with interrupt disabled, so no need to tabke + * the dmar_global_lock. + */ static DEFINE_RAW_SPINLOCK(irq_2_ir_lock); static int __init parse_ioapics_under_ir(void); @@ -307,12 +318,14 @@ static int set_ioapic_sid(struct irte *irte, int apic) if (!irte) return -1; + down_read(&dmar_global_lock); for (i = 0; i < MAX_IO_APICS; i++) { if (ir_ioapic[i].id == apic) { sid = (ir_ioapic[i].bus << 8) | ir_ioapic[i].devfn; break; } } + up_read(&dmar_global_lock); if (sid == 0) { pr_warning("Failed to set source-id of IOAPIC (%d)\n", apic); @@ -332,12 +345,14 @@ static int set_hpet_sid(struct irte *irte, u8 id) if (!irte) return -1; + down_read(&dmar_global_lock); for (i = 0; i < MAX_HPET_TBS; i++) { if (ir_hpet[i].id == id) { sid = (ir_hpet[i].bus << 8) | ir_hpet[i].devfn; break; } } + up_read(&dmar_global_lock); if (sid == 0) { pr_warning("Failed to set source-id of HPET block (%d)\n", id); @@ -794,10 +809,16 @@ static int __init parse_ioapics_under_ir(void) static int __init ir_dev_scope_init(void) { + int ret; + if (!irq_remapping_enabled) return 0; - return dmar_dev_scope_init(); + down_write(&dmar_global_lock); + ret = dmar_dev_scope_init(); + up_write(&dmar_global_lock); + + return ret; } rootfs_initcall(ir_dev_scope_init); @@ -878,23 +899,27 @@ static int intel_setup_ioapic_entry(int irq, struct io_apic_irq_attr *attr) { int ioapic_id = mpc_ioapic_id(attr->ioapic); - struct intel_iommu *iommu = map_ioapic_to_ir(ioapic_id); + struct intel_iommu *iommu; struct IR_IO_APIC_route_entry *entry; struct irte irte; int index; + down_read(&dmar_global_lock); + iommu = map_ioapic_to_ir(ioapic_id); if (!iommu) { pr_warn("No mapping iommu for ioapic %d\n", ioapic_id); - return -ENODEV; - } - - entry = (struct IR_IO_APIC_route_entry *)route_entry; - - index = alloc_irte(iommu, irq, 1); - if (index < 0) { - pr_warn("Failed to allocate IRTE for ioapic %d\n", ioapic_id); - return -ENOMEM; + index = -ENODEV; + } else { + index = alloc_irte(iommu, irq, 1); + if (index < 0) { + pr_warn("Failed to allocate IRTE for ioapic %d\n", + ioapic_id); + index = -ENOMEM; + } } + up_read(&dmar_global_lock); + if (index < 0) + return index; prepare_irte(&irte, vector, destination); @@ -913,6 +938,7 @@ static int intel_setup_ioapic_entry(int irq, irte.avail, irte.vector, irte.dest_id, irte.sid, irte.sq, irte.svt); + entry = (struct IR_IO_APIC_route_entry *)route_entry; memset(entry, 0, sizeof(*entry)); entry->index2 = (index >> 15) & 0x1; @@ -1043,20 +1069,23 @@ static int intel_msi_alloc_irq(struct pci_dev *dev, int irq, int nvec) struct intel_iommu *iommu; int index; + down_read(&dmar_global_lock); iommu = map_dev_to_ir(dev); if (!iommu) { printk(KERN_ERR "Unable to map PCI %s to iommu\n", pci_name(dev)); - return -ENOENT; + index = -ENOENT; + } else { + index = alloc_irte(iommu, irq, nvec); + if (index < 0) { + printk(KERN_ERR + "Unable to allocate %d IRTE for PCI %s\n", + nvec, pci_name(dev)); + index = -ENOSPC; + } } + up_read(&dmar_global_lock); - index = alloc_irte(iommu, irq, nvec); - if (index < 0) { - printk(KERN_ERR - "Unable to allocate %d IRTE for PCI %s\n", nvec, - pci_name(dev)); - return -ENOSPC; - } return index; } @@ -1064,33 +1093,40 @@ static int intel_msi_setup_irq(struct pci_dev *pdev, unsigned int irq, int index, int sub_handle) { struct intel_iommu *iommu; + int ret = -ENOENT; + down_read(&dmar_global_lock); iommu = map_dev_to_ir(pdev); - if (!iommu) - return -ENOENT; - /* - * setup the mapping between the irq and the IRTE - * base index, the sub_handle pointing to the - * appropriate interrupt remap table entry. - */ - set_irte_irq(irq, iommu, index, sub_handle); + if (iommu) { + /* + * setup the mapping between the irq and the IRTE + * base index, the sub_handle pointing to the + * appropriate interrupt remap table entry. + */ + set_irte_irq(irq, iommu, index, sub_handle); + ret = 0; + } + up_read(&dmar_global_lock); - return 0; + return ret; } static int intel_setup_hpet_msi(unsigned int irq, unsigned int id) { - struct intel_iommu *iommu = map_hpet_to_ir(id); + int ret = -1; + struct intel_iommu *iommu; int index; - if (!iommu) - return -1; + down_read(&dmar_global_lock); + iommu = map_hpet_to_ir(id); + if (iommu) { + index = alloc_irte(iommu, irq, 1); + if (index >= 0) + ret = 0; + } + up_read(&dmar_global_lock); - index = alloc_irte(iommu, irq, 1); - if (index < 0) - return -1; - - return 0; + return ret; } struct irq_remap_ops intel_irq_remap_ops = { diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 4b77fd8fde76..8f06a0135a84 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -25,6 +25,7 @@ #include #include #include +#include struct acpi_dmar_header; @@ -48,6 +49,7 @@ struct dmar_drhd_unit { struct intel_iommu *iommu; }; +extern struct rw_semaphore dmar_global_lock; extern struct list_head dmar_drhd_units; #define for_each_drhd_unit(drhd) \ From 0e242612d9cdb46e878ed1f126c78fe68492af00 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:34 +0800 Subject: [PATCH 29/75] iommu/vt-d: Use RCU to protect global resources in interrupt context Global DMA and interrupt remapping resources may be accessed in interrupt context, so use RCU instead of rwsem to protect them in such cases. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 33 ++++++++++++++++++++------------- drivers/iommu/intel-iommu.c | 20 ++++++++++++++++---- include/linux/dmar.h | 23 +++++++++++++++++------ 3 files changed, 53 insertions(+), 23 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index c9aca8841fa0..6e4d851991f1 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -71,13 +71,13 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) * the very end. */ if (drhd->include_all) - list_add_tail(&drhd->list, &dmar_drhd_units); + list_add_tail_rcu(&drhd->list, &dmar_drhd_units); else - list_add(&drhd->list, &dmar_drhd_units); + list_add_rcu(&drhd->list, &dmar_drhd_units); } static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, - struct pci_dev **dev, u16 segment) + struct pci_dev __rcu **dev, u16 segment) { struct pci_bus *bus; struct pci_dev *pdev = NULL; @@ -122,7 +122,9 @@ static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, pci_name(pdev)); return -EINVAL; } - *dev = pdev; + + rcu_assign_pointer(*dev, pdev); + return 0; } @@ -149,7 +151,7 @@ void *dmar_alloc_dev_scope(void *start, void *end, int *cnt) } int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, - struct pci_dev ***devices, u16 segment) + struct pci_dev __rcu ***devices, u16 segment) { struct acpi_dmar_device_scope *scope; int index, ret; @@ -177,7 +179,7 @@ int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, return 0; } -void dmar_free_dev_scope(struct pci_dev ***devices, int *cnt) +void dmar_free_dev_scope(struct pci_dev __rcu ***devices, int *cnt) { int i; struct pci_dev *tmp_dev; @@ -186,9 +188,10 @@ void dmar_free_dev_scope(struct pci_dev ***devices, int *cnt) for_each_active_dev_scope(*devices, *cnt, i, tmp_dev) pci_dev_put(tmp_dev); kfree(*devices); - *devices = NULL; - *cnt = 0; } + + *devices = NULL; + *cnt = 0; } /** @@ -410,7 +413,7 @@ parse_dmar_table(void) return ret; } -static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, +static int dmar_pci_device_match(struct pci_dev __rcu *devices[], int cnt, struct pci_dev *dev) { int index; @@ -431,11 +434,12 @@ static int dmar_pci_device_match(struct pci_dev *devices[], int cnt, struct dmar_drhd_unit * dmar_find_matched_drhd_unit(struct pci_dev *dev) { - struct dmar_drhd_unit *dmaru = NULL; + struct dmar_drhd_unit *dmaru; struct acpi_dmar_hardware_unit *drhd; dev = pci_physfn(dev); + rcu_read_lock(); for_each_drhd_unit(dmaru) { drhd = container_of(dmaru->hdr, struct acpi_dmar_hardware_unit, @@ -443,14 +447,17 @@ dmar_find_matched_drhd_unit(struct pci_dev *dev) if (dmaru->include_all && drhd->segment == pci_domain_nr(dev->bus)) - return dmaru; + goto out; if (dmar_pci_device_match(dmaru->devices, dmaru->devices_cnt, dev)) - return dmaru; + goto out; } + dmaru = NULL; +out: + rcu_read_unlock(); - return NULL; + return dmaru; } int __init dmar_dev_scope_init(void) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 50d639a2df88..e1679a6fe468 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -385,14 +385,14 @@ struct dmar_rmrr_unit { struct acpi_dmar_header *hdr; /* ACPI header */ u64 base_address; /* reserved base address*/ u64 end_address; /* reserved end address */ - struct pci_dev **devices; /* target devices */ + struct pci_dev __rcu **devices; /* target devices */ int devices_cnt; /* target device count */ }; struct dmar_atsr_unit { struct list_head list; /* list of ATSR units */ struct acpi_dmar_header *hdr; /* ACPI header */ - struct pci_dev **devices; /* target devices */ + struct pci_dev __rcu **devices; /* target devices */ int devices_cnt; /* target device count */ u8 include_all:1; /* include all ports */ }; @@ -634,12 +634,15 @@ static void domain_update_iommu_superpage(struct dmar_domain *domain) } /* set iommu_superpage to the smallest common denominator */ + rcu_read_lock(); for_each_active_iommu(iommu, drhd) { mask &= cap_super_page_val(iommu->cap); if (!mask) { break; } } + rcu_read_unlock(); + domain->iommu_superpage = fls(mask); } @@ -658,6 +661,7 @@ static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) struct pci_dev *dev; int i; + rcu_read_lock(); for_each_active_iommu(iommu, drhd) { if (segment != drhd->segment) continue; @@ -677,6 +681,7 @@ static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) } iommu = NULL; out: + rcu_read_unlock(); return iommu; } @@ -1535,10 +1540,12 @@ static void domain_exit(struct dmar_domain *domain) dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* clear attached or cached domains */ + rcu_read_lock(); for_each_active_iommu(iommu, drhd) if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || test_bit(iommu->seq_id, domain->iommu_bmp)) iommu_detach_domain(domain, iommu); + rcu_read_unlock(); free_domain_mem(domain); } @@ -2338,6 +2345,7 @@ static bool device_has_rmrr(struct pci_dev *dev) struct pci_dev *tmp; int i; + rcu_read_lock(); for_each_rmrr_units(rmrr) { /* * Return TRUE if this RMRR contains the device that @@ -2346,9 +2354,11 @@ static bool device_has_rmrr(struct pci_dev *dev) for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, tmp) if (tmp == dev) { + rcu_read_unlock(); return true; } } + rcu_read_unlock(); return false; } @@ -3512,7 +3522,7 @@ int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) atsru->hdr = hdr; atsru->include_all = atsr->flags & 0x1; - list_add(&atsru->list, &dmar_atsr_units); + list_add_rcu(&atsru->list, &dmar_atsr_units); return 0; } @@ -3574,6 +3584,7 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) if (!bridge) return 0; + rcu_read_lock(); list_for_each_entry_rcu(atsru, &dmar_atsr_units, list) { atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); if (atsr->segment != pci_domain_nr(dev->bus)) @@ -3588,6 +3599,7 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) } ret = 0; out: + rcu_read_unlock(); return ret; } @@ -3604,7 +3616,7 @@ int __init dmar_parse_rmrr_atsr_dev(void) return ret; } - list_for_each_entry(atsr, &dmar_atsr_units, list) { + list_for_each_entry_rcu(atsr, &dmar_atsr_units, list) { ret = atsr_parse_dev(atsr); if (ret) return ret; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8f06a0135a84..bedebab934b4 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,6 +26,7 @@ #include #include #include +#include struct acpi_dmar_header; @@ -41,7 +42,7 @@ struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ - struct pci_dev **devices; /* target device array */ + struct pci_dev __rcu **devices;/* target device array */ int devices_cnt; /* target device count */ u16 segment; /* PCI domain */ u8 ignored:1; /* ignore drhd */ @@ -53,22 +54,31 @@ extern struct rw_semaphore dmar_global_lock; extern struct list_head dmar_drhd_units; #define for_each_drhd_unit(drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) #define for_each_active_drhd_unit(drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ if (drhd->ignored) {} else #define for_each_active_iommu(i, drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ if (i=drhd->iommu, drhd->ignored) {} else #define for_each_iommu(i, drhd) \ - list_for_each_entry(drhd, &dmar_drhd_units, list) \ + list_for_each_entry_rcu(drhd, &dmar_drhd_units, list) \ if (i=drhd->iommu, 0) {} else +static inline bool dmar_rcu_check(void) +{ + return rwsem_is_locked(&dmar_global_lock) || + system_state == SYSTEM_BOOTING; +} + +#define dmar_rcu_dereference(p) rcu_dereference_check((p), dmar_rcu_check()) + #define for_each_dev_scope(a, c, p, d) \ - for ((p) = 0; ((d) = (p) < (c) ? (a)[(p)] : NULL, (p) < (c)); (p)++) + for ((p) = 0; ((d) = (p) < (c) ? dmar_rcu_dereference((a)[(p)]) : \ + NULL, (p) < (c)); (p)++) #define for_each_active_dev_scope(a, c, p, d) \ for_each_dev_scope((a), (c), (p), (d)) if (!(d)) { continue; } else @@ -78,6 +88,7 @@ extern int dmar_dev_scope_init(void); extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, struct pci_dev ***devices, u16 segment); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); +extern void dmar_free_dev_scope(struct pci_dev __rcu ***devices, int *cnt); extern void dmar_free_dev_scope(struct pci_dev ***devices, int *cnt); /* Intel IOMMU detection */ From 59ce0515cdaf3b7d47893d12f61e51d691863788 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:35 +0800 Subject: [PATCH 30/75] iommu/vt-d: Update DRHD/RMRR/ATSR device scope caches when PCI hotplug happens Current Intel DMAR/IOMMU driver assumes that all PCI devices associated with DMAR/RMRR/ATSR device scope arrays are created at boot time and won't change at runtime, so it caches pointers of associated PCI device object. That assumption may be wrong now due to: 1) introduction of PCI host bridge hotplug 2) PCI device hotplug through sysfs interfaces. Wang Yijing has tried to solve this issue by caching tupple instead of the PCI device object pointer, but that's still unreliable because PCI bus number may change in case of hotplug. Please refer to http://lkml.org/lkml/2013/11/5/64 Message from Yingjing's mail: after remove and rescan a pci device [ 611.857095] dmar: DRHD: handling fault status reg 2 [ 611.857109] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff7000 [ 611.857109] DMAR:[fault reason 02] Present bit in context entry is clear [ 611.857524] dmar: DRHD: handling fault status reg 102 [ 611.857534] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff6000 [ 611.857534] DMAR:[fault reason 02] Present bit in context entry is clear [ 611.857936] dmar: DRHD: handling fault status reg 202 [ 611.857947] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff5000 [ 611.857947] DMAR:[fault reason 02] Present bit in context entry is clear [ 611.858351] dmar: DRHD: handling fault status reg 302 [ 611.858362] dmar: DMAR:[DMA Read] Request device [86:00.3] fault addr ffff4000 [ 611.858362] DMAR:[fault reason 02] Present bit in context entry is clear [ 611.860819] IPv6: ADDRCONF(NETDEV_UP): eth3: link is not ready [ 611.860983] dmar: DRHD: handling fault status reg 402 [ 611.860995] dmar: INTR-REMAP: Request device [[86:00.3] fault index a4 [ 611.860995] INTR-REMAP:[fault reason 34] Present field in the IRTE entry is clear This patch introduces a new mechanism to update the DRHD/RMRR/ATSR device scope caches by hooking PCI bus notification. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 207 ++++++++++++++++++++++++++++++++++++ drivers/iommu/intel-iommu.c | 54 ++++++++++ include/linux/dmar.h | 24 ++++- 3 files changed, 283 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 6e4d851991f1..bf6bfd1f69aa 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -194,6 +194,209 @@ void dmar_free_dev_scope(struct pci_dev __rcu ***devices, int *cnt) *cnt = 0; } +/* Optimize out kzalloc()/kfree() for normal cases */ +static char dmar_pci_notify_info_buf[64]; + +static struct dmar_pci_notify_info * +dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) +{ + int level = 0; + size_t size; + struct pci_dev *tmp; + struct dmar_pci_notify_info *info; + + BUG_ON(dev->is_virtfn); + + /* Only generate path[] for device addition event */ + if (event == BUS_NOTIFY_ADD_DEVICE) + for (tmp = dev; tmp; tmp = tmp->bus->self) + level++; + + size = sizeof(*info) + level * sizeof(struct acpi_dmar_pci_path); + if (size <= sizeof(dmar_pci_notify_info_buf)) { + info = (struct dmar_pci_notify_info *)dmar_pci_notify_info_buf; + } else { + info = kzalloc(size, GFP_KERNEL); + if (!info) { + pr_warn("Out of memory when allocating notify_info " + "for %s.\n", pci_name(dev)); + return NULL; + } + } + + info->event = event; + info->dev = dev; + info->seg = pci_domain_nr(dev->bus); + info->level = level; + if (event == BUS_NOTIFY_ADD_DEVICE) { + for (tmp = dev, level--; tmp; tmp = tmp->bus->self) { + info->path[level].device = PCI_SLOT(tmp->devfn); + info->path[level].function = PCI_FUNC(tmp->devfn); + if (pci_is_root_bus(tmp->bus)) + info->bus = tmp->bus->number; + } + } + + return info; +} + +static inline void dmar_free_pci_notify_info(struct dmar_pci_notify_info *info) +{ + if ((void *)info != dmar_pci_notify_info_buf) + kfree(info); +} + +static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus, + struct acpi_dmar_pci_path *path, int count) +{ + int i; + + if (info->bus != bus) + return false; + if (info->level != count) + return false; + + for (i = 0; i < count; i++) { + if (path[i].device != info->path[i].device || + path[i].function != info->path[i].function) + return false; + } + + return true; +} + +/* Return: > 0 if match found, 0 if no match found, < 0 if error happens */ +int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, + void *start, void*end, u16 segment, + struct pci_dev __rcu **devices, int devices_cnt) +{ + int i, level; + struct pci_dev *tmp, *dev = info->dev; + struct acpi_dmar_device_scope *scope; + struct acpi_dmar_pci_path *path; + + if (segment != info->seg) + return 0; + + for (; start < end; start += scope->length) { + scope = start; + if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ENDPOINT && + scope->entry_type != ACPI_DMAR_SCOPE_TYPE_BRIDGE) + continue; + + path = (struct acpi_dmar_pci_path *)(scope + 1); + level = (scope->length - sizeof(*scope)) / sizeof(*path); + if (!dmar_match_pci_path(info, scope->bus, path, level)) + continue; + + if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^ + (dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) { + pr_warn("Device scope type does not match for %s\n", + pci_name(dev)); + return -EINVAL; + } + + for_each_dev_scope(devices, devices_cnt, i, tmp) + if (tmp == NULL) { + rcu_assign_pointer(devices[i], + pci_dev_get(dev)); + return 1; + } + BUG_ON(i >= devices_cnt); + } + + return 0; +} + +int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment, + struct pci_dev __rcu **devices, int count) +{ + int index; + struct pci_dev *tmp; + + if (info->seg != segment) + return 0; + + for_each_active_dev_scope(devices, count, index, tmp) + if (tmp == info->dev) { + rcu_assign_pointer(devices[index], NULL); + synchronize_rcu(); + pci_dev_put(tmp); + return 1; + } + + return 0; +} + +static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info) +{ + int ret = 0; + struct dmar_drhd_unit *dmaru; + struct acpi_dmar_hardware_unit *drhd; + + for_each_drhd_unit(dmaru) { + if (dmaru->include_all) + continue; + + drhd = container_of(dmaru->hdr, + struct acpi_dmar_hardware_unit, header); + ret = dmar_insert_dev_scope(info, (void *)(drhd + 1), + ((void *)drhd) + drhd->header.length, + dmaru->segment, + dmaru->devices, dmaru->devices_cnt); + if (ret != 0) + break; + } + if (ret >= 0) + ret = dmar_iommu_notify_scope_dev(info); + + return ret; +} + +static void dmar_pci_bus_del_dev(struct dmar_pci_notify_info *info) +{ + struct dmar_drhd_unit *dmaru; + + for_each_drhd_unit(dmaru) + if (dmar_remove_dev_scope(info, dmaru->segment, + dmaru->devices, dmaru->devices_cnt)) + break; + dmar_iommu_notify_scope_dev(info); +} + +static int dmar_pci_bus_notifier(struct notifier_block *nb, + unsigned long action, void *data) +{ + struct pci_dev *pdev = to_pci_dev(data); + struct dmar_pci_notify_info *info; + + /* Only care about add/remove events for physical functions */ + if (pdev->is_virtfn) + return NOTIFY_DONE; + if (action != BUS_NOTIFY_ADD_DEVICE && action != BUS_NOTIFY_DEL_DEVICE) + return NOTIFY_DONE; + + info = dmar_alloc_pci_notify_info(pdev, action); + if (!info) + return NOTIFY_DONE; + + down_write(&dmar_global_lock); + if (action == BUS_NOTIFY_ADD_DEVICE) + dmar_pci_bus_add_dev(info); + else if (action == BUS_NOTIFY_DEL_DEVICE) + dmar_pci_bus_del_dev(info); + up_write(&dmar_global_lock); + + dmar_free_pci_notify_info(info); + + return NOTIFY_OK; +} + +static struct notifier_block dmar_pci_bus_nb = { + .notifier_call = dmar_pci_bus_notifier, + .priority = INT_MIN, +}; + /** * dmar_parse_one_drhd - parses exactly one DMA remapping hardware definition * structure which uniquely represent one DMA remapping hardware unit @@ -482,6 +685,8 @@ int __init dmar_dev_scope_init(void) if (ret) goto fail; + bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); + dmar_dev_scope_initialized = 1; return 0; @@ -1412,6 +1617,8 @@ static int __init dmar_free_unused_resources(void) if (irq_remapping_enabled || intel_iommu_enabled) return 0; + bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb); + down_write(&dmar_global_lock); list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) { list_del(&dmaru->list); diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e1679a6fe468..d9c0dc5a5d35 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3625,6 +3625,60 @@ int __init dmar_parse_rmrr_atsr_dev(void) return 0; } +int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) +{ + int ret = 0; + struct dmar_rmrr_unit *rmrru; + struct dmar_atsr_unit *atsru; + struct acpi_dmar_atsr *atsr; + struct acpi_dmar_reserved_memory *rmrr; + + if (!intel_iommu_enabled && system_state != SYSTEM_BOOTING) + return 0; + + list_for_each_entry(rmrru, &dmar_rmrr_units, list) { + rmrr = container_of(rmrru->hdr, + struct acpi_dmar_reserved_memory, header); + if (info->event == BUS_NOTIFY_ADD_DEVICE) { + ret = dmar_insert_dev_scope(info, (void *)(rmrr + 1), + ((void *)rmrr) + rmrr->header.length, + rmrr->segment, rmrru->devices, + rmrru->devices_cnt); + if (ret > 0) + break; + else if(ret < 0) + return ret; + } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { + if (dmar_remove_dev_scope(info, rmrr->segment, + rmrru->devices, rmrru->devices_cnt)) + break; + } + } + + list_for_each_entry(atsru, &dmar_atsr_units, list) { + if (atsru->include_all) + continue; + + atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); + if (info->event == BUS_NOTIFY_ADD_DEVICE) { + ret = dmar_insert_dev_scope(info, (void *)(atsr + 1), + (void *)atsr + atsr->header.length, + atsr->segment, atsru->devices, + atsru->devices_cnt); + if (ret > 0) + break; + else if(ret < 0) + return ret; + } else if (info->event == BUS_NOTIFY_DEL_DEVICE) { + if (dmar_remove_dev_scope(info, atsr->segment, + atsru->devices, atsru->devices_cnt)) + break; + } + } + + return 0; +} + /* * Here we only respond to action of unbound device from driver. * diff --git a/include/linux/dmar.h b/include/linux/dmar.h index bedebab934b4..4e196430f1b2 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -50,6 +50,15 @@ struct dmar_drhd_unit { struct intel_iommu *iommu; }; +struct dmar_pci_notify_info { + struct pci_dev *dev; + unsigned long event; + int bus; + u16 seg; + u16 level; + struct acpi_dmar_pci_path path[]; +} __attribute__((packed)); + extern struct rw_semaphore dmar_global_lock; extern struct list_head dmar_drhd_units; @@ -89,12 +98,18 @@ extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, struct pci_dev ***devices, u16 segment); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); extern void dmar_free_dev_scope(struct pci_dev __rcu ***devices, int *cnt); -extern void dmar_free_dev_scope(struct pci_dev ***devices, int *cnt); - +extern int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, + void *start, void*end, u16 segment, + struct pci_dev __rcu **devices, + int devices_cnt); +extern int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, + u16 segment, struct pci_dev __rcu **devices, + int count); /* Intel IOMMU detection */ extern int detect_intel_iommu(void); extern int enable_drhd_fault_handling(void); #else +struct dmar_pci_notify_info; static inline int detect_intel_iommu(void) { return -ENODEV; @@ -161,6 +176,7 @@ extern int iommu_detected, no_iommu; extern int dmar_parse_rmrr_atsr_dev(void); extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); +extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); extern int intel_iommu_init(void); #else /* !CONFIG_INTEL_IOMMU: */ static inline int intel_iommu_init(void) { return -ENODEV; } @@ -176,6 +192,10 @@ static inline int dmar_parse_rmrr_atsr_dev(void) { return 0; } +static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) +{ + return 0; +} #endif /* CONFIG_INTEL_IOMMU */ #endif /* __DMAR_H__ */ From 2e45528930388658603ea24d49cf52867b928d3e Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:36 +0800 Subject: [PATCH 31/75] iommu/vt-d: Unify the way to process DMAR device scope array Now we have a PCI bus notification based mechanism to update DMAR device scope array, we could extend the mechanism to support boot time initialization too, which will help to unify and simplify the implementation. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 163 ++++++++++-------------------------- drivers/iommu/intel-iommu.c | 71 ++++------------ include/linux/dmar.h | 5 -- 3 files changed, 61 insertions(+), 178 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index bf6bfd1f69aa..b19f9f4c3584 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -60,6 +60,7 @@ LIST_HEAD(dmar_drhd_units); struct acpi_table_header * __initdata dmar_tbl; static acpi_size dmar_tbl_size; +static int dmar_dev_scope_status = 1; static int alloc_iommu(struct dmar_drhd_unit *drhd); static void free_iommu(struct intel_iommu *iommu); @@ -76,58 +77,6 @@ static void __init dmar_register_drhd_unit(struct dmar_drhd_unit *drhd) list_add_rcu(&drhd->list, &dmar_drhd_units); } -static int __init dmar_parse_one_dev_scope(struct acpi_dmar_device_scope *scope, - struct pci_dev __rcu **dev, u16 segment) -{ - struct pci_bus *bus; - struct pci_dev *pdev = NULL; - struct acpi_dmar_pci_path *path; - int count; - - bus = pci_find_bus(segment, scope->bus); - path = (struct acpi_dmar_pci_path *)(scope + 1); - count = (scope->length - sizeof(struct acpi_dmar_device_scope)) - / sizeof(struct acpi_dmar_pci_path); - - while (count) { - if (pdev) - pci_dev_put(pdev); - /* - * Some BIOSes list non-exist devices in DMAR table, just - * ignore it - */ - if (!bus) { - pr_warn("Device scope bus [%d] not found\n", scope->bus); - break; - } - pdev = pci_get_slot(bus, PCI_DEVFN(path->device, path->function)); - if (!pdev) { - /* warning will be printed below */ - break; - } - path ++; - count --; - bus = pdev->subordinate; - } - if (!pdev) { - pr_warn("Device scope device [%04x:%02x:%02x.%02x] not found\n", - segment, scope->bus, path->device, path->function); - return 0; - } - if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT && \ - pdev->subordinate) || (scope->entry_type == \ - ACPI_DMAR_SCOPE_TYPE_BRIDGE && !pdev->subordinate)) { - pci_dev_put(pdev); - pr_warn("Device scope type does not match for %s\n", - pci_name(pdev)); - return -EINVAL; - } - - rcu_assign_pointer(*dev, pdev); - - return 0; -} - void *dmar_alloc_dev_scope(void *start, void *end, int *cnt) { struct acpi_dmar_device_scope *scope; @@ -150,35 +99,6 @@ void *dmar_alloc_dev_scope(void *start, void *end, int *cnt) return kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL); } -int __init dmar_parse_dev_scope(void *start, void *end, int *cnt, - struct pci_dev __rcu ***devices, u16 segment) -{ - struct acpi_dmar_device_scope *scope; - int index, ret; - - *devices = dmar_alloc_dev_scope(start, end, cnt); - if (*cnt == 0) - return 0; - else if (!*devices) - return -ENOMEM; - - for (index = 0; start < end; start += scope->length) { - scope = start; - if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || - scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) { - ret = dmar_parse_one_dev_scope(scope, - &(*devices)[index], segment); - if (ret) { - dmar_free_dev_scope(devices, cnt); - return ret; - } - index ++; - } - } - - return 0; -} - void dmar_free_dev_scope(struct pci_dev __rcu ***devices, int *cnt) { int i; @@ -220,6 +140,8 @@ dmar_alloc_pci_notify_info(struct pci_dev *dev, unsigned long event) if (!info) { pr_warn("Out of memory when allocating notify_info " "for %s.\n", pci_name(dev)); + if (dmar_dev_scope_status == 0) + dmar_dev_scope_status = -ENOMEM; return NULL; } } @@ -349,6 +271,8 @@ static int dmar_pci_bus_add_dev(struct dmar_pci_notify_info *info) } if (ret >= 0) ret = dmar_iommu_notify_scope_dev(info); + if (ret < 0 && dmar_dev_scope_status == 0) + dmar_dev_scope_status = ret; return ret; } @@ -418,9 +342,21 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) dmaru->reg_base_addr = drhd->address; dmaru->segment = drhd->segment; dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ + if (!dmaru->include_all) { + dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1), + ((void *)drhd) + drhd->header.length, + &dmaru->devices_cnt); + if (dmaru->devices_cnt && dmaru->devices == NULL) { + kfree(dmaru); + return -ENOMEM; + } + } ret = alloc_iommu(dmaru); if (ret) { + if (!dmaru->include_all) + dmar_free_dev_scope(&dmaru->devices, + &dmaru->devices_cnt); kfree(dmaru); return ret; } @@ -437,21 +373,6 @@ static void dmar_free_drhd(struct dmar_drhd_unit *dmaru) kfree(dmaru); } -static int __init dmar_parse_dev(struct dmar_drhd_unit *dmaru) -{ - struct acpi_dmar_hardware_unit *drhd; - - drhd = (struct acpi_dmar_hardware_unit *) dmaru->hdr; - - if (dmaru->include_all) - return 0; - - return dmar_parse_dev_scope((void *)(drhd + 1), - ((void *)drhd) + drhd->header.length, - &dmaru->devices_cnt, &dmaru->devices, - drhd->segment); -} - #ifdef CONFIG_ACPI_NUMA static int __init dmar_parse_one_rhsa(struct acpi_dmar_header *header) @@ -665,34 +586,35 @@ out: int __init dmar_dev_scope_init(void) { - static int dmar_dev_scope_initialized; - struct dmar_drhd_unit *drhd; - int ret = -ENODEV; + struct pci_dev *dev = NULL; + struct dmar_pci_notify_info *info; - if (dmar_dev_scope_initialized) - return dmar_dev_scope_initialized; + if (dmar_dev_scope_status != 1) + return dmar_dev_scope_status; - if (list_empty(&dmar_drhd_units)) - goto fail; + if (list_empty(&dmar_drhd_units)) { + dmar_dev_scope_status = -ENODEV; + } else { + dmar_dev_scope_status = 0; - for_each_drhd_unit(drhd) { - ret = dmar_parse_dev(drhd); - if (ret) - goto fail; + for_each_pci_dev(dev) { + if (dev->is_virtfn) + continue; + + info = dmar_alloc_pci_notify_info(dev, + BUS_NOTIFY_ADD_DEVICE); + if (!info) { + return dmar_dev_scope_status; + } else { + dmar_pci_bus_add_dev(info); + dmar_free_pci_notify_info(info); + } + } + + bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); } - ret = dmar_parse_rmrr_atsr_dev(); - if (ret) - goto fail; - - bus_register_notifier(&pci_bus_type, &dmar_pci_bus_nb); - - dmar_dev_scope_initialized = 1; - return 0; - -fail: - dmar_dev_scope_initialized = ret; - return ret; + return dmar_dev_scope_status; } @@ -1617,7 +1539,8 @@ static int __init dmar_free_unused_resources(void) if (irq_remapping_enabled || intel_iommu_enabled) return 0; - bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb); + if (dmar_dev_scope_status != 1 && !list_empty(&dmar_drhd_units)) + bus_unregister_notifier(&pci_bus_type, &dmar_pci_bus_nb); down_write(&dmar_global_lock); list_for_each_entry_safe(dmaru, dmaru_n, &dmar_drhd_units, list) { diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index d9c0dc5a5d35..dd576c067d0d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3473,11 +3473,6 @@ static void __init init_iommu_pm_ops(void) static inline void init_iommu_pm_ops(void) {} #endif /* CONFIG_PM */ -static void __init dmar_register_rmrr_unit(struct dmar_rmrr_unit *rmrr) -{ - list_add(&rmrr->list, &dmar_rmrr_units); -} - int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header) { @@ -3492,23 +3487,19 @@ int __init dmar_parse_one_rmrr(struct acpi_dmar_header *header) rmrr = (struct acpi_dmar_reserved_memory *)header; rmrru->base_address = rmrr->base_address; rmrru->end_address = rmrr->end_address; + rmrru->devices = dmar_alloc_dev_scope((void *)(rmrr + 1), + ((void *)rmrr) + rmrr->header.length, + &rmrru->devices_cnt); + if (rmrru->devices_cnt && rmrru->devices == NULL) { + kfree(rmrru); + return -ENOMEM; + } + + list_add(&rmrru->list, &dmar_rmrr_units); - dmar_register_rmrr_unit(rmrru); return 0; } -static int __init -rmrr_parse_dev(struct dmar_rmrr_unit *rmrru) -{ - struct acpi_dmar_reserved_memory *rmrr; - - rmrr = (struct acpi_dmar_reserved_memory *) rmrru->hdr; - return dmar_parse_dev_scope((void *)(rmrr + 1), - ((void *)rmrr) + rmrr->header.length, - &rmrru->devices_cnt, &rmrru->devices, - rmrr->segment); -} - int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) { struct acpi_dmar_atsr *atsr; @@ -3521,26 +3512,21 @@ int __init dmar_parse_one_atsr(struct acpi_dmar_header *hdr) atsru->hdr = hdr; atsru->include_all = atsr->flags & 0x1; + if (!atsru->include_all) { + atsru->devices = dmar_alloc_dev_scope((void *)(atsr + 1), + (void *)atsr + atsr->header.length, + &atsru->devices_cnt); + if (atsru->devices_cnt && atsru->devices == NULL) { + kfree(atsru); + return -ENOMEM; + } + } list_add_rcu(&atsru->list, &dmar_atsr_units); return 0; } -static int __init atsr_parse_dev(struct dmar_atsr_unit *atsru) -{ - struct acpi_dmar_atsr *atsr; - - if (atsru->include_all) - return 0; - - atsr = container_of(atsru->hdr, struct acpi_dmar_atsr, header); - return dmar_parse_dev_scope((void *)(atsr + 1), - (void *)atsr + atsr->header.length, - &atsru->devices_cnt, &atsru->devices, - atsr->segment); -} - static void intel_iommu_free_atsr(struct dmar_atsr_unit *atsru) { dmar_free_dev_scope(&atsru->devices, &atsru->devices_cnt); @@ -3604,27 +3590,6 @@ out: return ret; } -int __init dmar_parse_rmrr_atsr_dev(void) -{ - struct dmar_rmrr_unit *rmrr; - struct dmar_atsr_unit *atsr; - int ret; - - list_for_each_entry(rmrr, &dmar_rmrr_units, list) { - ret = rmrr_parse_dev(rmrr); - if (ret) - return ret; - } - - list_for_each_entry_rcu(atsr, &dmar_atsr_units, list) { - ret = atsr_parse_dev(atsr); - if (ret) - return ret; - } - - return 0; -} - int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { int ret = 0; diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 4e196430f1b2..0a92e4d978bc 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -173,7 +173,6 @@ extern int arch_setup_dmar_msi(unsigned int irq); #ifdef CONFIG_INTEL_IOMMU extern int iommu_detected, no_iommu; -extern int dmar_parse_rmrr_atsr_dev(void); extern int dmar_parse_one_rmrr(struct acpi_dmar_header *header); extern int dmar_parse_one_atsr(struct acpi_dmar_header *header); extern int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info); @@ -188,10 +187,6 @@ static inline int dmar_parse_one_atsr(struct acpi_dmar_header *header) { return 0; } -static inline int dmar_parse_rmrr_atsr_dev(void) -{ - return 0; -} static inline int dmar_iommu_notify_scope_dev(struct dmar_pci_notify_info *info) { return 0; From 75f05569d0e51f6332a291c82abbeb7c8262e32d Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 19 Feb 2014 14:07:37 +0800 Subject: [PATCH 32/75] iommu/vt-d: Update IOMMU state when memory hotplug happens If static identity domain is created, IOMMU driver needs to update si_domain page table when memory hotplug event happens. Otherwise PCI device DMA operations can't access the hot-added memory regions. Signed-off-by: Jiang Liu Signed-off-by: Joerg Roedel --- drivers/iommu/intel-iommu.c | 71 ++++++++++++++++++++++++++++++++++++- drivers/iommu/iova.c | 64 +++++++++++++++++++++++++++++---- include/linux/iova.h | 2 ++ 3 files changed, 130 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index dd576c067d0d..484d669d2720 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -3683,6 +3684,73 @@ static struct notifier_block device_nb = { .notifier_call = device_notifier, }; +static int intel_iommu_memory_notifier(struct notifier_block *nb, + unsigned long val, void *v) +{ + struct memory_notify *mhp = v; + unsigned long long start, end; + unsigned long start_vpfn, last_vpfn; + + switch (val) { + case MEM_GOING_ONLINE: + start = mhp->start_pfn << PAGE_SHIFT; + end = ((mhp->start_pfn + mhp->nr_pages) << PAGE_SHIFT) - 1; + if (iommu_domain_identity_map(si_domain, start, end)) { + pr_warn("dmar: failed to build identity map for [%llx-%llx]\n", + start, end); + return NOTIFY_BAD; + } + break; + + case MEM_OFFLINE: + case MEM_CANCEL_ONLINE: + start_vpfn = mm_to_dma_pfn(mhp->start_pfn); + last_vpfn = mm_to_dma_pfn(mhp->start_pfn + mhp->nr_pages - 1); + while (start_vpfn <= last_vpfn) { + struct iova *iova; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + + iova = find_iova(&si_domain->iovad, start_vpfn); + if (iova == NULL) { + pr_debug("dmar: failed get IOVA for PFN %lx\n", + start_vpfn); + break; + } + + iova = split_and_remove_iova(&si_domain->iovad, iova, + start_vpfn, last_vpfn); + if (iova == NULL) { + pr_warn("dmar: failed to split IOVA PFN [%lx-%lx]\n", + start_vpfn, last_vpfn); + return NOTIFY_BAD; + } + + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) + iommu_flush_iotlb_psi(iommu, si_domain->id, + iova->pfn_lo, + iova->pfn_hi - iova->pfn_lo + 1, 0); + rcu_read_unlock(); + dma_pte_clear_range(si_domain, iova->pfn_lo, + iova->pfn_hi); + dma_pte_free_pagetable(si_domain, iova->pfn_lo, + iova->pfn_hi); + + start_vpfn = iova->pfn_hi + 1; + free_iova_mem(iova); + } + break; + } + + return NOTIFY_OK; +} + +static struct notifier_block intel_iommu_memory_nb = { + .notifier_call = intel_iommu_memory_notifier, + .priority = 0 +}; + int __init intel_iommu_init(void) { int ret = -ENODEV; @@ -3755,8 +3823,9 @@ int __init intel_iommu_init(void) init_iommu_pm_ops(); bus_set_iommu(&pci_bus_type, &intel_iommu_ops); - bus_register_notifier(&pci_bus_type, &device_nb); + if (si_domain && !hw_pass_through) + register_memory_notifier(&intel_iommu_memory_nb); intel_iommu_enabled = 1; diff --git a/drivers/iommu/iova.c b/drivers/iommu/iova.c index 67da6cff74e8..f6b17e6af2fb 100644 --- a/drivers/iommu/iova.c +++ b/drivers/iommu/iova.c @@ -342,19 +342,30 @@ __is_range_overlap(struct rb_node *node, return 0; } +static inline struct iova * +alloc_and_init_iova(unsigned long pfn_lo, unsigned long pfn_hi) +{ + struct iova *iova; + + iova = alloc_iova_mem(); + if (iova) { + iova->pfn_lo = pfn_lo; + iova->pfn_hi = pfn_hi; + } + + return iova; +} + static struct iova * __insert_new_range(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi) { struct iova *iova; - iova = alloc_iova_mem(); - if (!iova) - return iova; + iova = alloc_and_init_iova(pfn_lo, pfn_hi); + if (iova) + iova_insert_rbtree(&iovad->rbroot, iova); - iova->pfn_hi = pfn_hi; - iova->pfn_lo = pfn_lo; - iova_insert_rbtree(&iovad->rbroot, iova); return iova; } @@ -433,3 +444,44 @@ copy_reserved_iova(struct iova_domain *from, struct iova_domain *to) } spin_unlock_irqrestore(&from->iova_rbtree_lock, flags); } + +struct iova * +split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, + unsigned long pfn_lo, unsigned long pfn_hi) +{ + unsigned long flags; + struct iova *prev = NULL, *next = NULL; + + spin_lock_irqsave(&iovad->iova_rbtree_lock, flags); + if (iova->pfn_lo < pfn_lo) { + prev = alloc_and_init_iova(iova->pfn_lo, pfn_lo - 1); + if (prev == NULL) + goto error; + } + if (iova->pfn_hi > pfn_hi) { + next = alloc_and_init_iova(pfn_hi + 1, iova->pfn_hi); + if (next == NULL) + goto error; + } + + __cached_rbnode_delete_update(iovad, iova); + rb_erase(&iova->node, &iovad->rbroot); + + if (prev) { + iova_insert_rbtree(&iovad->rbroot, prev); + iova->pfn_lo = pfn_lo; + } + if (next) { + iova_insert_rbtree(&iovad->rbroot, next); + iova->pfn_hi = pfn_hi; + } + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + + return iova; + +error: + spin_unlock_irqrestore(&iovad->iova_rbtree_lock, flags); + if (prev) + free_iova_mem(prev); + return NULL; +} diff --git a/include/linux/iova.h b/include/linux/iova.h index 76a0759e88ec..3277f4711349 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -47,5 +47,7 @@ void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long pfn_32bit); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); void put_iova_domain(struct iova_domain *iovad); +struct iova *split_and_remove_iova(struct iova_domain *iovad, + struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi); #endif From e28045ab2e2a8e27d08275bf70be33868beba8fd Mon Sep 17 00:00:00 2001 From: Zhouyi Zhou Date: Wed, 5 Mar 2014 18:20:19 +0800 Subject: [PATCH 33/75] iommu/omap: Check for NULL in iopte_free() The iopte_free() function should check for NULL because kmem_cache_free() will panic on NULL argument. Signed-off-by: Zhouyi Zhou Signed-off-by: Joerg Roedel --- drivers/iommu/omap-iommu.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/omap-iommu.c b/drivers/iommu/omap-iommu.c index 8acea87cbc0e..7fcbfc498fa9 100644 --- a/drivers/iommu/omap-iommu.c +++ b/drivers/iommu/omap-iommu.c @@ -520,7 +520,8 @@ static void flush_iopte_range(u32 *first, u32 *last) static void iopte_free(u32 *iopte) { /* Note: freed iopte's must be clean ready for re-use */ - kmem_cache_free(iopte_cachep, iopte); + if (iopte) + kmem_cache_free(iopte_cachep, iopte); } static u32 *iopte_alloc(struct omap_iommu *obj, u32 *iopgd, u32 da) From 5cf0a76fa2179d246fc0375d733bdccffd59382b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 19 Mar 2014 16:07:49 +0000 Subject: [PATCH 34/75] iommu/vt-d: Clean up size handling for intel_iommu_unmap() We have this horrid API where iommu_unmap() can unmap more than it's asked to, if the IOVA in question happens to be mapped with a large page. Instead of propagating this nonsense to the point where we end up returning the page order from dma_pte_clear_range(), let's just do it once and adjust the 'size' parameter accordingly. Augment pfn_to_dma_pte() to return the level at which the PTE was found, which will also be useful later if we end up changing the API for iommu_iova_to_phys() to behave the same way as is being discussed upstream. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 37 +++++++++++++++++++++++++------------ 1 file changed, 25 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 484d669d2720..6472bf15bef2 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -784,7 +784,7 @@ out: } static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, - unsigned long pfn, int target_level) + unsigned long pfn, int *target_level) { int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; struct dma_pte *parent, *pte = NULL; @@ -799,14 +799,14 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, parent = domain->pgd; - while (level > 0) { + while (1) { void *tmp_page; offset = pfn_level_offset(pfn, level); pte = &parent[offset]; - if (!target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte))) + if (!*target_level && (dma_pte_superpage(pte) || !dma_pte_present(pte))) break; - if (level == target_level) + if (level == *target_level) break; if (!dma_pte_present(pte)) { @@ -827,10 +827,16 @@ static struct dma_pte *pfn_to_dma_pte(struct dmar_domain *domain, domain_flush_cache(domain, pte, sizeof(*pte)); } } + if (level == 1) + break; + parent = phys_to_virt(dma_pte_addr(pte)); level--; } + if (!*target_level) + *target_level = level; + return pte; } @@ -868,7 +874,7 @@ static struct dma_pte *dma_pfn_level_pte(struct dmar_domain *domain, } /* clear last level pte, a tlb flush should be followed */ -static int dma_pte_clear_range(struct dmar_domain *domain, +static void dma_pte_clear_range(struct dmar_domain *domain, unsigned long start_pfn, unsigned long last_pfn) { @@ -898,8 +904,6 @@ static int dma_pte_clear_range(struct dmar_domain *domain, (void *)pte - (void *)first_pte); } while (start_pfn && start_pfn <= last_pfn); - - return min_t(int, (large_page - 1) * 9, MAX_AGAW_PFN_WIDTH); } static void dma_pte_free_level(struct dmar_domain *domain, int level, @@ -1832,7 +1836,7 @@ static int __domain_mapping(struct dmar_domain *domain, unsigned long iov_pfn, if (!pte) { largepage_lvl = hardware_largepage_caps(domain, iov_pfn, phys_pfn, sg_res); - first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, largepage_lvl); + first_pte = pte = pfn_to_dma_pte(domain, iov_pfn, &largepage_lvl); if (!pte) return -ENOMEM; /* It is large page*/ @@ -4099,15 +4103,23 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = domain->priv; - int order; + int level = 0; - order = dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, + /* Cope with horrid API which requires us to unmap more than the + size argument if it happens to be a large-page mapping. */ + if (!pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level)) + BUG(); + + if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) + size = VTD_PAGE_SIZE << level_to_offset_bits(level); + + dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, (iova + size - 1) >> VTD_PAGE_SHIFT); if (dmar_domain->max_addr == iova + size) dmar_domain->max_addr = iova; - return PAGE_SIZE << order; + return size; } static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, @@ -4115,9 +4127,10 @@ static phys_addr_t intel_iommu_iova_to_phys(struct iommu_domain *domain, { struct dmar_domain *dmar_domain = domain->priv; struct dma_pte *pte; + int level = 0; u64 phys = 0; - pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, 0); + pte = pfn_to_dma_pte(dmar_domain, iova >> VTD_PAGE_SHIFT, &level); if (pte) phys = dma_pte_addr(pte); From ea8ea460c9ace60bbb5ac6e5521d637d5c15293d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 5 Mar 2014 17:09:32 +0000 Subject: [PATCH 35/75] iommu/vt-d: Clean up and fix page table clear/free behaviour There is a race condition between the existing clear/free code and the hardware. The IOMMU is actually permitted to cache the intermediate levels of the page tables, and doesn't need to walk the table from the very top of the PGD each time. So the existing back-to-back calls to dma_pte_clear_range() and dma_pte_free_pagetable() can lead to a use-after-free where the IOMMU reads from a freed page table. When freeing page tables we actually need to do the IOTLB flush, with the 'invalidation hint' bit clear to indicate that it's not just a leaf-node flush, after unlinking each page table page from the next level up but before actually freeing it. So in the rewritten domain_unmap() we just return a list of pages (using pg->freelist to make a list of them), and then the caller is expected to do the appropriate IOTLB flush (or tear down the domain completely, whatever), before finally calling dma_free_pagelist() to free the pages. As an added bonus, we no longer need to flush the CPU's data cache for pages which are about to be *removed* from the page table hierarchy anyway, in the non-cache-coherent case. This drastically improves the performance of large unmaps. As a side-effect of all these changes, this also fixes the fact that intel_iommu_unmap() was neglecting to free the page tables for the range in question after clearing them. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 232 +++++++++++++++++++++++++++++------- 1 file changed, 187 insertions(+), 45 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6472bf15bef2..f5934fc2bbcc 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1,5 +1,5 @@ /* - * Copyright (c) 2006, Intel Corporation. + * Copyright © 2006-2014 Intel Corporation. * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -10,15 +10,11 @@ * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for * more details. * - * You should have received a copy of the GNU General Public License along with - * this program; if not, write to the Free Software Foundation, Inc., 59 Temple - * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Ashok Raj - * Author: Shaohua Li - * Author: Anil S Keshavamurthy - * Author: Fenghua Yu + * Authors: David Woodhouse , + * Ashok Raj , + * Shaohua Li , + * Anil S Keshavamurthy , + * Fenghua Yu */ #include @@ -413,6 +409,7 @@ struct deferred_flush_tables { int next; struct iova *iova[HIGH_WATER_MARK]; struct dmar_domain *domain[HIGH_WATER_MARK]; + struct page *freelist[HIGH_WATER_MARK]; }; static struct deferred_flush_tables *deferred_flush; @@ -961,6 +958,123 @@ static void dma_pte_free_pagetable(struct dmar_domain *domain, } } +/* When a page at a given level is being unlinked from its parent, we don't + need to *modify* it at all. All we need to do is make a list of all the + pages which can be freed just as soon as we've flushed the IOTLB and we + know the hardware page-walk will no longer touch them. + The 'pte' argument is the *parent* PTE, pointing to the page that is to + be freed. */ +static struct page *dma_pte_list_pagetables(struct dmar_domain *domain, + int level, struct dma_pte *pte, + struct page *freelist) +{ + struct page *pg; + + pg = pfn_to_page(dma_pte_addr(pte) >> PAGE_SHIFT); + pg->freelist = freelist; + freelist = pg; + + if (level == 1) + return freelist; + + for (pte = page_address(pg); !first_pte_in_page(pte); pte++) { + if (dma_pte_present(pte) && !dma_pte_superpage(pte)) + freelist = dma_pte_list_pagetables(domain, level - 1, + pte, freelist); + } + + return freelist; +} + +static struct page *dma_pte_clear_level(struct dmar_domain *domain, int level, + struct dma_pte *pte, unsigned long pfn, + unsigned long start_pfn, + unsigned long last_pfn, + struct page *freelist) +{ + struct dma_pte *first_pte = NULL, *last_pte = NULL; + + pfn = max(start_pfn, pfn); + pte = &pte[pfn_level_offset(pfn, level)]; + + do { + unsigned long level_pfn; + + if (!dma_pte_present(pte)) + goto next; + + level_pfn = pfn & level_mask(level); + + /* If range covers entire pagetable, free it */ + if (start_pfn <= level_pfn && + last_pfn >= level_pfn + level_size(level) - 1) { + /* These suborbinate page tables are going away entirely. Don't + bother to clear them; we're just going to *free* them. */ + if (level > 1 && !dma_pte_superpage(pte)) + freelist = dma_pte_list_pagetables(domain, level - 1, pte, freelist); + + dma_clear_pte(pte); + if (!first_pte) + first_pte = pte; + last_pte = pte; + } else if (level > 1) { + /* Recurse down into a level that isn't *entirely* obsolete */ + freelist = dma_pte_clear_level(domain, level - 1, + phys_to_virt(dma_pte_addr(pte)), + level_pfn, start_pfn, last_pfn, + freelist); + } +next: + pfn += level_size(level); + } while (!first_pte_in_page(++pte) && pfn <= last_pfn); + + if (first_pte) + domain_flush_cache(domain, first_pte, + (void *)++last_pte - (void *)first_pte); + + return freelist; +} + +/* We can't just free the pages because the IOMMU may still be walking + the page tables, and may have cached the intermediate levels. The + pages can only be freed after the IOTLB flush has been done. */ +struct page *domain_unmap(struct dmar_domain *domain, + unsigned long start_pfn, + unsigned long last_pfn) +{ + int addr_width = agaw_to_width(domain->agaw) - VTD_PAGE_SHIFT; + struct page *freelist = NULL; + + BUG_ON(addr_width < BITS_PER_LONG && start_pfn >> addr_width); + BUG_ON(addr_width < BITS_PER_LONG && last_pfn >> addr_width); + BUG_ON(start_pfn > last_pfn); + + /* we don't need lock here; nobody else touches the iova range */ + freelist = dma_pte_clear_level(domain, agaw_to_level(domain->agaw), + domain->pgd, 0, start_pfn, last_pfn, NULL); + + /* free pgd */ + if (start_pfn == 0 && last_pfn == DOMAIN_MAX_PFN(domain->gaw)) { + struct page *pgd_page = virt_to_page(domain->pgd); + pgd_page->freelist = freelist; + freelist = pgd_page; + + domain->pgd = NULL; + } + + return freelist; +} + +void dma_free_pagelist(struct page *freelist) +{ + struct page *pg; + + while ((pg = freelist)) { + freelist = pg->freelist; + free_pgtable_page(page_address(pg)); + } +} + /* iommu handling */ static int iommu_alloc_root_entry(struct intel_iommu *iommu) { @@ -1070,7 +1184,7 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, break; case DMA_TLB_PSI_FLUSH: val = DMA_TLB_PSI_FLUSH|DMA_TLB_IVT|DMA_TLB_DID(did); - /* Note: always flush non-leaf currently */ + /* IH bit is passed in as part of address */ val_iva = size_order | addr; break; default: @@ -1181,13 +1295,15 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, } static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, - unsigned long pfn, unsigned int pages, int map) + unsigned long pfn, unsigned int pages, int ih, int map) { unsigned int mask = ilog2(__roundup_pow_of_two(pages)); uint64_t addr = (uint64_t)pfn << VTD_PAGE_SHIFT; BUG_ON(pages == 0); + if (ih) + ih = 1 << 6; /* * Fallback to domain selective flush if no PSI support or the size is * too big. @@ -1198,7 +1314,7 @@ static void iommu_flush_iotlb_psi(struct intel_iommu *iommu, u16 did, iommu->flush.flush_iotlb(iommu, did, 0, 0, DMA_TLB_DSI_FLUSH); else - iommu->flush.flush_iotlb(iommu, did, addr, mask, + iommu->flush.flush_iotlb(iommu, did, addr | ih, mask, DMA_TLB_PSI_FLUSH); /* @@ -1523,6 +1639,7 @@ static void domain_exit(struct dmar_domain *domain) { struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; + struct page *freelist = NULL; /* Domain 0 is reserved, so dont process it */ if (!domain) @@ -1538,11 +1655,7 @@ static void domain_exit(struct dmar_domain *domain) /* destroy iovas */ put_iova_domain(&domain->iovad); - /* clear ptes */ - dma_pte_clear_range(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); - - /* free page tables */ - dma_pte_free_pagetable(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); + freelist = domain_unmap(domain, 0, DOMAIN_MAX_PFN(domain->gaw)); /* clear attached or cached domains */ rcu_read_lock(); @@ -1552,6 +1665,8 @@ static void domain_exit(struct dmar_domain *domain) iommu_detach_domain(domain, iommu); rcu_read_unlock(); + dma_free_pagelist(freelist); + free_domain_mem(domain); } @@ -2851,7 +2966,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 1); + iommu_flush_iotlb_psi(iommu, domain->id, mm_to_dma_pfn(iova->pfn_lo), size, 0, 1); else iommu_flush_write_buffer(iommu); @@ -2903,13 +3018,16 @@ static void flush_unmaps(void) /* On real hardware multiple invalidations are expensive */ if (cap_caching_mode(iommu->cap)) iommu_flush_iotlb_psi(iommu, domain->id, - iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, 0); + iova->pfn_lo, iova->pfn_hi - iova->pfn_lo + 1, + !deferred_flush[i].freelist[j], 0); else { mask = ilog2(mm_to_dma_pfn(iova->pfn_hi - iova->pfn_lo + 1)); iommu_flush_dev_iotlb(deferred_flush[i].domain[j], (uint64_t)iova->pfn_lo << PAGE_SHIFT, mask); } __free_iova(&deferred_flush[i].domain[j]->iovad, iova); + if (deferred_flush[i].freelist[j]) + dma_free_pagelist(deferred_flush[i].freelist[j]); } deferred_flush[i].next = 0; } @@ -2926,7 +3044,7 @@ static void flush_unmaps_timeout(unsigned long data) spin_unlock_irqrestore(&async_umap_flush_lock, flags); } -static void add_unmap(struct dmar_domain *dom, struct iova *iova) +static void add_unmap(struct dmar_domain *dom, struct iova *iova, struct page *freelist) { unsigned long flags; int next, iommu_id; @@ -2942,6 +3060,7 @@ static void add_unmap(struct dmar_domain *dom, struct iova *iova) next = deferred_flush[iommu_id].next; deferred_flush[iommu_id].domain[next] = dom; deferred_flush[iommu_id].iova[next] = iova; + deferred_flush[iommu_id].freelist[next] = freelist; deferred_flush[iommu_id].next++; if (!timer_on) { @@ -2961,6 +3080,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, unsigned long start_pfn, last_pfn; struct iova *iova; struct intel_iommu *iommu; + struct page *freelist; if (iommu_no_mapping(dev)) return; @@ -2981,19 +3101,16 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, pr_debug("Device %s unmapping: pfn %lx-%lx\n", pci_name(pdev), start_pfn, last_pfn); - /* clear the whole page */ - dma_pte_clear_range(domain, start_pfn, last_pfn); - - /* free page tables */ - dma_pte_free_pagetable(domain, start_pfn, last_pfn); + freelist = domain_unmap(domain, start_pfn, last_pfn); if (intel_iommu_strict) { iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, - last_pfn - start_pfn + 1, 0); + last_pfn - start_pfn + 1, !freelist, 0); /* free iova */ __free_iova(&domain->iovad, iova); + dma_free_pagelist(freelist); } else { - add_unmap(domain, iova); + add_unmap(domain, iova, freelist); /* * queue up the release of the unmap to save the 1/6th of the * cpu used up by the iotlb flush operation... @@ -3055,6 +3172,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, unsigned long start_pfn, last_pfn; struct iova *iova; struct intel_iommu *iommu; + struct page *freelist; if (iommu_no_mapping(hwdev)) return; @@ -3072,19 +3190,16 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, start_pfn = mm_to_dma_pfn(iova->pfn_lo); last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; - /* clear the whole page */ - dma_pte_clear_range(domain, start_pfn, last_pfn); - - /* free page tables */ - dma_pte_free_pagetable(domain, start_pfn, last_pfn); + freelist = domain_unmap(domain, start_pfn, last_pfn); if (intel_iommu_strict) { iommu_flush_iotlb_psi(iommu, domain->id, start_pfn, - last_pfn - start_pfn + 1, 0); + last_pfn - start_pfn + 1, !freelist, 0); /* free iova */ __free_iova(&domain->iovad, iova); + dma_free_pagelist(freelist); } else { - add_unmap(domain, iova); + add_unmap(domain, iova, freelist); /* * queue up the release of the unmap to save the 1/6th of the * cpu used up by the iotlb flush operation... @@ -3167,7 +3282,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne /* it's a non-present to present mapping. Only flush if caching mode */ if (cap_caching_mode(iommu->cap)) - iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 1); + iommu_flush_iotlb_psi(iommu, domain->id, start_vpfn, size, 0, 1); else iommu_flush_write_buffer(iommu); @@ -3714,6 +3829,7 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, struct iova *iova; struct dmar_drhd_unit *drhd; struct intel_iommu *iommu; + struct page *freelist; iova = find_iova(&si_domain->iovad, start_vpfn); if (iova == NULL) { @@ -3730,16 +3846,17 @@ static int intel_iommu_memory_notifier(struct notifier_block *nb, return NOTIFY_BAD; } + freelist = domain_unmap(si_domain, iova->pfn_lo, + iova->pfn_hi); + rcu_read_lock(); for_each_active_iommu(iommu, drhd) iommu_flush_iotlb_psi(iommu, si_domain->id, iova->pfn_lo, - iova->pfn_hi - iova->pfn_lo + 1, 0); + iova->pfn_hi - iova->pfn_lo + 1, + !freelist, 0); rcu_read_unlock(); - dma_pte_clear_range(si_domain, iova->pfn_lo, - iova->pfn_hi); - dma_pte_free_pagetable(si_domain, iova->pfn_lo, - iova->pfn_hi); + dma_free_pagelist(freelist); start_vpfn = iova->pfn_hi + 1; free_iova_mem(iova); @@ -4100,10 +4217,14 @@ static int intel_iommu_map(struct iommu_domain *domain, } static size_t intel_iommu_unmap(struct iommu_domain *domain, - unsigned long iova, size_t size) + unsigned long iova, size_t size) { struct dmar_domain *dmar_domain = domain->priv; - int level = 0; + struct page *freelist = NULL; + struct intel_iommu *iommu; + unsigned long start_pfn, last_pfn; + unsigned int npages; + int iommu_id, num, ndomains, level = 0; /* Cope with horrid API which requires us to unmap more than the size argument if it happens to be a large-page mapping. */ @@ -4113,8 +4234,29 @@ static size_t intel_iommu_unmap(struct iommu_domain *domain, if (size < VTD_PAGE_SIZE << level_to_offset_bits(level)) size = VTD_PAGE_SIZE << level_to_offset_bits(level); - dma_pte_clear_range(dmar_domain, iova >> VTD_PAGE_SHIFT, - (iova + size - 1) >> VTD_PAGE_SHIFT); + start_pfn = iova >> VTD_PAGE_SHIFT; + last_pfn = (iova + size - 1) >> VTD_PAGE_SHIFT; + + freelist = domain_unmap(dmar_domain, start_pfn, last_pfn); + + npages = last_pfn - start_pfn + 1; + + for_each_set_bit(iommu_id, dmar_domain->iommu_bmp, g_num_of_iommus) { + iommu = g_iommus[iommu_id]; + + /* + * find bit position of dmar_domain + */ + ndomains = cap_ndoms(iommu->cap); + for_each_set_bit(num, iommu->domain_ids, ndomains) { + if (iommu->domains[num] == dmar_domain) + iommu_flush_iotlb_psi(iommu, num, start_pfn, + npages, !freelist, 0); + } + + } + + dma_free_pagelist(freelist); if (dmar_domain->max_addr == iova + size) dmar_domain->max_addr = iova; From 214e39aa36c9c02355d388f20d83b93fc2fa7298 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 19 Mar 2014 10:38:49 +0000 Subject: [PATCH 36/75] iommu/vt-d: Honour intel_iommu=sp_off for non-VMM domains Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f5934fc2bbcc..c3d4bc91a189 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1624,7 +1624,11 @@ static int domain_init(struct dmar_domain *domain, int guest_width) else domain->iommu_snooping = 0; - domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); + if (intel_iommu_superpage) + domain->iommu_superpage = fls(cap_super_page_val(iommu->cap)); + else + domain->iommu_superpage = 0; + domain->nid = iommu->node; /* always allocate the top pgd */ From d05019608746240d86a406fbf8d8c1fc71a87101 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 11 Mar 2014 17:10:29 -0700 Subject: [PATCH 37/75] iommu/vt-d: Be less pessimistic about domain coherency where possible In commit 2e12bc29 ("intel-iommu: Default to non-coherent for domains unattached to iommus") we decided to err on the side of caution and always assume that it's possible that a device will be attached which is behind a non-coherent IOMMU. In some cases, however, that just *cannot* happen. If there *are* no IOMMUs in the system which are non-coherent, then we don't need to do it. And flushing the dcache is a *significant* performance hit. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 21 +++++++++++++++++---- 1 file changed, 17 insertions(+), 4 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c3d4bc91a189..1599cb1e041c 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -592,18 +592,31 @@ static struct intel_iommu *domain_get_iommu(struct dmar_domain *domain) static void domain_update_iommu_coherency(struct dmar_domain *domain) { - int i; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + int i, found = 0; - i = find_first_bit(domain->iommu_bmp, g_num_of_iommus); - - domain->iommu_coherency = i < g_num_of_iommus ? 1 : 0; + domain->iommu_coherency = 1; for_each_set_bit(i, domain->iommu_bmp, g_num_of_iommus) { + found = 1; if (!ecap_coherent(g_iommus[i]->ecap)) { domain->iommu_coherency = 0; break; } } + if (found) + return; + + /* No hardware attached; use lowest common denominator */ + rcu_read_lock(); + for_each_active_iommu(iommu, drhd) { + if (!ecap_coherent(iommu->ecap)) { + domain->iommu_coherency = 0; + break; + } + } + rcu_read_unlock(); } static void domain_update_iommu_snooping(struct dmar_domain *domain) From 86a54dcce6ccb0d4a5810ed8112011039d855bee Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 7 Mar 2014 12:43:40 +0000 Subject: [PATCH 38/75] iommu/vt-d: Add ACPI namespace device reporting structures Signed-off-by: David Woodhouse --- include/acpi/actbl2.h | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) diff --git a/include/acpi/actbl2.h b/include/acpi/actbl2.h index 094a906a0e98..da5b057d775d 100644 --- a/include/acpi/actbl2.h +++ b/include/acpi/actbl2.h @@ -424,7 +424,8 @@ enum acpi_dmar_type { ACPI_DMAR_TYPE_RESERVED_MEMORY = 1, ACPI_DMAR_TYPE_ATSR = 2, ACPI_DMAR_HARDWARE_AFFINITY = 3, - ACPI_DMAR_TYPE_RESERVED = 4 /* 4 and greater are reserved */ + ACPI_DMAR_TYPE_ANDD = 4, + ACPI_DMAR_TYPE_RESERVED = 5 /* 5 and greater are reserved */ }; /* DMAR Device Scope structure */ @@ -445,7 +446,8 @@ enum acpi_dmar_scope_type { ACPI_DMAR_SCOPE_TYPE_BRIDGE = 2, ACPI_DMAR_SCOPE_TYPE_IOAPIC = 3, ACPI_DMAR_SCOPE_TYPE_HPET = 4, - ACPI_DMAR_SCOPE_TYPE_RESERVED = 5 /* 5 and greater are reserved */ + ACPI_DMAR_SCOPE_TYPE_ACPI = 5, + ACPI_DMAR_SCOPE_TYPE_RESERVED = 6 /* 6 and greater are reserved */ }; struct acpi_dmar_pci_path { @@ -507,6 +509,15 @@ struct acpi_dmar_rhsa { u32 proximity_domain; }; +/* 4: ACPI Namespace Device Declaration Structure */ + +struct acpi_dmar_andd { + struct acpi_dmar_header header; + u8 reserved[3]; + u8 device_number; + u8 object_name[]; +}; + /******************************************************************************* * * HPET - High Precision Event Timer table From e625b4a95d50fa2f2d2fd0ab4b9ac9d6b6c2474c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 7 Mar 2014 14:34:38 +0000 Subject: [PATCH 39/75] iommu/vt-d: Parse ANDD records Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index b19f9f4c3584..eb95020c2314 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -373,6 +373,26 @@ static void dmar_free_drhd(struct dmar_drhd_unit *dmaru) kfree(dmaru); } +static int __init dmar_parse_one_andd(struct acpi_dmar_header *header) +{ + struct acpi_dmar_andd *andd = (void *)header; + + /* Check for NUL termination within the designated length */ + if (strnlen(andd->object_name, header->length - 8) == header->length - 8) { + WARN_TAINT(1, TAINT_FIRMWARE_WORKAROUND, + "Your BIOS is broken; ANDD object name is not NUL-terminated\n" + "BIOS vendor: %s; Ver: %s; Product Version: %s\n", + dmi_get_system_info(DMI_BIOS_VENDOR), + dmi_get_system_info(DMI_BIOS_VERSION), + dmi_get_system_info(DMI_PRODUCT_VERSION)); + return -EINVAL; + } + pr_info("ANDD device: %x name: %s\n", andd->device_number, + andd->object_name); + + return 0; +} + #ifdef CONFIG_ACPI_NUMA static int __init dmar_parse_one_rhsa(struct acpi_dmar_header *header) @@ -436,6 +456,10 @@ dmar_table_print_dmar_entry(struct acpi_dmar_header *header) (unsigned long long)rhsa->base_address, rhsa->proximity_domain); break; + case ACPI_DMAR_TYPE_ANDD: + /* We don't print this here because we need to sanity-check + it first. So print it in dmar_parse_one_andd() instead. */ + break; } } @@ -521,6 +545,9 @@ parse_dmar_table(void) ret = dmar_parse_one_rhsa(entry_header); #endif break; + case ACPI_DMAR_TYPE_ANDD: + ret = dmar_parse_one_andd(entry_header); + break; default: pr_warn("Unknown DMAR structure type %d\n", entry_header->type); From 07cb52ff6aadac0ad68b29be2ef73dba3111c5ec Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 7 Mar 2014 14:39:27 +0000 Subject: [PATCH 40/75] iommu/vt-d: Allocate space for ACPI devices Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index eb95020c2314..4c6297d1b421 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -84,7 +84,8 @@ void *dmar_alloc_dev_scope(void *start, void *end, int *cnt) *cnt = 0; while (start < end) { scope = start; - if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || + if (scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ACPI || + scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT || scope->entry_type == ACPI_DMAR_SCOPE_TYPE_BRIDGE) (*cnt)++; else if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_IOAPIC && @@ -342,21 +343,18 @@ dmar_parse_one_drhd(struct acpi_dmar_header *header) dmaru->reg_base_addr = drhd->address; dmaru->segment = drhd->segment; dmaru->include_all = drhd->flags & 0x1; /* BIT0: INCLUDE_ALL */ - if (!dmaru->include_all) { - dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1), - ((void *)drhd) + drhd->header.length, - &dmaru->devices_cnt); - if (dmaru->devices_cnt && dmaru->devices == NULL) { - kfree(dmaru); - return -ENOMEM; - } + dmaru->devices = dmar_alloc_dev_scope((void *)(drhd + 1), + ((void *)drhd) + drhd->header.length, + &dmaru->devices_cnt); + if (dmaru->devices_cnt && dmaru->devices == NULL) { + kfree(dmaru); + return -ENOMEM; } ret = alloc_iommu(dmaru); if (ret) { - if (!dmaru->include_all) - dmar_free_dev_scope(&dmaru->devices, - &dmaru->devices_cnt); + dmar_free_dev_scope(&dmaru->devices, + &dmaru->devices_cnt); kfree(dmaru); return ret; } From 832bd858674023b2415c7585db3beba345ef807f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 7 Mar 2014 15:08:36 +0000 Subject: [PATCH 41/75] iommu/vt-d: Change scope lists to struct device, bus, devfn It's not only for PCI devices any more, and the scope information for an ACPI device provides the bus and devfn so that has to be stored here too. It is the device pointer itself which needs to be protected with RCU, so the __rcu annotation follows it into the definition of struct dmar_dev_scope, since we're no longer just passing arrays of device pointers around. Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 41 +++++++++++++++++++---------------- drivers/iommu/intel-iommu.c | 43 +++++++++++++++++++++---------------- include/linux/dmar.h | 18 ++++++++++------ 3 files changed, 59 insertions(+), 43 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 4c6297d1b421..c1e2e0c82e69 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -97,17 +97,17 @@ void *dmar_alloc_dev_scope(void *start, void *end, int *cnt) if (*cnt == 0) return NULL; - return kcalloc(*cnt, sizeof(struct pci_dev *), GFP_KERNEL); + return kcalloc(*cnt, sizeof(struct dmar_dev_scope), GFP_KERNEL); } -void dmar_free_dev_scope(struct pci_dev __rcu ***devices, int *cnt) +void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt) { int i; - struct pci_dev *tmp_dev; + struct device *tmp_dev; if (*devices && *cnt) { for_each_active_dev_scope(*devices, *cnt, i, tmp_dev) - pci_dev_put(tmp_dev); + put_device(tmp_dev); kfree(*devices); } @@ -191,10 +191,11 @@ static bool dmar_match_pci_path(struct dmar_pci_notify_info *info, int bus, /* Return: > 0 if match found, 0 if no match found, < 0 if error happens */ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, void *start, void*end, u16 segment, - struct pci_dev __rcu **devices, int devices_cnt) + struct dmar_dev_scope *devices, + int devices_cnt) { int i, level; - struct pci_dev *tmp, *dev = info->dev; + struct device *tmp, *dev = &info->dev->dev; struct acpi_dmar_device_scope *scope; struct acpi_dmar_pci_path *path; @@ -213,16 +214,18 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, continue; if ((scope->entry_type == ACPI_DMAR_SCOPE_TYPE_ENDPOINT) ^ - (dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) { + (info->dev->hdr_type == PCI_HEADER_TYPE_NORMAL)) { pr_warn("Device scope type does not match for %s\n", - pci_name(dev)); + pci_name(info->dev)); return -EINVAL; } for_each_dev_scope(devices, devices_cnt, i, tmp) if (tmp == NULL) { - rcu_assign_pointer(devices[i], - pci_dev_get(dev)); + devices[i].bus = info->dev->bus->number; + devices[i].devfn = info->dev->devfn; + rcu_assign_pointer(devices[i].dev, + get_device(dev)); return 1; } BUG_ON(i >= devices_cnt); @@ -232,19 +235,19 @@ int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, } int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, u16 segment, - struct pci_dev __rcu **devices, int count) + struct dmar_dev_scope *devices, int count) { int index; - struct pci_dev *tmp; + struct device *tmp; if (info->seg != segment) return 0; for_each_active_dev_scope(devices, count, index, tmp) - if (tmp == info->dev) { - rcu_assign_pointer(devices[index], NULL); + if (tmp == &info->dev->dev) { + rcu_assign_pointer(devices[index].dev, NULL); synchronize_rcu(); - pci_dev_put(tmp); + put_device(tmp); return 1; } @@ -562,15 +565,15 @@ parse_dmar_table(void) return ret; } -static int dmar_pci_device_match(struct pci_dev __rcu *devices[], int cnt, - struct pci_dev *dev) +static int dmar_pci_device_match(struct dmar_dev_scope devices[], + int cnt, struct pci_dev *dev) { int index; - struct pci_dev *tmp; + struct device *tmp; while (dev) { for_each_active_dev_scope(devices, cnt, index, tmp) - if (dev == tmp) + if (dev_is_pci(tmp) && dev == to_pci_dev(tmp)) return 1; /* Check our parent */ diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1599cb1e041c..ace088eebb45 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -382,14 +382,14 @@ struct dmar_rmrr_unit { struct acpi_dmar_header *hdr; /* ACPI header */ u64 base_address; /* reserved base address*/ u64 end_address; /* reserved end address */ - struct pci_dev __rcu **devices; /* target devices */ + struct dmar_dev_scope *devices; /* target devices */ int devices_cnt; /* target device count */ }; struct dmar_atsr_unit { struct list_head list; /* list of ATSR units */ struct acpi_dmar_header *hdr; /* ACPI header */ - struct pci_dev __rcu **devices; /* target devices */ + struct dmar_dev_scope *devices; /* target devices */ int devices_cnt; /* target device count */ u8 include_all:1; /* include all ports */ }; @@ -669,7 +669,8 @@ static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) { struct dmar_drhd_unit *drhd = NULL; struct intel_iommu *iommu; - struct pci_dev *dev; + struct device *dev; + struct pci_dev *pdev; int i; rcu_read_lock(); @@ -679,11 +680,14 @@ static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { - if (dev->bus->number == bus && dev->devfn == devfn) + if (!dev_is_pci(dev)) + continue; + pdev = to_pci_dev(dev); + if (pdev->bus->number == bus && pdev->devfn == devfn) goto out; - if (dev->subordinate && - dev->subordinate->number <= bus && - dev->subordinate->busn_res.end >= bus) + if (pdev->subordinate && + pdev->subordinate->number <= bus && + pdev->subordinate->busn_res.end >= bus) goto out; } @@ -2479,7 +2483,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, static bool device_has_rmrr(struct pci_dev *dev) { struct dmar_rmrr_unit *rmrr; - struct pci_dev *tmp; + struct device *tmp; int i; rcu_read_lock(); @@ -2490,7 +2494,7 @@ static bool device_has_rmrr(struct pci_dev *dev) */ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, tmp) - if (tmp == dev) { + if (tmp == &dev->dev) { rcu_read_unlock(); return true; } @@ -2602,7 +2606,7 @@ static int __init init_dmars(void) { struct dmar_drhd_unit *drhd; struct dmar_rmrr_unit *rmrr; - struct pci_dev *pdev; + struct device *dev; struct intel_iommu *iommu; int i, ret; @@ -2746,8 +2750,10 @@ static int __init init_dmars(void) for_each_rmrr_units(rmrr) { /* some BIOS lists non-exist devices in DMAR table. */ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, - i, pdev) { - ret = iommu_prepare_rmrr_dev(rmrr, pdev); + i, dev) { + if (!dev_is_pci(dev)) + continue; + ret = iommu_prepare_rmrr_dev(rmrr, to_pci_dev(dev)); if (ret) printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); @@ -3434,7 +3440,7 @@ DECLARE_PCI_FIXUP_ENABLE(PCI_VENDOR_ID_INTEL, PCI_DEVICE_ID_INTEL_IOAT_SNB, quir static void __init init_no_remapping_devices(void) { struct dmar_drhd_unit *drhd; - struct pci_dev *dev; + struct device *dev; int i; for_each_drhd_unit(drhd) { @@ -3442,7 +3448,7 @@ static void __init init_no_remapping_devices(void) for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) break; - /* ignore DMAR unit if no pci devices exist */ + /* ignore DMAR unit if no devices exist */ if (i == drhd->devices_cnt) drhd->ignored = 1; } @@ -3454,7 +3460,7 @@ static void __init init_no_remapping_devices(void) for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) - if (!IS_GFX_DEVICE(dev)) + if (!dev_is_pci(dev) || !IS_GFX_DEVICE(to_pci_dev(dev))) break; if (i < drhd->devices_cnt) continue; @@ -3467,7 +3473,7 @@ static void __init init_no_remapping_devices(void) drhd->ignored = 1; for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) - dev->dev.archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; + dev->archdata.iommu = DUMMY_DEVICE_DOMAIN_INFO; } } } @@ -3691,7 +3697,8 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) { int i, ret = 1; struct pci_bus *bus; - struct pci_dev *bridge = NULL, *tmp; + struct pci_dev *bridge = NULL; + struct device *tmp; struct acpi_dmar_atsr *atsr; struct dmar_atsr_unit *atsru; @@ -3714,7 +3721,7 @@ int dmar_find_matched_atsr_unit(struct pci_dev *dev) continue; for_each_dev_scope(atsru->devices, atsru->devices_cnt, i, tmp) - if (tmp == bridge) + if (tmp == &bridge->dev) goto out; if (atsru->include_all) diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 0a92e4d978bc..23c8db129560 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -36,13 +36,19 @@ struct acpi_dmar_header; struct intel_iommu; +struct dmar_dev_scope { + struct device __rcu *dev; + u8 bus; + u8 devfn; +}; + #ifdef CONFIG_DMAR_TABLE extern struct acpi_table_header *dmar_tbl; struct dmar_drhd_unit { struct list_head list; /* list of drhd units */ struct acpi_dmar_header *hdr; /* ACPI header */ u64 reg_base_addr; /* register base address*/ - struct pci_dev __rcu **devices;/* target device array */ + struct dmar_dev_scope *devices;/* target device array */ int devices_cnt; /* target device count */ u16 segment; /* PCI domain */ u8 ignored:1; /* ignore drhd */ @@ -86,7 +92,7 @@ static inline bool dmar_rcu_check(void) #define dmar_rcu_dereference(p) rcu_dereference_check((p), dmar_rcu_check()) #define for_each_dev_scope(a, c, p, d) \ - for ((p) = 0; ((d) = (p) < (c) ? dmar_rcu_dereference((a)[(p)]) : \ + for ((p) = 0; ((d) = (p) < (c) ? dmar_rcu_dereference((a)[(p)].dev) : \ NULL, (p) < (c)); (p)++) #define for_each_active_dev_scope(a, c, p, d) \ @@ -95,15 +101,15 @@ static inline bool dmar_rcu_check(void) extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, - struct pci_dev ***devices, u16 segment); + struct dmar_dev_scope **devices, u16 segment); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); -extern void dmar_free_dev_scope(struct pci_dev __rcu ***devices, int *cnt); +extern void dmar_free_dev_scope(struct dmar_dev_scope **devices, int *cnt); extern int dmar_insert_dev_scope(struct dmar_pci_notify_info *info, void *start, void*end, u16 segment, - struct pci_dev __rcu **devices, + struct dmar_dev_scope *devices, int devices_cnt); extern int dmar_remove_dev_scope(struct dmar_pci_notify_info *info, - u16 segment, struct pci_dev __rcu **devices, + u16 segment, struct dmar_dev_scope *devices, int count); /* Intel IOMMU detection */ extern int detect_intel_iommu(void); From ed40356b5fcf1ce28e026ab39c5b2b6939068b50 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 7 Mar 2014 23:15:42 +0000 Subject: [PATCH 42/75] iommu/vt-d: Add ACPI devices into dmaru->devices[] array Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 75 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index c1e2e0c82e69..7ea086f61073 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -612,6 +612,79 @@ out: return dmaru; } +static void __init dmar_acpi_insert_dev_scope(u8 device_number, + struct acpi_device *adev) +{ + struct dmar_drhd_unit *dmaru; + struct acpi_dmar_hardware_unit *drhd; + struct acpi_dmar_device_scope *scope; + struct device *tmp; + int i; + struct acpi_dmar_pci_path *path; + + for_each_drhd_unit(dmaru) { + drhd = container_of(dmaru->hdr, + struct acpi_dmar_hardware_unit, + header); + + for (scope = (void *)(drhd + 1); + (unsigned long)scope < ((unsigned long)drhd) + drhd->header.length; + scope = ((void *)scope) + scope->length) { + if (scope->entry_type != ACPI_DMAR_SCOPE_TYPE_ACPI) + continue; + if (scope->enumeration_id != device_number) + continue; + + path = (void *)(scope + 1); + pr_info("ACPI device \"%s\" under DMAR at %llx as %02x:%02x.%d\n", + dev_name(&adev->dev), dmaru->reg_base_addr, + scope->bus, path->device, path->function); + for_each_dev_scope(dmaru->devices, dmaru->devices_cnt, i, tmp) + if (tmp == NULL) { + dmaru->devices[i].bus = scope->bus; + dmaru->devices[i].devfn = PCI_DEVFN(path->device, + path->function); + rcu_assign_pointer(dmaru->devices[i].dev, + get_device(&adev->dev)); + return; + } + BUG_ON(i >= dmaru->devices_cnt); + } + } + pr_warn("No IOMMU scope found for ANDD enumeration ID %d (%s)\n", + device_number, dev_name(&adev->dev)); +} + +static int __init dmar_acpi_dev_scope_init(void) +{ + struct acpi_dmar_andd *andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar); + + while (((unsigned long)andd) < + ((unsigned long)dmar_tbl) + dmar_tbl->length) { + if (andd->header.type == ACPI_DMAR_TYPE_ANDD) { + acpi_handle h; + struct acpi_device *adev; + + if (!ACPI_SUCCESS(acpi_get_handle(ACPI_ROOT_OBJECT, + andd->object_name, + &h))) { + pr_err("Failed to find handle for ACPI object %s\n", + andd->object_name); + continue; + } + acpi_bus_get_device(h, &adev); + if (!adev) { + pr_err("Failed to get device for ACPI object %s\n", + andd->object_name); + continue; + } + dmar_acpi_insert_dev_scope(andd->device_number, adev); + } + andd = ((void *)andd) + andd->header.length; + } + return 0; +} + int __init dmar_dev_scope_init(void) { struct pci_dev *dev = NULL; @@ -620,6 +693,8 @@ int __init dmar_dev_scope_init(void) if (dmar_dev_scope_status != 1) return dmar_dev_scope_status; + dmar_acpi_dev_scope_init(); + if (list_empty(&dmar_drhd_units)) { dmar_dev_scope_status = -ENODEV; } else { From 3d89194a94da0c49f9d21720df6bbaf7c848449c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 6 Mar 2014 15:59:26 +0000 Subject: [PATCH 43/75] iommu/vt-d: Make iommu_dummy() take struct device instead of struct pci_dev Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ace088eebb45..2079cb65d478 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2882,9 +2882,9 @@ static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev) return __get_valid_domain_for_dev(dev); } -static int iommu_dummy(struct pci_dev *pdev) +static int iommu_dummy(struct device *dev) { - return pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; + return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; } /* Check if the pdev needs to go through non-identity map and unmap process.*/ @@ -2896,13 +2896,13 @@ static int iommu_no_mapping(struct device *dev) if (unlikely(!dev_is_pci(dev))) return 1; - pdev = to_pci_dev(dev); - if (iommu_dummy(pdev)) + if (iommu_dummy(dev)) return 1; if (!iommu_identity_mapping) return 0; + pdev = to_pci_dev(dev); found = identity_mapping(pdev); if (found) { if (iommu_should_identity_map(pdev, 0)) @@ -3801,7 +3801,7 @@ static int device_notifier(struct notifier_block *nb, struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; - if (iommu_dummy(pdev)) + if (iommu_dummy(dev)) return 0; if (action != BUS_NOTIFY_UNBOUND_DRIVER && From 1525a29a7d619901aed0dc5f1eb0592fc881805a Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 6 Mar 2014 16:19:30 +0000 Subject: [PATCH 44/75] iommu/vt-d: Make dmar_insert_dev_info() take struct device instead of struct pci_dev Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2079cb65d478..40dbafd376fb 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2113,15 +2113,14 @@ static void domain_remove_dev_info(struct dmar_domain *domain) /* * find_domain - * Note: we use struct pci_dev->dev.archdata.iommu stores the info + * Note: we use struct device->archdata.iommu stores the info */ -static struct dmar_domain * -find_domain(struct pci_dev *pdev) +static struct dmar_domain *find_domain(struct device *dev) { struct device_domain_info *info; /* No lock here, assumes no domain exit in normal case */ - info = pdev->dev.archdata.iommu; + info = dev->archdata.iommu; if (info) return info->domain; return NULL; @@ -2161,7 +2160,7 @@ static int dmar_insert_dev_info(int segment, int bus, int devfn, spin_lock_irqsave(&device_domain_lock, flags); if (dev) - found = find_domain(dev); + found = find_domain(&dev->dev); else found = dmar_search_domain_by_dev_info(segment, bus, devfn); if (found) { @@ -2193,7 +2192,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) int bus = 0, devfn = 0; int segment; - domain = find_domain(pdev); + domain = find_domain(&pdev->dev); if (domain) return domain; @@ -2252,7 +2251,7 @@ error: if (free) domain_exit(free); /* recheck it here, maybe others set it */ - return find_domain(pdev); + return find_domain(&pdev->dev); } static int iommu_identity_mapping; @@ -3108,7 +3107,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, if (iommu_no_mapping(dev)) return; - domain = find_domain(pdev); + domain = find_domain(dev); BUG_ON(!domain); iommu = domain_get_iommu(domain); @@ -3200,7 +3199,7 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, if (iommu_no_mapping(hwdev)) return; - domain = find_domain(pdev); + domain = find_domain(hwdev); BUG_ON(!domain); iommu = domain_get_iommu(domain); @@ -3808,7 +3807,7 @@ static int device_notifier(struct notifier_block *nb, action != BUS_NOTIFY_DEL_DEVICE) return 0; - domain = find_domain(pdev); + domain = find_domain(dev); if (!domain) return 0; @@ -4147,7 +4146,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, if (unlikely(domain_context_mapped(pdev))) { struct dmar_domain *old_domain; - old_domain = find_domain(pdev); + old_domain = find_domain(dev); if (old_domain) { if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) From 0bcb3e28c3c9b06a3ffab6238c517acdc851e625 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 6 Mar 2014 17:12:03 +0000 Subject: [PATCH 45/75] iommu/vt-d: Use struct device in device_domain_info, not struct pci_dev Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 54 ++++++++++++++++++++++--------------- 1 file changed, 32 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 40dbafd376fb..dc84147cd5c9 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -372,7 +372,7 @@ struct device_domain_info { int segment; /* PCI domain */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ - struct pci_dev *dev; /* it's NULL for PCIe-to-PCI bridge */ + struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ struct intel_iommu *iommu; /* IOMMU used by this device */ struct dmar_domain *domain; /* pointer to domain */ }; @@ -428,7 +428,7 @@ static void domain_remove_dev_info(struct dmar_domain *domain); static void domain_remove_one_dev_info(struct dmar_domain *domain, struct pci_dev *pdev); static void iommu_detach_dependent_devices(struct intel_iommu *iommu, - struct pci_dev *pdev); + struct device *dev); #ifdef CONFIG_INTEL_IOMMU_DEFAULT_ON int dmar_disabled = 0; @@ -1247,6 +1247,7 @@ static struct device_domain_info *iommu_support_dev_iotlb( unsigned long flags; struct device_domain_info *info; struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn); + struct pci_dev *pdev; if (!ecap_dev_iotlb_support(iommu->ecap)) return NULL; @@ -1262,13 +1263,15 @@ static struct device_domain_info *iommu_support_dev_iotlb( } spin_unlock_irqrestore(&device_domain_lock, flags); - if (!found || !info->dev) + if (!found || !info->dev || !dev_is_pci(info->dev)) return NULL; - if (!pci_find_ext_capability(info->dev, PCI_EXT_CAP_ID_ATS)) + pdev = to_pci_dev(info->dev); + + if (!pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_ATS)) return NULL; - if (!dmar_find_matched_atsr_unit(info->dev)) + if (!dmar_find_matched_atsr_unit(pdev)) return NULL; info->iommu = iommu; @@ -1278,18 +1281,19 @@ static struct device_domain_info *iommu_support_dev_iotlb( static void iommu_enable_dev_iotlb(struct device_domain_info *info) { - if (!info) + if (!info || !dev_is_pci(info->dev)) return; - pci_enable_ats(info->dev, VTD_PAGE_SHIFT); + pci_enable_ats(to_pci_dev(info->dev), VTD_PAGE_SHIFT); } static void iommu_disable_dev_iotlb(struct device_domain_info *info) { - if (!info->dev || !pci_ats_enabled(info->dev)) + if (!info->dev || !dev_is_pci(info->dev) || + !pci_ats_enabled(to_pci_dev(info->dev))) return; - pci_disable_ats(info->dev); + pci_disable_ats(to_pci_dev(info->dev)); } static void iommu_flush_dev_iotlb(struct dmar_domain *domain, @@ -1301,11 +1305,16 @@ static void iommu_flush_dev_iotlb(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry(info, &domain->devices, link) { - if (!info->dev || !pci_ats_enabled(info->dev)) + struct pci_dev *pdev; + if (!info->dev || !dev_is_pci(info->dev)) + continue; + + pdev = to_pci_dev(info->dev); + if (!pci_ats_enabled(pdev)) continue; sid = info->bus << 8 | info->devfn; - qdep = pci_ats_queue_depth(info->dev); + qdep = pci_ats_queue_depth(pdev); qi_flush_dev_iotlb(info->iommu, sid, qdep, addr, mask); } spin_unlock_irqrestore(&device_domain_lock, flags); @@ -2071,7 +2080,7 @@ static inline void unlink_domain_info(struct device_domain_info *info) list_del(&info->link); list_del(&info->global); if (info->dev) - info->dev->dev.archdata.iommu = NULL; + info->dev->archdata.iommu = NULL; } static void domain_remove_dev_info(struct dmar_domain *domain) @@ -2140,7 +2149,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) } static int dmar_insert_dev_info(int segment, int bus, int devfn, - struct pci_dev *dev, struct dmar_domain **domp) + struct device *dev, struct dmar_domain **domp) { struct dmar_domain *found, *domain = *domp; struct device_domain_info *info; @@ -2160,7 +2169,7 @@ static int dmar_insert_dev_info(int segment, int bus, int devfn, spin_lock_irqsave(&device_domain_lock, flags); if (dev) - found = find_domain(&dev->dev); + found = find_domain(dev); else found = dmar_search_domain_by_dev_info(segment, bus, devfn); if (found) { @@ -2174,7 +2183,7 @@ static int dmar_insert_dev_info(int segment, int bus, int devfn, list_add(&info->link, &domain->devices); list_add(&info->global, &device_domain_list); if (dev) - dev->dev.archdata.iommu = info; + dev->archdata.iommu = info; spin_unlock_irqrestore(&device_domain_lock, flags); } @@ -2245,7 +2254,7 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) found_domain: if (dmar_insert_dev_info(segment, pdev->bus->number, pdev->devfn, - pdev, &domain) == 0) + &pdev->dev, &domain) == 0) return domain; error: if (free) @@ -2458,7 +2467,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, info->segment = pci_domain_nr(pdev->bus); info->bus = pdev->bus->number; info->devfn = pdev->devfn; - info->dev = pdev; + info->dev = &pdev->dev; info->domain = domain; spin_lock_irqsave(&device_domain_lock, flags); @@ -3189,7 +3198,6 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; unsigned long start_pfn, last_pfn; struct iova *iova; @@ -3985,13 +3993,15 @@ out_free_dmar: } static void iommu_detach_dependent_devices(struct intel_iommu *iommu, - struct pci_dev *pdev) + struct device *dev) { - struct pci_dev *tmp, *parent; + struct pci_dev *tmp, *parent, *pdev; - if (!iommu || !pdev) + if (!iommu || !dev || !dev_is_pci(dev)) return; + pdev = to_pci_dev(dev); + /* dependent device detach */ tmp = pci_find_upstream_pcie_bridge(pdev); /* Secondary interface's bus number and devfn 0 */ @@ -4034,7 +4044,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, iommu_disable_dev_iotlb(info); iommu_detach_dev(iommu, info->bus, info->devfn); - iommu_detach_dependent_devices(iommu, pdev); + iommu_detach_dependent_devices(iommu, &pdev->dev); free_devinfo_mem(info); spin_lock_irqsave(&device_domain_lock, flags); From 64ae892bfee37c0f982710363c39474218028e33 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 12:52:30 -0700 Subject: [PATCH 46/75] iommu/vt-d: Pass iommu to domain_context_mapping_one() and iommu_support_dev_iotlb() Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 35 +++++++++++++++++------------------ 1 file changed, 17 insertions(+), 18 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index dc84147cd5c9..1c43a7b3008a 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1240,13 +1240,13 @@ static void __iommu_flush_iotlb(struct intel_iommu *iommu, u16 did, (unsigned long long)DMA_TLB_IAIG(val)); } -static struct device_domain_info *iommu_support_dev_iotlb( - struct dmar_domain *domain, int segment, u8 bus, u8 devfn) +static struct device_domain_info * +iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, + u8 bus, u8 devfn) { int found = 0; unsigned long flags; struct device_domain_info *info; - struct intel_iommu *iommu = device_to_iommu(segment, bus, devfn); struct pci_dev *pdev; if (!ecap_dev_iotlb_support(iommu->ecap)) @@ -1700,12 +1700,12 @@ static void domain_exit(struct dmar_domain *domain) free_domain_mem(domain); } -static int domain_context_mapping_one(struct dmar_domain *domain, int segment, - u8 bus, u8 devfn, int translation) +static int domain_context_mapping_one(struct dmar_domain *domain, + struct intel_iommu *iommu, + u8 bus, u8 devfn, int translation) { struct context_entry *context; unsigned long flags; - struct intel_iommu *iommu; struct dma_pte *pgd; unsigned long num; unsigned long ndomains; @@ -1720,10 +1720,6 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, BUG_ON(translation != CONTEXT_TT_PASS_THROUGH && translation != CONTEXT_TT_MULTI_LEVEL); - iommu = device_to_iommu(segment, bus, devfn); - if (!iommu) - return -ENODEV; - context = device_to_context_entry(iommu, bus, devfn); if (!context) return -ENOMEM; @@ -1781,7 +1777,7 @@ static int domain_context_mapping_one(struct dmar_domain *domain, int segment, context_set_domain_id(context, id); if (translation != CONTEXT_TT_PASS_THROUGH) { - info = iommu_support_dev_iotlb(domain, segment, bus, devfn); + info = iommu_support_dev_iotlb(domain, iommu, bus, devfn); translation = info ? CONTEXT_TT_DEV_IOTLB : CONTEXT_TT_MULTI_LEVEL; } @@ -1836,8 +1832,14 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, { int ret; struct pci_dev *tmp, *parent; + struct intel_iommu *iommu; - ret = domain_context_mapping_one(domain, pci_domain_nr(pdev->bus), + iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, + pdev->devfn); + if (!iommu) + return -ENODEV; + + ret = domain_context_mapping_one(domain, iommu, pdev->bus->number, pdev->devfn, translation); if (ret) @@ -1850,8 +1852,7 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, /* Secondary interface's bus number and devfn 0 */ parent = pdev->bus->self; while (parent != tmp) { - ret = domain_context_mapping_one(domain, - pci_domain_nr(parent->bus), + ret = domain_context_mapping_one(domain, iommu, parent->bus->number, parent->devfn, translation); if (ret) @@ -1859,13 +1860,11 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, parent = parent->bus->self; } if (pci_is_pcie(tmp)) /* this is a PCIe-to-PCI bridge */ - return domain_context_mapping_one(domain, - pci_domain_nr(tmp->subordinate), + return domain_context_mapping_one(domain, iommu, tmp->subordinate->number, 0, translation); else /* this is a legacy PCI bridge */ - return domain_context_mapping_one(domain, - pci_domain_nr(tmp->bus), + return domain_context_mapping_one(domain, iommu, tmp->bus->number, tmp->devfn, translation); From b718cd3d8412edaf02665d29b0cf5ef828675a45 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 13:11:33 -0700 Subject: [PATCH 47/75] iommu/vt-d: Stop dmar_insert_dev_info() freeing domains on losing race By moving this into get_domain_for_dev() we can make dmar_insert_dev_info() suitable for use with "special" domains such as the si_domain, which currently use domain_add_dev_info(). Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 45 +++++++++++++++++-------------------- 1 file changed, 21 insertions(+), 24 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1c43a7b3008a..c1c564233768 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2147,16 +2147,17 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) return NULL; } -static int dmar_insert_dev_info(int segment, int bus, int devfn, - struct device *dev, struct dmar_domain **domp) +static struct dmar_domain *dmar_insert_dev_info(int segment, int bus, int devfn, + struct device *dev, + struct dmar_domain *domain) { - struct dmar_domain *found, *domain = *domp; + struct dmar_domain *found; struct device_domain_info *info; unsigned long flags; info = alloc_devinfo_mem(); if (!info) - return -ENOMEM; + return NULL; info->segment = segment; info->bus = bus; @@ -2174,19 +2175,17 @@ static int dmar_insert_dev_info(int segment, int bus, int devfn, if (found) { spin_unlock_irqrestore(&device_domain_lock, flags); free_devinfo_mem(info); - if (found != domain) { - domain_exit(domain); - *domp = found; - } - } else { - list_add(&info->link, &domain->devices); - list_add(&info->global, &device_domain_list); - if (dev) - dev->archdata.iommu = info; - spin_unlock_irqrestore(&device_domain_lock, flags); + /* Caller must free the original domain */ + return found; } - return 0; + list_add(&info->link, &domain->devices); + list_add(&info->global, &device_domain_list); + if (dev) + dev->archdata.iommu = info; + spin_unlock_irqrestore(&device_domain_lock, flags); + + return domain; } /* domain is initialized */ @@ -2245,21 +2244,19 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) /* register pcie-to-pci device */ if (dev_tmp) { - if (dmar_insert_dev_info(segment, bus, devfn, NULL, &domain)) + domain = dmar_insert_dev_info(segment, bus, devfn, NULL, domain); + if (!domain) goto error; - else - free = NULL; } found_domain: - if (dmar_insert_dev_info(segment, pdev->bus->number, pdev->devfn, - &pdev->dev, &domain) == 0) - return domain; + domain = dmar_insert_dev_info(segment, pdev->bus->number, pdev->devfn, + &pdev->dev, domain); error: - if (free) + if (free != domain) domain_exit(free); - /* recheck it here, maybe others set it */ - return find_domain(&pdev->dev); + + return domain; } static int iommu_identity_mapping; From 0ac72664853b3181a437afb02a86d7c6f792e031 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 13:19:22 -0700 Subject: [PATCH 48/75] iommu/vt-d: use dmar_insert_dev_info() from dma_add_dev_info() Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index c1c564233768..7d3d33f5c8dc 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2452,29 +2452,21 @@ static int domain_add_dev_info(struct dmar_domain *domain, struct pci_dev *pdev, int translation) { + struct dmar_domain *ndomain; struct device_domain_info *info; unsigned long flags; int ret; - info = alloc_devinfo_mem(); - if (!info) - return -ENOMEM; - - info->segment = pci_domain_nr(pdev->bus); - info->bus = pdev->bus->number; - info->devfn = pdev->devfn; - info->dev = &pdev->dev; - info->domain = domain; - - spin_lock_irqsave(&device_domain_lock, flags); - list_add(&info->link, &domain->devices); - list_add(&info->global, &device_domain_list); - pdev->dev.archdata.iommu = info; - spin_unlock_irqrestore(&device_domain_lock, flags); + ndomain = dmar_insert_dev_info(pci_domain_nr(pdev->bus), + pdev->bus->number, pdev->devfn, + &pdev->dev, domain); + if (ndomain != domain) + return -EBUSY; ret = domain_context_mapping(domain, pdev, translation); if (ret) { spin_lock_irqsave(&device_domain_lock, flags); + info = pdev->dev.archdata.iommu; unlink_domain_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); free_devinfo_mem(info); From e2f8c5f6d45b092b52adea9d71018ef11250b924 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 13:25:07 -0700 Subject: [PATCH 49/75] iommu/vt-d: Use domain_remove_one_dev_info() in domain_add_dev_info() error path Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7d3d33f5c8dc..84f0d4284125 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2453,8 +2453,6 @@ static int domain_add_dev_info(struct dmar_domain *domain, int translation) { struct dmar_domain *ndomain; - struct device_domain_info *info; - unsigned long flags; int ret; ndomain = dmar_insert_dev_info(pci_domain_nr(pdev->bus), @@ -2465,11 +2463,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, ret = domain_context_mapping(domain, pdev, translation); if (ret) { - spin_lock_irqsave(&device_domain_lock, flags); - info = pdev->dev.archdata.iommu; - unlink_domain_info(info); - spin_unlock_irqrestore(&device_domain_lock, flags); - free_devinfo_mem(info); + domain_remove_one_dev_info(domain, pdev); return ret; } From 5a8f40e8c8801a9805bbe60d140ed2b0b3b91d18 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 13:31:18 -0700 Subject: [PATCH 50/75] iommu/vt-d: Always store iommu in device_domain_info Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 47 ++++++++++++++++++++++++------------- 1 file changed, 31 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 84f0d4284125..8303f256fe84 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1274,8 +1274,6 @@ iommu_support_dev_iotlb (struct dmar_domain *domain, struct intel_iommu *iommu, if (!dmar_find_matched_atsr_unit(pdev)) return NULL; - info->iommu = iommu; - return info; } @@ -2134,7 +2132,7 @@ static struct dmar_domain *find_domain(struct device *dev) return NULL; } -static inline struct dmar_domain * +static inline struct device_domain_info * dmar_search_domain_by_dev_info(int segment, int bus, int devfn) { struct device_domain_info *info; @@ -2142,16 +2140,17 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) list_for_each_entry(info, &device_domain_list, global) if (info->segment == segment && info->bus == bus && info->devfn == devfn) - return info->domain; + return info; return NULL; } -static struct dmar_domain *dmar_insert_dev_info(int segment, int bus, int devfn, +static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, + int segment, int bus, int devfn, struct device *dev, struct dmar_domain *domain) { - struct dmar_domain *found; + struct dmar_domain *found = NULL; struct device_domain_info *info; unsigned long flags; @@ -2164,14 +2163,19 @@ static struct dmar_domain *dmar_insert_dev_info(int segment, int bus, int devfn, info->devfn = devfn; info->dev = dev; info->domain = domain; + info->iommu = iommu; if (!dev) domain->flags |= DOMAIN_FLAG_P2P_MULTIPLE_DEVICES; spin_lock_irqsave(&device_domain_lock, flags); if (dev) found = find_domain(dev); - else - found = dmar_search_domain_by_dev_info(segment, bus, devfn); + else { + struct device_domain_info *info2; + info2 = dmar_search_domain_by_dev_info(segment, bus, devfn); + if (info2) + found = info2->domain; + } if (found) { spin_unlock_irqrestore(&device_domain_lock, flags); free_devinfo_mem(info); @@ -2192,7 +2196,8 @@ static struct dmar_domain *dmar_insert_dev_info(int segment, int bus, int devfn, static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) { struct dmar_domain *domain, *free = NULL; - struct intel_iommu *iommu; + struct intel_iommu *iommu = NULL; + struct device_domain_info *info; struct dmar_drhd_unit *drhd; struct pci_dev *dev_tmp; unsigned long flags; @@ -2215,10 +2220,13 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) devfn = dev_tmp->devfn; } spin_lock_irqsave(&device_domain_lock, flags); - domain = dmar_search_domain_by_dev_info(segment, bus, devfn); + info = dmar_search_domain_by_dev_info(segment, bus, devfn); + if (info) { + iommu = info->iommu; + domain = info->domain; + } spin_unlock_irqrestore(&device_domain_lock, flags); - /* pcie-pci bridge already has a domain, uses it */ - if (domain) + if (info) goto found_domain; } @@ -2244,14 +2252,15 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) /* register pcie-to-pci device */ if (dev_tmp) { - domain = dmar_insert_dev_info(segment, bus, devfn, NULL, domain); + domain = dmar_insert_dev_info(iommu, segment, bus, devfn, NULL, + domain); if (!domain) goto error; } found_domain: - domain = dmar_insert_dev_info(segment, pdev->bus->number, pdev->devfn, - &pdev->dev, domain); + domain = dmar_insert_dev_info(iommu, segment, pdev->bus->number, + pdev->devfn, &pdev->dev, domain); error: if (free != domain) domain_exit(free); @@ -2453,9 +2462,15 @@ static int domain_add_dev_info(struct dmar_domain *domain, int translation) { struct dmar_domain *ndomain; + struct intel_iommu *iommu; int ret; - ndomain = dmar_insert_dev_info(pci_domain_nr(pdev->bus), + iommu = device_to_iommu(pci_domain_nr(pdev->bus), + pdev->bus->number, pdev->devfn); + if (!iommu) + return -ENODEV; + + ndomain = dmar_insert_dev_info(iommu, pci_domain_nr(pdev->bus), pdev->bus->number, pdev->devfn, &pdev->dev, domain); if (ndomain != domain) From 8bbc4410129c0919e5943012489427c5e050a63e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 13:52:37 -0700 Subject: [PATCH 51/75] iommu/vt-d: Simplify iommu check in domain_remove_one_dev_info() Now we store the iommu in the device_domain_info, we don't need to do a lookup. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 8303f256fe84..dc322d0238a0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4056,8 +4056,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, * owned by this domain, clear this iommu in iommu_bmp * update iommu count and coherency */ - if (iommu == device_to_iommu(info->segment, info->bus, - info->devfn)) + if (info->iommu == iommu) found = 1; } From 7c7faa11ecf3eec17699ae73fc6e336cbf993081 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 13:33:06 -0700 Subject: [PATCH 52/75] iommu/vt-d: Remove device_to_iommu() call from domain_remove_dev_info() This was problematic because it works by domain/bus/devfn and we want to make device_to_iommu() use only a struct device * (for handling non-PCI devices). Now that the iommu pointer is reliably stored in the device_domain_info, we don't need to look it up. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index dc322d0238a0..aa8548c8a915 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2084,7 +2084,6 @@ static void domain_remove_dev_info(struct dmar_domain *domain) { struct device_domain_info *info; unsigned long flags, flags2; - struct intel_iommu *iommu; spin_lock_irqsave(&device_domain_lock, flags); while (!list_empty(&domain->devices)) { @@ -2094,16 +2093,15 @@ static void domain_remove_dev_info(struct dmar_domain *domain) spin_unlock_irqrestore(&device_domain_lock, flags); iommu_disable_dev_iotlb(info); - iommu = device_to_iommu(info->segment, info->bus, info->devfn); - iommu_detach_dev(iommu, info->bus, info->devfn); + iommu_detach_dev(info->iommu, info->bus, info->devfn); if (domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) { - iommu_detach_dependent_devices(iommu, info->dev); + iommu_detach_dependent_devices(info->iommu, info->dev); /* clear this iommu in iommu_bmp, update iommu count * and capabilities */ spin_lock_irqsave(&domain->iommu_lock, flags2); - if (test_and_clear_bit(iommu->seq_id, + if (test_and_clear_bit(info->iommu->seq_id, domain->iommu_bmp)) { domain->iommu_count--; domain_update_iommu_cap(domain); From 67ccac41fafda88492620f4c0a30d4ccb2eb7767 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 13:49:45 -0700 Subject: [PATCH 53/75] iommu/vt-d: Store PCI segment number in struct intel_iommu Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 1 + include/linux/intel-iommu.h | 1 + 2 files changed, 2 insertions(+) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 7ea086f61073..56e1c79dc77f 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -952,6 +952,7 @@ static int alloc_iommu(struct dmar_drhd_unit *drhd) } iommu->agaw = agaw; iommu->msagaw = msagaw; + iommu->segment = drhd->segment; iommu->node = -1; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 2c4bed593b32..0a2da5188217 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -319,6 +319,7 @@ struct intel_iommu { int agaw; /* agaw of this iommu */ int msagaw; /* max sagaw of this iommu */ unsigned int irq; + u16 segment; /* PCI segment# */ unsigned char name[13]; /* Device Name */ #ifdef CONFIG_INTEL_IOMMU From 41e80dca52bd0497c1bb292713fc0164415e9ae6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 13:55:54 -0700 Subject: [PATCH 54/75] iommu/vt-d: Remove segment from struct device_domain_info() It's accessible via info->iommu->segment so this is redundant. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index aa8548c8a915..5c89eb7ae398 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -369,7 +369,6 @@ struct dmar_domain { struct device_domain_info { struct list_head link; /* link to domain siblings */ struct list_head global; /* link to global list */ - int segment; /* PCI domain */ u8 bus; /* PCI bus number */ u8 devfn; /* PCI devfn number */ struct device *dev; /* it's NULL for PCIe-to-PCI bridge */ @@ -2136,7 +2135,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) struct device_domain_info *info; list_for_each_entry(info, &device_domain_list, global) - if (info->segment == segment && info->bus == bus && + if (info->iommu->segment == segment && info->bus == bus && info->devfn == devfn) return info; @@ -2144,7 +2143,7 @@ dmar_search_domain_by_dev_info(int segment, int bus, int devfn) } static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, - int segment, int bus, int devfn, + int bus, int devfn, struct device *dev, struct dmar_domain *domain) { @@ -2156,7 +2155,6 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, if (!info) return NULL; - info->segment = segment; info->bus = bus; info->devfn = devfn; info->dev = dev; @@ -2170,7 +2168,7 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, found = find_domain(dev); else { struct device_domain_info *info2; - info2 = dmar_search_domain_by_dev_info(segment, bus, devfn); + info2 = dmar_search_domain_by_dev_info(iommu->segment, bus, devfn); if (info2) found = info2->domain; } @@ -2250,14 +2248,14 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) /* register pcie-to-pci device */ if (dev_tmp) { - domain = dmar_insert_dev_info(iommu, segment, bus, devfn, NULL, + domain = dmar_insert_dev_info(iommu, bus, devfn, NULL, domain); if (!domain) goto error; } found_domain: - domain = dmar_insert_dev_info(iommu, segment, pdev->bus->number, + domain = dmar_insert_dev_info(iommu, pdev->bus->number, pdev->devfn, &pdev->dev, domain); error: if (free != domain) @@ -2468,8 +2466,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, if (!iommu) return -ENODEV; - ndomain = dmar_insert_dev_info(iommu, pci_domain_nr(pdev->bus), - pdev->bus->number, pdev->devfn, + ndomain = dmar_insert_dev_info(iommu, pdev->bus->number, pdev->devfn, &pdev->dev, domain); if (ndomain != domain) return -EBUSY; @@ -4031,7 +4028,7 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry_safe(info, tmp, &domain->devices, link) { - if (info->segment == pci_domain_nr(pdev->bus) && + if (info->iommu->segment == pci_domain_nr(pdev->bus) && info->bus == pdev->bus->number && info->devfn == pdev->devfn) { unlink_domain_info(info); From 9b226624bbf7102cee67b6459bcb9c66dd081ca7 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 14:03:28 -0700 Subject: [PATCH 55/75] iommu/vt-d: Make identity_mapping() take struct device not struct pci_dev Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 5c89eb7ae398..1030230d4851 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2439,14 +2439,14 @@ static int __init si_domain_init(int hw) return 0; } -static int identity_mapping(struct pci_dev *pdev) +static int identity_mapping(struct device *dev) { struct device_domain_info *info; if (likely(!iommu_identity_mapping)) return 0; - info = pdev->dev.archdata.iommu; + info = dev->archdata.iommu; if (info && info != DUMMY_DEVICE_DOMAIN_INFO) return (info->domain == si_domain); @@ -2903,7 +2903,7 @@ static int iommu_no_mapping(struct device *dev) return 0; pdev = to_pci_dev(dev); - found = identity_mapping(pdev); + found = identity_mapping(dev); if (found) { if (iommu_should_identity_map(pdev, 0)) return 1; From 156baca8d31e1aced2c8a14262637aef1ab416b9 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 14:00:57 -0700 Subject: [PATCH 56/75] iommu/vt-d: Make device_to_iommu() cope with non-PCI devices Pass the struct device to it, and also make it return the bus/devfn to use, since that is also stored in the DMAR table. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 78 ++++++++++++++++++++++--------------- 1 file changed, 46 insertions(+), 32 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1030230d4851..cfc5eef81b82 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -664,37 +664,53 @@ static void domain_update_iommu_cap(struct dmar_domain *domain) domain_update_iommu_superpage(domain); } -static struct intel_iommu *device_to_iommu(int segment, u8 bus, u8 devfn) +static struct intel_iommu *device_to_iommu(struct device *dev, u8 *bus, u8 *devfn) { struct dmar_drhd_unit *drhd = NULL; struct intel_iommu *iommu; - struct device *dev; - struct pci_dev *pdev; + struct device *tmp; + struct pci_dev *ptmp, *pdev = NULL; + u16 segment; int i; + if (dev_is_pci(dev)) { + pdev = to_pci_dev(dev); + segment = pci_domain_nr(pdev->bus); + } else if (ACPI_COMPANION(dev)) + dev = &ACPI_COMPANION(dev)->dev; + rcu_read_lock(); for_each_active_iommu(iommu, drhd) { - if (segment != drhd->segment) + if (pdev && segment != drhd->segment) continue; for_each_active_dev_scope(drhd->devices, - drhd->devices_cnt, i, dev) { - if (!dev_is_pci(dev)) + drhd->devices_cnt, i, tmp) { + if (tmp == dev) { + *bus = drhd->devices[i].bus; + *devfn = drhd->devices[i].devfn; + goto out; + } + + if (!pdev || !dev_is_pci(tmp)) continue; - pdev = to_pci_dev(dev); - if (pdev->bus->number == bus && pdev->devfn == devfn) - goto out; - if (pdev->subordinate && - pdev->subordinate->number <= bus && - pdev->subordinate->busn_res.end >= bus) - goto out; + + ptmp = to_pci_dev(tmp); + if (ptmp->subordinate && + ptmp->subordinate->number <= pdev->bus->number && + ptmp->subordinate->busn_res.end >= pdev->bus->number) + goto got_pdev; } - if (drhd->include_all) + if (pdev && drhd->include_all) { + got_pdev: + *bus = pdev->bus->number; + *devfn = pdev->devfn; goto out; + } } iommu = NULL; -out: + out: rcu_read_unlock(); return iommu; @@ -1830,14 +1846,13 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, int ret; struct pci_dev *tmp, *parent; struct intel_iommu *iommu; + u8 bus, devfn; - iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, - pdev->devfn); + iommu = device_to_iommu(&pdev->dev, &bus, &devfn); if (!iommu) return -ENODEV; - ret = domain_context_mapping_one(domain, iommu, - pdev->bus->number, pdev->devfn, + ret = domain_context_mapping_one(domain, iommu, bus, devfn, translation); if (ret) return ret; @@ -1872,13 +1887,13 @@ static int domain_context_mapped(struct pci_dev *pdev) int ret; struct pci_dev *tmp, *parent; struct intel_iommu *iommu; + u8 bus, devfn; - iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, - pdev->devfn); + iommu = device_to_iommu(&pdev->dev, &bus, &devfn); if (!iommu) return -ENODEV; - ret = device_context_mapped(iommu, pdev->bus->number, pdev->devfn); + ret = device_context_mapped(iommu, bus, devfn); if (!ret) return ret; /* dependent device mapping */ @@ -2459,15 +2474,14 @@ static int domain_add_dev_info(struct dmar_domain *domain, { struct dmar_domain *ndomain; struct intel_iommu *iommu; + u8 bus, devfn; int ret; - iommu = device_to_iommu(pci_domain_nr(pdev->bus), - pdev->bus->number, pdev->devfn); + iommu = device_to_iommu(&pdev->dev, &bus, &devfn); if (!iommu) return -ENODEV; - ndomain = dmar_insert_dev_info(iommu, pdev->bus->number, pdev->devfn, - &pdev->dev, domain); + ndomain = dmar_insert_dev_info(iommu, bus, devfn, &pdev->dev, domain); if (ndomain != domain) return -EBUSY; @@ -4020,9 +4034,9 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, struct intel_iommu *iommu; unsigned long flags; int found = 0; + u8 bus, devfn; - iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, - pdev->devfn); + iommu = device_to_iommu(&pdev->dev, &bus, &devfn); if (!iommu) return; @@ -4142,6 +4156,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct pci_dev *pdev = to_pci_dev(dev); struct intel_iommu *iommu; int addr_width; + u8 bus, devfn; /* normally pdev is not mapped */ if (unlikely(domain_context_mapped(pdev))) { @@ -4157,8 +4172,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, } } - iommu = device_to_iommu(pci_domain_nr(pdev->bus), pdev->bus->number, - pdev->devfn); + iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; @@ -4324,9 +4338,9 @@ static int intel_iommu_add_device(struct device *dev) struct pci_dev *bridge, *dma_pdev = NULL; struct iommu_group *group; int ret; + u8 bus, devfn; - if (!device_to_iommu(pci_domain_nr(pdev->bus), - pdev->bus->number, pdev->devfn)) + if (!device_to_iommu(dev, &bus, &devfn)) return -ENODEV; bridge = pci_find_upstream_pcie_bridge(pdev); From e1f167f3fd69d794b570fc4d3159191568ff9b70 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 15:24:46 -0700 Subject: [PATCH 57/75] iommu/vt-d: Make domain_context_mapp{ed,ing}() take struct device Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 31 +++++++++++++++++-------------- 1 file changed, 17 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index cfc5eef81b82..949aa29dba8b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -1840,24 +1840,25 @@ static int domain_context_mapping_one(struct dmar_domain *domain, } static int -domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, - int translation) +domain_context_mapping(struct dmar_domain *domain, struct device *dev, + int translation) { int ret; - struct pci_dev *tmp, *parent; + struct pci_dev *pdev, *tmp, *parent; struct intel_iommu *iommu; u8 bus, devfn; - iommu = device_to_iommu(&pdev->dev, &bus, &devfn); + iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; ret = domain_context_mapping_one(domain, iommu, bus, devfn, translation); - if (ret) + if (ret || !dev_is_pci(dev)) return ret; /* dependent device mapping */ + pdev = to_pci_dev(dev); tmp = pci_find_upstream_pcie_bridge(pdev); if (!tmp) return 0; @@ -1882,21 +1883,23 @@ domain_context_mapping(struct dmar_domain *domain, struct pci_dev *pdev, translation); } -static int domain_context_mapped(struct pci_dev *pdev) +static int domain_context_mapped(struct device *dev) { int ret; - struct pci_dev *tmp, *parent; + struct pci_dev *pdev, *tmp, *parent; struct intel_iommu *iommu; u8 bus, devfn; - iommu = device_to_iommu(&pdev->dev, &bus, &devfn); + iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; ret = device_context_mapped(iommu, bus, devfn); - if (!ret) + if (!ret || !dev_is_pci(dev)) return ret; + /* dependent device mapping */ + pdev = to_pci_dev(dev); tmp = pci_find_upstream_pcie_bridge(pdev); if (!tmp) return ret; @@ -2361,7 +2364,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, goto error; /* context entry init */ - ret = domain_context_mapping(domain, pdev, CONTEXT_TT_MULTI_LEVEL); + ret = domain_context_mapping(domain, &pdev->dev, CONTEXT_TT_MULTI_LEVEL); if (ret) goto error; @@ -2485,7 +2488,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, if (ndomain != domain) return -EBUSY; - ret = domain_context_mapping(domain, pdev, translation); + ret = domain_context_mapping(domain, &pdev->dev, translation); if (ret) { domain_remove_one_dev_info(domain, pdev); return ret; @@ -2870,8 +2873,8 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) } /* make sure context mapping is ok */ - if (unlikely(!domain_context_mapped(pdev))) { - ret = domain_context_mapping(domain, pdev, + if (unlikely(!domain_context_mapped(&pdev->dev))) { + ret = domain_context_mapping(domain, &pdev->dev, CONTEXT_TT_MULTI_LEVEL); if (ret) { printk(KERN_ERR @@ -4159,7 +4162,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, u8 bus, devfn; /* normally pdev is not mapped */ - if (unlikely(domain_context_mapped(pdev))) { + if (unlikely(domain_context_mapped(&pdev->dev))) { struct dmar_domain *old_domain; old_domain = find_domain(dev); From 146922ec798de6484897a43fc6180e49c425f183 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 15:44:17 -0700 Subject: [PATCH 58/75] iommu/vt-d: Make get_domain_for_dev() take struct device Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 75 ++++++++++++++++++------------------- 1 file changed, 36 insertions(+), 39 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 949aa29dba8b..1c5f656ff19d 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2207,52 +2207,51 @@ static struct dmar_domain *dmar_insert_dev_info(struct intel_iommu *iommu, } /* domain is initialized */ -static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) +static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) { struct dmar_domain *domain, *free = NULL; struct intel_iommu *iommu = NULL; struct device_domain_info *info; - struct dmar_drhd_unit *drhd; - struct pci_dev *dev_tmp; + struct pci_dev *dev_tmp = NULL; unsigned long flags; - int bus = 0, devfn = 0; - int segment; + u8 bus, devfn, bridge_bus, bridge_devfn; - domain = find_domain(&pdev->dev); + domain = find_domain(dev); if (domain) return domain; - segment = pci_domain_nr(pdev->bus); + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); + u16 segment; - dev_tmp = pci_find_upstream_pcie_bridge(pdev); - if (dev_tmp) { - if (pci_is_pcie(dev_tmp)) { - bus = dev_tmp->subordinate->number; - devfn = 0; - } else { - bus = dev_tmp->bus->number; - devfn = dev_tmp->devfn; + segment = pci_domain_nr(pdev->bus); + dev_tmp = pci_find_upstream_pcie_bridge(pdev); + if (dev_tmp) { + if (pci_is_pcie(dev_tmp)) { + bridge_bus = dev_tmp->subordinate->number; + bridge_devfn = 0; + } else { + bridge_bus = dev_tmp->bus->number; + bridge_devfn = dev_tmp->devfn; + } + spin_lock_irqsave(&device_domain_lock, flags); + info = dmar_search_domain_by_dev_info(segment, bus, devfn); + if (info) { + iommu = info->iommu; + domain = info->domain; + } + spin_unlock_irqrestore(&device_domain_lock, flags); + /* pcie-pci bridge already has a domain, uses it */ + if (info) + goto found_domain; } - spin_lock_irqsave(&device_domain_lock, flags); - info = dmar_search_domain_by_dev_info(segment, bus, devfn); - if (info) { - iommu = info->iommu; - domain = info->domain; - } - spin_unlock_irqrestore(&device_domain_lock, flags); - if (info) - goto found_domain; } - drhd = dmar_find_matched_drhd_unit(pdev); - if (!drhd) { - printk(KERN_ERR "IOMMU: can't find DMAR for device %s\n", - pci_name(pdev)); - return NULL; - } - iommu = drhd->iommu; + iommu = device_to_iommu(dev, &bus, &devfn); + if (!iommu) + goto error; - /* Allocate and intialize new domain for the device */ + /* Allocate and initialize new domain for the device */ domain = alloc_domain(false); if (!domain) goto error; @@ -2266,15 +2265,14 @@ static struct dmar_domain *get_domain_for_dev(struct pci_dev *pdev, int gaw) /* register pcie-to-pci device */ if (dev_tmp) { - domain = dmar_insert_dev_info(iommu, bus, devfn, NULL, - domain); + domain = dmar_insert_dev_info(iommu, bridge_bus, bridge_devfn, + NULL, domain); if (!domain) goto error; } found_domain: - domain = dmar_insert_dev_info(iommu, pdev->bus->number, - pdev->devfn, &pdev->dev, domain); + domain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); error: if (free != domain) domain_exit(free); @@ -2320,7 +2318,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, struct dmar_domain *domain; int ret; - domain = get_domain_for_dev(pdev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + domain = get_domain_for_dev(&pdev->dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) return -ENOMEM; @@ -2864,8 +2862,7 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) struct dmar_domain *domain; int ret; - domain = get_domain_for_dev(pdev, - DEFAULT_DOMAIN_ADDRESS_WIDTH); + domain = get_domain_for_dev(&pdev->dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) { printk(KERN_ERR "Allocating domain for %s failed", pci_name(pdev)); From 0b9d9753155b9ed72e864592f9bf482a688c3c11 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 15:48:15 -0700 Subject: [PATCH 59/75] iommu/vt-d: Handle RMRRs for non-PCI devices Should hopefully never happen (RMRRs are an abomination) but while we're busy eliminating all the PCI assumptions, we might as well do it. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 30 ++++++++++++++---------------- 1 file changed, 14 insertions(+), 16 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 1c5f656ff19d..7b2b9f321c74 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2311,14 +2311,14 @@ static int iommu_domain_identity_map(struct dmar_domain *domain, DMA_PTE_READ|DMA_PTE_WRITE); } -static int iommu_prepare_identity_map(struct pci_dev *pdev, +static int iommu_prepare_identity_map(struct device *dev, unsigned long long start, unsigned long long end) { struct dmar_domain *domain; int ret; - domain = get_domain_for_dev(&pdev->dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) return -ENOMEM; @@ -2328,13 +2328,13 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, up to start with in si_domain */ if (domain == si_domain && hw_pass_through) { printk("Ignoring identity map for HW passthrough device %s [0x%Lx - 0x%Lx]\n", - pci_name(pdev), start, end); + dev_name(dev), start, end); return 0; } printk(KERN_INFO "IOMMU: Setting identity map for device %s [0x%Lx - 0x%Lx]\n", - pci_name(pdev), start, end); + dev_name(dev), start, end); if (end < start) { WARN(1, "Your BIOS is broken; RMRR ends before it starts!\n" @@ -2362,7 +2362,7 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, goto error; /* context entry init */ - ret = domain_context_mapping(domain, &pdev->dev, CONTEXT_TT_MULTI_LEVEL); + ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL); if (ret) goto error; @@ -2374,12 +2374,12 @@ static int iommu_prepare_identity_map(struct pci_dev *pdev, } static inline int iommu_prepare_rmrr_dev(struct dmar_rmrr_unit *rmrr, - struct pci_dev *pdev) + struct device *dev) { - if (pdev->dev.archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) + if (dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO) return 0; - return iommu_prepare_identity_map(pdev, rmrr->base_address, - rmrr->end_address); + return iommu_prepare_identity_map(dev, rmrr->base_address, + rmrr->end_address); } #ifdef CONFIG_INTEL_IOMMU_FLOPPY_WA @@ -2393,7 +2393,7 @@ static inline void iommu_prepare_isa(void) return; printk(KERN_INFO "IOMMU: Prepare 0-16MiB unity mapping for LPC\n"); - ret = iommu_prepare_identity_map(pdev, 0, 16*1024*1024 - 1); + ret = iommu_prepare_identity_map(&pdev->dev, 0, 16*1024*1024 - 1); if (ret) printk(KERN_ERR "IOMMU: Failed to create 0-16MiB identity map; " @@ -2495,7 +2495,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, return 0; } -static bool device_has_rmrr(struct pci_dev *dev) +static bool device_has_rmrr(struct device *dev) { struct dmar_rmrr_unit *rmrr; struct device *tmp; @@ -2509,7 +2509,7 @@ static bool device_has_rmrr(struct pci_dev *dev) */ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, tmp) - if (tmp == &dev->dev) { + if (tmp == dev) { rcu_read_unlock(); return true; } @@ -2529,7 +2529,7 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) * from this process due to their usage of RMRRs that are known * to not be needed after BIOS hand-off to OS. */ - if (device_has_rmrr(pdev) && + if (device_has_rmrr(&pdev->dev) && (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) return 0; @@ -2766,9 +2766,7 @@ static int __init init_dmars(void) /* some BIOS lists non-exist devices in DMAR table. */ for_each_active_dev_scope(rmrr->devices, rmrr->devices_cnt, i, dev) { - if (!dev_is_pci(dev)) - continue; - ret = iommu_prepare_rmrr_dev(rmrr, to_pci_dev(dev)); + ret = iommu_prepare_rmrr_dev(rmrr, dev); if (ret) printk(KERN_ERR "IOMMU: mapping reserved region failed\n"); From 3bdb259116059a6b805cfe9be66f4054f92598a3 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 16:03:08 -0700 Subject: [PATCH 60/75] iommu/vt-d: Make iommu_should_identity_map() take struct device Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 111 +++++++++++++++++++----------------- 1 file changed, 59 insertions(+), 52 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 7b2b9f321c74..ccfce8832954 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2518,58 +2518,65 @@ static bool device_has_rmrr(struct device *dev) return false; } -static int iommu_should_identity_map(struct pci_dev *pdev, int startup) +static int iommu_should_identity_map(struct device *dev, int startup) { - /* - * We want to prevent any device associated with an RMRR from - * getting placed into the SI Domain. This is done because - * problems exist when devices are moved in and out of domains - * and their respective RMRR info is lost. We exempt USB devices - * from this process due to their usage of RMRRs that are known - * to not be needed after BIOS hand-off to OS. - */ - if (device_has_rmrr(&pdev->dev) && - (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) - return 0; + if (dev_is_pci(dev)) { + struct pci_dev *pdev = to_pci_dev(dev); - if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) - return 1; + /* + * We want to prevent any device associated with an RMRR from + * getting placed into the SI Domain. This is done because + * problems exist when devices are moved in and out of domains + * and their respective RMRR info is lost. We exempt USB devices + * from this process due to their usage of RMRRs that are known + * to not be needed after BIOS hand-off to OS. + */ + if (device_has_rmrr(dev) && + (pdev->class >> 8) != PCI_CLASS_SERIAL_USB) + return 0; - if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) - return 1; + if ((iommu_identity_mapping & IDENTMAP_AZALIA) && IS_AZALIA(pdev)) + return 1; - if (!(iommu_identity_mapping & IDENTMAP_ALL)) - return 0; + if ((iommu_identity_mapping & IDENTMAP_GFX) && IS_GFX_DEVICE(pdev)) + return 1; + + if (!(iommu_identity_mapping & IDENTMAP_ALL)) + return 0; + + /* + * We want to start off with all devices in the 1:1 domain, and + * take them out later if we find they can't access all of memory. + * + * However, we can't do this for PCI devices behind bridges, + * because all PCI devices behind the same bridge will end up + * with the same source-id on their transactions. + * + * Practically speaking, we can't change things around for these + * devices at run-time, because we can't be sure there'll be no + * DMA transactions in flight for any of their siblings. + * + * So PCI devices (unless they're on the root bus) as well as + * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of + * the 1:1 domain, just in _case_ one of their siblings turns out + * not to be able to map all of memory. + */ + if (!pci_is_pcie(pdev)) { + if (!pci_is_root_bus(pdev->bus)) + return 0; + if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) + return 0; + } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) + return 0; + } else { + if (device_has_rmrr(dev)) + return 0; + } /* - * We want to start off with all devices in the 1:1 domain, and - * take them out later if we find they can't access all of memory. - * - * However, we can't do this for PCI devices behind bridges, - * because all PCI devices behind the same bridge will end up - * with the same source-id on their transactions. - * - * Practically speaking, we can't change things around for these - * devices at run-time, because we can't be sure there'll be no - * DMA transactions in flight for any of their siblings. - * - * So PCI devices (unless they're on the root bus) as well as - * their parent PCI-PCI or PCIe-PCI bridges must be left _out_ of - * the 1:1 domain, just in _case_ one of their siblings turns out - * not to be able to map all of memory. - */ - if (!pci_is_pcie(pdev)) { - if (!pci_is_root_bus(pdev->bus)) - return 0; - if (pdev->class >> 8 == PCI_CLASS_BRIDGE_PCI) - return 0; - } else if (pci_pcie_type(pdev) == PCI_EXP_TYPE_PCI_BRIDGE) - return 0; - - /* * At boot time, we don't yet know if devices will be 64-bit capable. - * Assume that they will -- if they turn out not to be, then we can + * Assume that they will — if they turn out not to be, then we can * take them out of the 1:1 domain later. */ if (!startup) { @@ -2577,13 +2584,13 @@ static int iommu_should_identity_map(struct pci_dev *pdev, int startup) * If the device's dma_mask is less than the system's memory * size then this is not a candidate for identity mapping. */ - u64 dma_mask = pdev->dma_mask; + u64 dma_mask = *dev->dma_mask; - if (pdev->dev.coherent_dma_mask && - pdev->dev.coherent_dma_mask < dma_mask) - dma_mask = pdev->dev.coherent_dma_mask; + if (dev->coherent_dma_mask && + dev->coherent_dma_mask < dma_mask) + dma_mask = dev->coherent_dma_mask; - return dma_mask >= dma_get_required_mask(&pdev->dev); + return dma_mask >= dma_get_required_mask(dev); } return 1; @@ -2599,7 +2606,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw) return -EFAULT; for_each_pci_dev(pdev) { - if (iommu_should_identity_map(pdev, 1)) { + if (iommu_should_identity_map(&pdev->dev, 1)) { ret = domain_add_dev_info(si_domain, pdev, hw ? CONTEXT_TT_PASS_THROUGH : CONTEXT_TT_MULTI_LEVEL); @@ -2917,7 +2924,7 @@ static int iommu_no_mapping(struct device *dev) pdev = to_pci_dev(dev); found = identity_mapping(dev); if (found) { - if (iommu_should_identity_map(pdev, 0)) + if (iommu_should_identity_map(&pdev->dev, 0)) return 1; else { /* @@ -2934,7 +2941,7 @@ static int iommu_no_mapping(struct device *dev) * In case of a detached 64 bit DMA device from vm, the device * is put into si_domain for identity mapping. */ - if (iommu_should_identity_map(pdev, 0)) { + if (iommu_should_identity_map(&pdev->dev, 0)) { int ret; ret = domain_add_dev_info(si_domain, pdev, hw_pass_through ? From d4b709f48ed44d42ed90c1a85dda7fd053d36ce1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 16:07:40 -0700 Subject: [PATCH 61/75] iommu/vt-d: Make get_valid_domain_for_dev() take struct device Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index ccfce8832954..e18b93cc1224 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2862,26 +2862,24 @@ static struct iova *intel_alloc_iova(struct device *dev, return iova; } -static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) +static struct dmar_domain *__get_valid_domain_for_dev(struct device *dev) { struct dmar_domain *domain; int ret; - domain = get_domain_for_dev(&pdev->dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); + domain = get_domain_for_dev(dev, DEFAULT_DOMAIN_ADDRESS_WIDTH); if (!domain) { - printk(KERN_ERR - "Allocating domain for %s failed", pci_name(pdev)); + printk(KERN_ERR "Allocating domain for %s failed", + dev_name(dev)); return NULL; } /* make sure context mapping is ok */ - if (unlikely(!domain_context_mapped(&pdev->dev))) { - ret = domain_context_mapping(domain, &pdev->dev, - CONTEXT_TT_MULTI_LEVEL); + if (unlikely(!domain_context_mapped(dev))) { + ret = domain_context_mapping(domain, dev, CONTEXT_TT_MULTI_LEVEL); if (ret) { - printk(KERN_ERR - "Domain context map for %s failed", - pci_name(pdev)); + printk(KERN_ERR "Domain context map for %s failed", + dev_name(dev)); return NULL; } } @@ -2889,12 +2887,12 @@ static struct dmar_domain *__get_valid_domain_for_dev(struct pci_dev *pdev) return domain; } -static inline struct dmar_domain *get_valid_domain_for_dev(struct pci_dev *dev) +static inline struct dmar_domain *get_valid_domain_for_dev(struct device *dev) { struct device_domain_info *info; /* No lock here, assumes no domain exit in normal case */ - info = dev->dev.archdata.iommu; + info = dev->archdata.iommu; if (likely(info)) return info->domain; @@ -2975,7 +2973,7 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, if (iommu_no_mapping(hwdev)) return paddr; - domain = get_valid_domain_for_dev(pdev); + domain = get_valid_domain_for_dev(hwdev); if (!domain) return 0; @@ -3280,7 +3278,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne if (iommu_no_mapping(hwdev)) return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); - domain = get_valid_domain_for_dev(pdev); + domain = get_valid_domain_for_dev(hwdev); if (!domain) return 0; From 207e35920d2a6ee1d2f48fcd6bb34b42246192d1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 16:12:32 -0700 Subject: [PATCH 62/75] iommu/vt-d: Remove some pointless to_pci_dev() calls Mostly made redundant by using dev_name() instead of pci_name(), and one instance of using *dev->dma_mask instead of pdev->dma_mask. Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index e18b93cc1224..2fe55bb6437f 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2835,7 +2835,6 @@ static struct iova *intel_alloc_iova(struct device *dev, struct dmar_domain *domain, unsigned long nrpages, uint64_t dma_mask) { - struct pci_dev *pdev = to_pci_dev(dev); struct iova *iova = NULL; /* Restrict dma_mask to the width that the iommu can handle */ @@ -2855,7 +2854,7 @@ static struct iova *intel_alloc_iova(struct device *dev, iova = alloc_iova(&domain->iovad, nrpages, IOVA_PFN(dma_mask), 1); if (unlikely(!iova)) { printk(KERN_ERR "Allocating %ld-page iova for %s failed", - nrpages, pci_name(pdev)); + nrpages, dev_name(dev)); return NULL; } @@ -2959,7 +2958,6 @@ static int iommu_no_mapping(struct device *dev) static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, size_t size, int dir, u64 dma_mask) { - struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; phys_addr_t start_paddr; struct iova *iova; @@ -3018,7 +3016,7 @@ error: if (iova) __free_iova(&domain->iovad, iova); printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", - pci_name(pdev), size, (unsigned long long)paddr, dir); + dev_name(hwdev), size, (unsigned long long)paddr, dir); return 0; } @@ -3115,7 +3113,6 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, size_t size, enum dma_data_direction dir, struct dma_attrs *attrs) { - struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; unsigned long start_pfn, last_pfn; struct iova *iova; @@ -3139,7 +3136,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, last_pfn = mm_to_dma_pfn(iova->pfn_hi + 1) - 1; pr_debug("Device %s unmapping: pfn %lx-%lx\n", - pci_name(pdev), start_pfn, last_pfn); + dev_name(dev), start_pfn, last_pfn); freelist = domain_unmap(domain, start_pfn, last_pfn); @@ -3264,7 +3261,6 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne enum dma_data_direction dir, struct dma_attrs *attrs) { int i; - struct pci_dev *pdev = to_pci_dev(hwdev); struct dmar_domain *domain; size_t size = 0; int prot = 0; @@ -3288,7 +3284,7 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne size += aligned_nrpages(sg->offset, sg->length); iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), - pdev->dma_mask); + *hwdev->dma_mask); if (!iova) { sglist->dma_length = 0; return 0; From 5040a918bd109a1903e7babac817325620939fa4 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 16:14:00 -0700 Subject: [PATCH 63/75] iommu/vt-d: Rename 'hwdev' variables to 'dev' now that that's the norm Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 44 ++++++++++++++++++------------------- 1 file changed, 22 insertions(+), 22 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2fe55bb6437f..60f8ceeb06e4 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2955,7 +2955,7 @@ static int iommu_no_mapping(struct device *dev) return 0; } -static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, +static dma_addr_t __intel_map_single(struct device *dev, phys_addr_t paddr, size_t size, int dir, u64 dma_mask) { struct dmar_domain *domain; @@ -2968,17 +2968,17 @@ static dma_addr_t __intel_map_single(struct device *hwdev, phys_addr_t paddr, BUG_ON(dir == DMA_NONE); - if (iommu_no_mapping(hwdev)) + if (iommu_no_mapping(dev)) return paddr; - domain = get_valid_domain_for_dev(hwdev); + domain = get_valid_domain_for_dev(dev); if (!domain) return 0; iommu = domain_get_iommu(domain); size = aligned_nrpages(paddr, size); - iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), dma_mask); + iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), dma_mask); if (!iova) goto error; @@ -3016,7 +3016,7 @@ error: if (iova) __free_iova(&domain->iovad, iova); printk(KERN_ERR"Device %s request: %zx@%llx dir %d --- failed\n", - dev_name(hwdev), size, (unsigned long long)paddr, dir); + dev_name(dev), size, (unsigned long long)paddr, dir); return 0; } @@ -3155,7 +3155,7 @@ static void intel_unmap_page(struct device *dev, dma_addr_t dev_addr, } } -static void *intel_alloc_coherent(struct device *hwdev, size_t size, +static void *intel_alloc_coherent(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flags, struct dma_attrs *attrs) { @@ -3165,10 +3165,10 @@ static void *intel_alloc_coherent(struct device *hwdev, size_t size, size = PAGE_ALIGN(size); order = get_order(size); - if (!iommu_no_mapping(hwdev)) + if (!iommu_no_mapping(dev)) flags &= ~(GFP_DMA | GFP_DMA32); - else if (hwdev->coherent_dma_mask < dma_get_required_mask(hwdev)) { - if (hwdev->coherent_dma_mask < DMA_BIT_MASK(32)) + else if (dev->coherent_dma_mask < dma_get_required_mask(dev)) { + if (dev->coherent_dma_mask < DMA_BIT_MASK(32)) flags |= GFP_DMA; else flags |= GFP_DMA32; @@ -3179,16 +3179,16 @@ static void *intel_alloc_coherent(struct device *hwdev, size_t size, return NULL; memset(vaddr, 0, size); - *dma_handle = __intel_map_single(hwdev, virt_to_bus(vaddr), size, + *dma_handle = __intel_map_single(dev, virt_to_bus(vaddr), size, DMA_BIDIRECTIONAL, - hwdev->coherent_dma_mask); + dev->coherent_dma_mask); if (*dma_handle) return vaddr; free_pages((unsigned long)vaddr, order); return NULL; } -static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, +static void intel_free_coherent(struct device *dev, size_t size, void *vaddr, dma_addr_t dma_handle, struct dma_attrs *attrs) { int order; @@ -3196,11 +3196,11 @@ static void intel_free_coherent(struct device *hwdev, size_t size, void *vaddr, size = PAGE_ALIGN(size); order = get_order(size); - intel_unmap_page(hwdev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); + intel_unmap_page(dev, dma_handle, size, DMA_BIDIRECTIONAL, NULL); free_pages((unsigned long)vaddr, order); } -static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, +static void intel_unmap_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { @@ -3210,10 +3210,10 @@ static void intel_unmap_sg(struct device *hwdev, struct scatterlist *sglist, struct intel_iommu *iommu; struct page *freelist; - if (iommu_no_mapping(hwdev)) + if (iommu_no_mapping(dev)) return; - domain = find_domain(hwdev); + domain = find_domain(dev); BUG_ON(!domain); iommu = domain_get_iommu(domain); @@ -3257,7 +3257,7 @@ static int intel_nontranslate_map_sg(struct device *hddev, return nelems; } -static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int nelems, +static int intel_map_sg(struct device *dev, struct scatterlist *sglist, int nelems, enum dma_data_direction dir, struct dma_attrs *attrs) { int i; @@ -3271,10 +3271,10 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne struct intel_iommu *iommu; BUG_ON(dir == DMA_NONE); - if (iommu_no_mapping(hwdev)) - return intel_nontranslate_map_sg(hwdev, sglist, nelems, dir); + if (iommu_no_mapping(dev)) + return intel_nontranslate_map_sg(dev, sglist, nelems, dir); - domain = get_valid_domain_for_dev(hwdev); + domain = get_valid_domain_for_dev(dev); if (!domain) return 0; @@ -3283,8 +3283,8 @@ static int intel_map_sg(struct device *hwdev, struct scatterlist *sglist, int ne for_each_sg(sglist, sg, nelems, i) size += aligned_nrpages(sg->offset, sg->length); - iova = intel_alloc_iova(hwdev, domain, dma_to_mm_pfn(size), - *hwdev->dma_mask); + iova = intel_alloc_iova(dev, domain, dma_to_mm_pfn(size), + *dev->dma_mask); if (!iova) { sglist->dma_length = 0; return 0; From bf9c9eda718bd56dde772270d117a8ef51bcea7e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 16:19:13 -0700 Subject: [PATCH 64/75] iommu/vt-d: Make domain_remove_one_dev_info() take struct device Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 25 +++++++++++-------------- 1 file changed, 11 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 60f8ceeb06e4..66c2921f6404 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -425,7 +425,7 @@ static long list_size; static void domain_exit(struct dmar_domain *domain); static void domain_remove_dev_info(struct dmar_domain *domain); static void domain_remove_one_dev_info(struct dmar_domain *domain, - struct pci_dev *pdev); + struct device *dev); static void iommu_detach_dependent_devices(struct intel_iommu *iommu, struct device *dev); @@ -2488,7 +2488,7 @@ static int domain_add_dev_info(struct dmar_domain *domain, ret = domain_context_mapping(domain, &pdev->dev, translation); if (ret) { - domain_remove_one_dev_info(domain, pdev); + domain_remove_one_dev_info(domain, &pdev->dev); return ret; } @@ -2928,7 +2928,7 @@ static int iommu_no_mapping(struct device *dev) * 32 bit DMA is removed from si_domain and fall back * to non-identity mapping. */ - domain_remove_one_dev_info(si_domain, pdev); + domain_remove_one_dev_info(si_domain, dev); printk(KERN_INFO "32bit %s uses non-identity mapping\n", pci_name(pdev)); return 0; @@ -3810,7 +3810,6 @@ static int device_notifier(struct notifier_block *nb, unsigned long action, void *data) { struct device *dev = data; - struct pci_dev *pdev = to_pci_dev(dev); struct dmar_domain *domain; if (iommu_dummy(dev)) @@ -3825,7 +3824,7 @@ static int device_notifier(struct notifier_block *nb, return 0; down_read(&dmar_global_lock); - domain_remove_one_dev_info(domain, pdev); + domain_remove_one_dev_info(domain, dev); if (!(domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE) && !(domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) && list_empty(&domain->devices)) @@ -4027,7 +4026,7 @@ static void iommu_detach_dependent_devices(struct intel_iommu *iommu, } static void domain_remove_one_dev_info(struct dmar_domain *domain, - struct pci_dev *pdev) + struct device *dev) { struct device_domain_info *info, *tmp; struct intel_iommu *iommu; @@ -4035,21 +4034,20 @@ static void domain_remove_one_dev_info(struct dmar_domain *domain, int found = 0; u8 bus, devfn; - iommu = device_to_iommu(&pdev->dev, &bus, &devfn); + iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return; spin_lock_irqsave(&device_domain_lock, flags); list_for_each_entry_safe(info, tmp, &domain->devices, link) { - if (info->iommu->segment == pci_domain_nr(pdev->bus) && - info->bus == pdev->bus->number && - info->devfn == pdev->devfn) { + if (info->iommu == iommu && info->bus == bus && + info->devfn == devfn) { unlink_domain_info(info); spin_unlock_irqrestore(&device_domain_lock, flags); iommu_disable_dev_iotlb(info); iommu_detach_dev(iommu, info->bus, info->devfn); - iommu_detach_dependent_devices(iommu, &pdev->dev); + iommu_detach_dependent_devices(iommu, dev); free_devinfo_mem(info); spin_lock_irqsave(&device_domain_lock, flags); @@ -4165,7 +4163,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, if (old_domain) { if (dmar_domain->flags & DOMAIN_FLAG_VIRTUAL_MACHINE || dmar_domain->flags & DOMAIN_FLAG_STATIC_IDENTITY) - domain_remove_one_dev_info(old_domain, pdev); + domain_remove_one_dev_info(old_domain, dev); else domain_remove_dev_info(old_domain); } @@ -4210,9 +4208,8 @@ static void intel_iommu_detach_device(struct iommu_domain *domain, struct device *dev) { struct dmar_domain *dmar_domain = domain->priv; - struct pci_dev *pdev = to_pci_dev(dev); - domain_remove_one_dev_info(dmar_domain, pdev); + domain_remove_one_dev_info(dmar_domain, dev); } static int intel_iommu_map(struct iommu_domain *domain, From 5913c9bf0efecfa869375af8b4619529d3464038 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 16:27:31 -0700 Subject: [PATCH 65/75] iommu/vt-d: Make domain_add_dev_info() take struct device Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 66c2921f6404..2333f36f4866 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2470,25 +2470,24 @@ static int identity_mapping(struct device *dev) } static int domain_add_dev_info(struct dmar_domain *domain, - struct pci_dev *pdev, - int translation) + struct device *dev, int translation) { struct dmar_domain *ndomain; struct intel_iommu *iommu; u8 bus, devfn; int ret; - iommu = device_to_iommu(&pdev->dev, &bus, &devfn); + iommu = device_to_iommu(dev, &bus, &devfn); if (!iommu) return -ENODEV; - ndomain = dmar_insert_dev_info(iommu, bus, devfn, &pdev->dev, domain); + ndomain = dmar_insert_dev_info(iommu, bus, devfn, dev, domain); if (ndomain != domain) return -EBUSY; - ret = domain_context_mapping(domain, &pdev->dev, translation); + ret = domain_context_mapping(domain, dev, translation); if (ret) { - domain_remove_one_dev_info(domain, &pdev->dev); + domain_remove_one_dev_info(domain, dev); return ret; } @@ -2607,7 +2606,7 @@ static int __init iommu_prepare_static_identity_mapping(int hw) for_each_pci_dev(pdev) { if (iommu_should_identity_map(&pdev->dev, 1)) { - ret = domain_add_dev_info(si_domain, pdev, + ret = domain_add_dev_info(si_domain, &pdev->dev, hw ? CONTEXT_TT_PASS_THROUGH : CONTEXT_TT_MULTI_LEVEL); if (ret) { @@ -2940,7 +2939,7 @@ static int iommu_no_mapping(struct device *dev) */ if (iommu_should_identity_map(&pdev->dev, 0)) { int ret; - ret = domain_add_dev_info(si_domain, pdev, + ret = domain_add_dev_info(si_domain, dev, hw_pass_through ? CONTEXT_TT_PASS_THROUGH : CONTEXT_TT_MULTI_LEVEL); @@ -4201,7 +4200,7 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, dmar_domain->agaw--; } - return domain_add_dev_info(dmar_domain, pdev, CONTEXT_TT_MULTI_LEVEL); + return domain_add_dev_info(dmar_domain, dev, CONTEXT_TT_MULTI_LEVEL); } static void intel_iommu_detach_device(struct iommu_domain *domain, From ecb509ec2bacfe341530e56abf6bf3f20548bcd6 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 16:29:55 -0700 Subject: [PATCH 66/75] iommu/vt-d: Remove pdev from iommu_no_mapping() Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 2333f36f4866..0f5e6c911e85 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2902,10 +2902,9 @@ static int iommu_dummy(struct device *dev) return dev->archdata.iommu == DUMMY_DEVICE_DOMAIN_INFO; } -/* Check if the pdev needs to go through non-identity map and unmap process.*/ +/* Check if the dev needs to go through non-identity map and unmap process.*/ static int iommu_no_mapping(struct device *dev) { - struct pci_dev *pdev; int found; if (unlikely(!dev_is_pci(dev))) @@ -2917,10 +2916,9 @@ static int iommu_no_mapping(struct device *dev) if (!iommu_identity_mapping) return 0; - pdev = to_pci_dev(dev); found = identity_mapping(dev); if (found) { - if (iommu_should_identity_map(&pdev->dev, 0)) + if (iommu_should_identity_map(dev, 0)) return 1; else { /* @@ -2929,7 +2927,7 @@ static int iommu_no_mapping(struct device *dev) */ domain_remove_one_dev_info(si_domain, dev); printk(KERN_INFO "32bit %s uses non-identity mapping\n", - pci_name(pdev)); + dev_name(dev)); return 0; } } else { @@ -2937,7 +2935,7 @@ static int iommu_no_mapping(struct device *dev) * In case of a detached 64 bit DMA device from vm, the device * is put into si_domain for identity mapping. */ - if (iommu_should_identity_map(&pdev->dev, 0)) { + if (iommu_should_identity_map(dev, 0)) { int ret; ret = domain_add_dev_info(si_domain, dev, hw_pass_through ? @@ -2945,7 +2943,7 @@ static int iommu_no_mapping(struct device *dev) CONTEXT_TT_MULTI_LEVEL); if (!ret) { printk(KERN_INFO "64bit %s uses identity mapping\n", - pci_name(pdev)); + dev_name(dev)); return 1; } } From 7207d8f925a74578a544f5beff8f840cfeebd12e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Sun, 9 Mar 2014 16:31:06 -0700 Subject: [PATCH 67/75] iommu/vt-d: Remove pdev from intel_iommu_attach_device() Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0f5e6c911e85..37ce54b188f3 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -4147,13 +4147,12 @@ static int intel_iommu_attach_device(struct iommu_domain *domain, struct device *dev) { struct dmar_domain *dmar_domain = domain->priv; - struct pci_dev *pdev = to_pci_dev(dev); struct intel_iommu *iommu; int addr_width; u8 bus, devfn; - /* normally pdev is not mapped */ - if (unlikely(domain_context_mapped(&pdev->dev))) { + /* normally dev is not mapped */ + if (unlikely(domain_context_mapped(dev))) { struct dmar_domain *old_domain; old_domain = find_domain(dev); From 46333e375f3228196e8cb741d817165f9793b46c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 10 Mar 2014 20:01:21 -0700 Subject: [PATCH 68/75] iommu/vt-d: Remove to_pci_dev() in intel_map_page() It might not be... Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 37ce54b188f3..0a8e166a57b0 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3023,7 +3023,7 @@ static dma_addr_t intel_map_page(struct device *dev, struct page *page, struct dma_attrs *attrs) { return __intel_map_single(dev, page_to_phys(page) + offset, size, - dir, to_pci_dev(dev)->dma_mask); + dir, *dev->dma_mask); } static void flush_unmaps(void) From 66077edc972c1c8dc2cf08e96a956c2db9bd705c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Mon, 10 Mar 2014 20:04:11 -0700 Subject: [PATCH 69/75] iommu/vt-d: Finally enable translation for non-PCI devices Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 3 --- 1 file changed, 3 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 0a8e166a57b0..a180f10a7b26 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2907,9 +2907,6 @@ static int iommu_no_mapping(struct device *dev) { int found; - if (unlikely(!dev_is_pci(dev))) - return 1; - if (iommu_dummy(dev)) return 1; From cf04eee8bf0e842dd73a64d02cdcdcbb31b0102c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 21 Mar 2014 16:49:04 +0000 Subject: [PATCH 70/75] iommu/vt-d: Include ACPI devices in iommu=pt Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 63 ++++++++++++++++++++++++++++--------- 1 file changed, 49 insertions(+), 14 deletions(-) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index a180f10a7b26..6fbce01b7875 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2595,31 +2595,66 @@ static int iommu_should_identity_map(struct device *dev, int startup) return 1; } +static int __init dev_prepare_static_identity_mapping(struct device *dev, int hw) +{ + int ret; + + if (!iommu_should_identity_map(dev, 1)) + return 0; + + ret = domain_add_dev_info(si_domain, dev, + hw ? CONTEXT_TT_PASS_THROUGH : + CONTEXT_TT_MULTI_LEVEL); + if (!ret) + pr_info("IOMMU: %s identity mapping for device %s\n", + hw ? "hardware" : "software", dev_name(dev)); + else if (ret == -ENODEV) + /* device not associated with an iommu */ + ret = 0; + + return ret; +} + + static int __init iommu_prepare_static_identity_mapping(int hw) { struct pci_dev *pdev = NULL; - int ret; + struct dmar_drhd_unit *drhd; + struct intel_iommu *iommu; + struct device *dev; + int i; + int ret = 0; ret = si_domain_init(hw); if (ret) return -EFAULT; for_each_pci_dev(pdev) { - if (iommu_should_identity_map(&pdev->dev, 1)) { - ret = domain_add_dev_info(si_domain, &pdev->dev, - hw ? CONTEXT_TT_PASS_THROUGH : - CONTEXT_TT_MULTI_LEVEL); - if (ret) { - /* device not associated with an iommu */ - if (ret == -ENODEV) - continue; - return ret; - } - pr_info("IOMMU: %s identity mapping for device %s\n", - hw ? "hardware" : "software", pci_name(pdev)); - } + ret = dev_prepare_static_identity_mapping(&pdev->dev, hw); + if (ret) + return ret; } + for_each_active_iommu(iommu, drhd) + for_each_active_dev_scope(drhd->devices, drhd->devices_cnt, i, dev) { + struct acpi_device_physical_node *pn; + struct acpi_device *adev; + + if (dev->bus != &acpi_bus_type) + continue; + + adev= to_acpi_device(dev); + mutex_lock(&adev->physical_node_lock); + list_for_each_entry(pn, &adev->physical_node_list, node) { + ret = dev_prepare_static_identity_mapping(pn->dev, hw); + if (ret) + break; + } + mutex_unlock(&adev->physical_node_lock); + if (ret) + return ret; + } + return 0; } From a919a018cccf999aa56d7f9adeae0525b01b7434 Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Wed, 5 Mar 2014 18:54:18 -0600 Subject: [PATCH 71/75] iommu/amd: Fix logic to determine and checking max PASID In reality, the spec can only support 16-bit PASID since INVALIDATE_IOTLB_PAGES and COMPLETE_PPR_REQUEST commands only allow 16-bit PASID. So, we updated the PASID_MASK accordingly and invoke BUG_ON if the hardware is reporting PASmax more than 16-bit. Besides, max PASID is defined as ((2^(PASmax+1)) - 1). The current does not determine this correctly. Signed-off-by: Suravee Suthikulpanit Tested-by: Jay Cornwall Signed-off-by: Joerg Roedel --- drivers/iommu/amd_iommu.c | 4 ++-- drivers/iommu/amd_iommu_init.c | 16 +++++++++------- drivers/iommu/amd_iommu_types.h | 11 ++++++++--- 3 files changed, 19 insertions(+), 12 deletions(-) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1dd9f818fc84..c949520bd196 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -963,7 +963,7 @@ static void build_inv_iommu_pasid(struct iommu_cmd *cmd, u16 domid, int pasid, address &= ~(0xfffULL); - cmd->data[0] = pasid & PASID_MASK; + cmd->data[0] = pasid; cmd->data[1] = domid; cmd->data[2] = lower_32_bits(address); cmd->data[3] = upper_32_bits(address); @@ -1001,7 +1001,7 @@ static void build_complete_ppr(struct iommu_cmd *cmd, u16 devid, int pasid, cmd->data[0] = devid; if (gn) { - cmd->data[1] = pasid & PASID_MASK; + cmd->data[1] = pasid; cmd->data[2] = CMD_INV_IOMMU_PAGES_GN_MASK; } cmd->data[3] = tag & 0x1ff; diff --git a/drivers/iommu/amd_iommu_init.c b/drivers/iommu/amd_iommu_init.c index 28b4bea7c109..b76c58dbe30c 100644 --- a/drivers/iommu/amd_iommu_init.c +++ b/drivers/iommu/amd_iommu_init.c @@ -150,7 +150,7 @@ int amd_iommus_present; bool amd_iommu_np_cache __read_mostly; bool amd_iommu_iotlb_sup __read_mostly = true; -u32 amd_iommu_max_pasids __read_mostly = ~0; +u32 amd_iommu_max_pasid __read_mostly = ~0; bool amd_iommu_v2_present __read_mostly; bool amd_iommu_pc_present __read_mostly; @@ -1231,14 +1231,16 @@ static int iommu_init_pci(struct amd_iommu *iommu) if (iommu_feature(iommu, FEATURE_GT)) { int glxval; - u32 pasids; - u64 shift; + u32 max_pasid; + u64 pasmax; - shift = iommu->features & FEATURE_PASID_MASK; - shift >>= FEATURE_PASID_SHIFT; - pasids = (1 << shift); + pasmax = iommu->features & FEATURE_PASID_MASK; + pasmax >>= FEATURE_PASID_SHIFT; + max_pasid = (1 << (pasmax + 1)) - 1; - amd_iommu_max_pasids = min(amd_iommu_max_pasids, pasids); + amd_iommu_max_pasid = min(amd_iommu_max_pasid, max_pasid); + + BUG_ON(amd_iommu_max_pasid & ~PASID_MASK); glxval = iommu->features & FEATURE_GLXVAL_MASK; glxval >>= FEATURE_GLXVAL_SHIFT; diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index e400fbe411de..55ba46bdb01b 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -98,7 +98,12 @@ #define FEATURE_GLXVAL_SHIFT 14 #define FEATURE_GLXVAL_MASK (0x03ULL << FEATURE_GLXVAL_SHIFT) -#define PASID_MASK 0x000fffff +/* Note: + * The current driver only support 16-bit PASID. + * Currently, hardware only implement upto 16-bit PASID + * even though the spec says it could have upto 20 bits. + */ +#define PASID_MASK 0x0000ffff /* MMIO status bits */ #define MMIO_STATUS_EVT_INT_MASK (1 << 1) @@ -696,8 +701,8 @@ extern unsigned long *amd_iommu_pd_alloc_bitmap; */ extern u32 amd_iommu_unmap_flush; -/* Smallest number of PASIDs supported by any IOMMU in the system */ -extern u32 amd_iommu_max_pasids; +/* Smallest max PASID supported by any IOMMU in the system */ +extern u32 amd_iommu_max_pasid; extern bool amd_iommu_v2_present; From 11f1a7768cb9179b1f1ce6b8027df7531e0704e7 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 25 Mar 2014 20:16:40 +0100 Subject: [PATCH 72/75] iommu/vt-d: Check for NULL pointer in dmar_acpi_dev_scope_init() When ir_dev_scope_init() is called via a rootfs initcall it will check for irq_remapping_enabled before it calls (indirectly) into dmar_acpi_dev_scope_init() which uses the dmar_tbl pointer without any checks. The AMD IOMMU driver also sets the irq_remapping_enabled flag which causes the dmar_acpi_dev_scope_init() function to be called on systems with AMD IOMMU hardware too, causing a boot-time kernel crash. Signed-off-by: Joerg Roedel --- drivers/iommu/dmar.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 56e1c79dc77f..e531a2b07207 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -657,7 +657,12 @@ static void __init dmar_acpi_insert_dev_scope(u8 device_number, static int __init dmar_acpi_dev_scope_init(void) { - struct acpi_dmar_andd *andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar); + struct acpi_dmar_andd *andd; + + if (dmar_tbl == NULL) + return -ENODEV; + + andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar); while (((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length) { From 63b4262478ee6384c7efb5279be54236e5d6361b Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Fri, 28 Mar 2014 11:28:40 +0000 Subject: [PATCH 73/75] iommu/vt-d: Only call dmar_acpi_dev_scope_init() if DRHD units present MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As pointed out by Jörg and fixed in commit 11f1a7768 ("iommu/vt-d: Check for NULL pointer in dmar_acpi_dev_scope_init(), this code path can bizarrely get exercised even on AMD IOMMU systems with IRQ remapping enabled. In addition to the defensive check for NULL which Jörg added, let's also just avoid calling the function at all if there aren't an Intel IOMMU units in the system. Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index e531a2b07207..142650e82979 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -698,13 +698,13 @@ int __init dmar_dev_scope_init(void) if (dmar_dev_scope_status != 1) return dmar_dev_scope_status; - dmar_acpi_dev_scope_init(); - if (list_empty(&dmar_drhd_units)) { dmar_dev_scope_status = -ENODEV; } else { dmar_dev_scope_status = 0; + dmar_acpi_dev_scope_init(); + for_each_pci_dev(dev) { if (dev->is_virtfn) continue; From 14d405699634d4ce0adfc7b4f52ac7427220a98d Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 28 Mar 2014 11:29:50 +0300 Subject: [PATCH 74/75] iommu/vt-d: returning free pointer in get_domain_for_dev() If we hit this error condition then we want to return a NULL pointer and not a freed variable. Signed-off-by: Dan Carpenter Signed-off-by: David Woodhouse --- drivers/iommu/intel-iommu.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index 6fbce01b7875..69fa7da5e48b 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -2257,6 +2257,7 @@ static struct dmar_domain *get_domain_for_dev(struct device *dev, int gaw) goto error; if (iommu_attach_domain(domain, iommu)) { free_domain_mem(domain); + domain = NULL; goto error; } free = domain; From 7713ec066ae8adc49dd8daa02a73e6b60af6ee5f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 1 Apr 2014 14:58:36 +0100 Subject: [PATCH 75/75] iommu/vt-d: Fix error handling in ANDD processing If we failed to find an ACPI device to correspond to an ANDD record, we would fail to increment our pointer and would just process the same record over and over again, with predictable results. Turn it from a while() loop into a for() loop to let the 'continue' in the error paths work correctly. Signed-off-by: David Woodhouse --- drivers/iommu/dmar.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/drivers/iommu/dmar.c b/drivers/iommu/dmar.c index 142650e82979..f445c10df8df 100644 --- a/drivers/iommu/dmar.c +++ b/drivers/iommu/dmar.c @@ -662,10 +662,9 @@ static int __init dmar_acpi_dev_scope_init(void) if (dmar_tbl == NULL) return -ENODEV; - andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar); - - while (((unsigned long)andd) < - ((unsigned long)dmar_tbl) + dmar_tbl->length) { + for (andd = (void *)dmar_tbl + sizeof(struct acpi_table_dmar); + ((unsigned long)andd) < ((unsigned long)dmar_tbl) + dmar_tbl->length; + andd = ((void *)andd) + andd->header.length) { if (andd->header.type == ACPI_DMAR_TYPE_ANDD) { acpi_handle h; struct acpi_device *adev; @@ -685,7 +684,6 @@ static int __init dmar_acpi_dev_scope_init(void) } dmar_acpi_insert_dev_scope(andd->device_number, adev); } - andd = ((void *)andd) + andd->header.length; } return 0; }