From ac0101d396fee24994632f71b55b9f7f9ee16eff Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 16:00:35 +0200 Subject: [PATCH 1/8] x86/dma: Mark iommu_pass_through as __read_mostly This variable is read most of the time. This patch marks it as such. It also documents the meaning the this variable while at it. Signed-off-by: Joerg Roedel --- arch/x86/kernel/pci-dma.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index 1a041bcf506b..873aa079d166 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -32,7 +32,14 @@ int no_iommu __read_mostly; /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly = 0; -int iommu_pass_through; +/* + * This variable becomes 1 if iommu=pt is passed on the kernel command line. + * If this variable is 1, IOMMU implementations do no DMA ranslation for + * devices and allow every device to access to whole physical memory. This is + * useful if a user want to use an IOMMU only for KVM device assignment to + * guests and not for driver dma translation. + */ +int iommu_pass_through __read_mostly; dma_addr_t bad_dma_address __read_mostly = 0; EXPORT_SYMBOL(bad_dma_address); From 2650815fb03fe2bf1e6701584087ba669dcf92cd Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Aug 2009 16:52:40 +0200 Subject: [PATCH 2/8] x86/amd-iommu: Add core functions for pd allocation/freeing This patch factors some code of protection domain allocation into seperate functions. This way the logic can be used to allocate the passthrough domain later. As a side effect this patch fixes an unlikely domain id leakage bug. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 36 ++++++++++++++++++++++++++++++++---- 1 file changed, 32 insertions(+), 4 deletions(-) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 6c99f5037801..0934348abfad 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1988,19 +1988,47 @@ static void cleanup_domain(struct protection_domain *domain) write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); } -static int amd_iommu_domain_init(struct iommu_domain *dom) +static void protection_domain_free(struct protection_domain *domain) +{ + if (!domain) + return; + + if (domain->id) + domain_id_free(domain->id); + + kfree(domain); +} + +static struct protection_domain *protection_domain_alloc(void) { struct protection_domain *domain; domain = kzalloc(sizeof(*domain), GFP_KERNEL); if (!domain) - return -ENOMEM; + return NULL; spin_lock_init(&domain->lock); - domain->mode = PAGE_MODE_3_LEVEL; domain->id = domain_id_alloc(); if (!domain->id) + goto out_err; + + return domain; + +out_err: + kfree(domain); + + return NULL; +} + +static int amd_iommu_domain_init(struct iommu_domain *dom) +{ + struct protection_domain *domain; + + domain = protection_domain_alloc(); + if (!domain) goto out_free; + + domain->mode = PAGE_MODE_3_LEVEL; domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); if (!domain->pt_root) goto out_free; @@ -2010,7 +2038,7 @@ static int amd_iommu_domain_init(struct iommu_domain *dom) return 0; out_free: - kfree(domain); + protection_domain_free(domain); return -ENOMEM; } From 0feae533ddebe02cda6ccce5cac7349b446776a8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Aug 2009 15:26:30 +0200 Subject: [PATCH 3/8] x86/amd-iommu: Add passthrough mode initialization functions When iommu=pt is passed on kernel command line the devices should run untranslated. This requires the allocation of a special domain for that purpose. This patch implements the allocation and initialization path for iommu=pt. Signed-off-by: Joerg Roedel --- arch/x86/include/asm/amd_iommu.h | 1 + arch/x86/include/asm/amd_iommu_types.h | 4 ++ arch/x86/kernel/amd_iommu.c | 72 +++++++++++++++++++++++--- 3 files changed, 69 insertions(+), 8 deletions(-) diff --git a/arch/x86/include/asm/amd_iommu.h b/arch/x86/include/asm/amd_iommu.h index bdf96f119f06..ac95995b7bad 100644 --- a/arch/x86/include/asm/amd_iommu.h +++ b/arch/x86/include/asm/amd_iommu.h @@ -25,6 +25,7 @@ #ifdef CONFIG_AMD_IOMMU extern int amd_iommu_init(void); extern int amd_iommu_init_dma_ops(void); +extern int amd_iommu_init_passthrough(void); extern void amd_iommu_detect(void); extern irqreturn_t amd_iommu_int_handler(int irq, void *data); extern void amd_iommu_flush_all_domains(void); diff --git a/arch/x86/include/asm/amd_iommu_types.h b/arch/x86/include/asm/amd_iommu_types.h index 0c878caaa0a2..49f7453bff76 100644 --- a/arch/x86/include/asm/amd_iommu_types.h +++ b/arch/x86/include/asm/amd_iommu_types.h @@ -143,6 +143,7 @@ #define EVT_BUFFER_SIZE 8192 /* 512 entries */ #define EVT_LEN_MASK (0x9ULL << 56) +#define PAGE_MODE_NONE 0x00 #define PAGE_MODE_1_LEVEL 0x01 #define PAGE_MODE_2_LEVEL 0x02 #define PAGE_MODE_3_LEVEL 0x03 @@ -194,6 +195,9 @@ #define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */ #define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops domain for an IOMMU */ +#define PD_PASSTHROUGH_MASK (1UL << 2) /* domain has no page + translation */ + extern bool amd_iommu_dump; #define DUMP_printk(format, arg...) \ do { \ diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 0934348abfad..7987f20499ad 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -41,6 +41,12 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock); static LIST_HEAD(iommu_pd_list); static DEFINE_SPINLOCK(iommu_pd_list_lock); +/* + * Domain for untranslated devices - only allocated + * if iommu=pt passed on kernel cmd line. + */ +static struct protection_domain *pt_domain; + #ifdef CONFIG_IOMMU_API static struct iommu_ops amd_iommu_ops; #endif @@ -1067,9 +1073,9 @@ static struct protection_domain *domain_for_device(u16 devid) * If a device is not yet associated with a domain, this function does * assigns it visible for the hardware */ -static void attach_device(struct amd_iommu *iommu, - struct protection_domain *domain, - u16 devid) +static void __attach_device(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) { unsigned long flags; u64 pte_root = virt_to_phys(domain->pt_root); @@ -1087,12 +1093,19 @@ static void attach_device(struct amd_iommu *iommu, amd_iommu_pd_table[devid] = domain; write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); +} - /* - * We might boot into a crash-kernel here. The crashed kernel - * left the caches in the IOMMU dirty. So we have to flush - * here to evict all dirty stuff. - */ +static void attach_device(struct amd_iommu *iommu, + struct protection_domain *domain, + u16 devid) +{ + __attach_device(iommu, domain, devid); + + /* + * We might boot into a crash-kernel here. The crashed kernel + * left the caches in the IOMMU dirty. So we have to flush + * here to evict all dirty stuff. + */ iommu_queue_inv_dev_entry(iommu, devid); iommu_flush_tlb_pde(iommu, domain->id); } @@ -2219,3 +2232,46 @@ static struct iommu_ops amd_iommu_ops = { .domain_has_cap = amd_iommu_domain_has_cap, }; +/***************************************************************************** + * + * The next functions do a basic initialization of IOMMU for pass through + * mode + * + * In passthrough mode the IOMMU is initialized and enabled but not used for + * DMA-API translation. + * + *****************************************************************************/ + +int __init amd_iommu_init_passthrough(void) +{ + struct pci_dev *dev = NULL; + u16 devid, devid2; + + /* allocate passthroug domain */ + pt_domain = protection_domain_alloc(); + if (!pt_domain) + return -ENOMEM; + + pt_domain->mode |= PAGE_MODE_NONE; + + while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) { + struct amd_iommu *iommu; + + devid = calc_devid(dev->bus->number, dev->devfn); + if (devid > amd_iommu_last_bdf) + continue; + + devid2 = amd_iommu_alias_table[devid]; + + iommu = amd_iommu_rlookup_table[devid2]; + if (!iommu) + continue; + + __attach_device(iommu, pt_domain, devid); + __attach_device(iommu, pt_domain, devid2); + } + + pr_info("AMD-Vi: Initialized for Passthrough Mode\n"); + + return 0; +} From aa879fff5d12318259816aa35023e941a1e4d3d9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Mon, 31 Aug 2009 16:01:48 +0200 Subject: [PATCH 4/8] x86/amd-iommu: Fix device table write order The V bit of the device table entry has to be set after the rest of the entry is written to not confuse the hardware. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 7987f20499ad..2b1e77c714ff 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1087,9 +1087,9 @@ static void __attach_device(struct amd_iommu *iommu, pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; write_lock_irqsave(&amd_iommu_devtable_lock, flags); - amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); - amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[2] = domain->id; + amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); + amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); amd_iommu_pd_table[devid] = domain; write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); From eba6ac60ba66c6bf6858938442204feaa67dea31 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 12:07:08 +0200 Subject: [PATCH 5/8] x86/amd-iommu: Align locking between attach_device and detach_device This patch makes the locking behavior between the functions attach_device and __attach_device consistent with the locking behavior between detach_device and __detach_device. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 19 ++++++++++++++----- 1 file changed, 14 insertions(+), 5 deletions(-) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 2b1e77c714ff..9aa135d4453f 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1077,29 +1077,38 @@ static void __attach_device(struct amd_iommu *iommu, struct protection_domain *domain, u16 devid) { - unsigned long flags; - u64 pte_root = virt_to_phys(domain->pt_root); + u64 pte_root; - domain->dev_cnt += 1; + /* lock domain */ + spin_lock(&domain->lock); + + pte_root = virt_to_phys(domain->pt_root); pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV; - write_lock_irqsave(&amd_iommu_devtable_lock, flags); amd_iommu_dev_table[devid].data[2] = domain->id; amd_iommu_dev_table[devid].data[1] = upper_32_bits(pte_root); amd_iommu_dev_table[devid].data[0] = lower_32_bits(pte_root); amd_iommu_pd_table[devid] = domain; - write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); + + domain->dev_cnt += 1; + + /* ready */ + spin_unlock(&domain->lock); } static void attach_device(struct amd_iommu *iommu, struct protection_domain *domain, u16 devid) { + unsigned long flags; + + write_lock_irqsave(&amd_iommu_devtable_lock, flags); __attach_device(iommu, domain, devid); + write_unlock_irqrestore(&amd_iommu_devtable_lock, flags); /* * We might boot into a crash-kernel here. The crashed kernel From 21129f786f231f7a9dce5b504617b893f50a435f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 11:59:42 +0200 Subject: [PATCH 6/8] x86/amd-iommu: Make sure a device is assigned in passthrough mode When the IOMMU driver runs in passthrough mode it has to make sure that every device not assigned to an IOMMU-API domain must be put into the passthrough domain instead of keeping it unassigned. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index 9aa135d4453f..a8e74c34dd27 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1141,6 +1141,15 @@ static void __detach_device(struct protection_domain *domain, u16 devid) /* ready */ spin_unlock(&domain->lock); + + /* + * If we run in passthrough mode the device must be assigned to the + * passthrough domain if it is detached from any other domain + */ + if (iommu_pass_through) { + struct amd_iommu *iommu = amd_iommu_rlookup_table[devid]; + __attach_device(iommu, pt_domain, devid); + } } /* From a1ca331c8aa75cd58fdf685e2e8745e1d3ec5c8f Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 12:22:22 +0200 Subject: [PATCH 7/8] x86/amd-iommu: Don't detach device from pt domain on driver unbind This patch makes sure a device is not detached from the passthrough domain when the device driver is unloaded or does otherwise release the device. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/arch/x86/kernel/amd_iommu.c b/arch/x86/kernel/amd_iommu.c index a8e74c34dd27..12a541deae5e 100644 --- a/arch/x86/kernel/amd_iommu.c +++ b/arch/x86/kernel/amd_iommu.c @@ -1195,6 +1195,8 @@ static int device_change_notifier(struct notifier_block *nb, case BUS_NOTIFY_UNBOUND_DRIVER: if (!domain) goto out; + if (iommu_pass_through) + break; detach_device(domain, devid); break; case BUS_NOTIFY_ADD_DEVICE: From 4751a95134e05f1172131d2001c6991d671fa58c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Tue, 1 Sep 2009 15:53:54 +0200 Subject: [PATCH 8/8] x86/amd-iommu: Initialize passthrough mode when requested This patch enables the passthrough mode for AMD IOMMU by running the initialization function when iommu=pt is passed on the kernel command line. Signed-off-by: Joerg Roedel --- arch/x86/kernel/amd_iommu_init.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/arch/x86/kernel/amd_iommu_init.c b/arch/x86/kernel/amd_iommu_init.c index c1b17e97252e..f00f489ab150 100644 --- a/arch/x86/kernel/amd_iommu_init.c +++ b/arch/x86/kernel/amd_iommu_init.c @@ -1242,12 +1242,18 @@ int __init amd_iommu_init(void) if (ret) goto free; - ret = amd_iommu_init_dma_ops(); + if (iommu_pass_through) + ret = amd_iommu_init_passthrough(); + else + ret = amd_iommu_init_dma_ops(); if (ret) goto free; enable_iommus(); + if (iommu_pass_through) + goto out; + printk(KERN_INFO "AMD IOMMU: device isolation "); if (amd_iommu_isolate) printk("enabled\n");