Merge branches 'iommu/api' and 'iommu/amd' into for-linus

Joerg Roedel 2009-01-03 16:43:44 +01:00
commit b0a11f44ab
4 changed files with 677 additions and 88 deletions

View File

@ -586,6 +586,16 @@ config AMD_IOMMU
your BIOS for an option to enable it or if you have an IVRS ACPI
table.
config AMD_IOMMU_STATS
bool "Export AMD IOMMU statistics to debugfs"
depends on AMD_IOMMU
select DEBUG_FS
help
This option enables code in the AMD IOMMU driver to collect various
statistics about what's happening in the driver and exports that
information to userspace via debugfs.
If unsure, say N.
# need this always selected by IOMMU for the VIA workaround
config SWIOTLB
def_bool y if X86_64

View File

@ -190,16 +190,23 @@
/* FIXME: move this macro to <linux/pci.h> */
#define PCI_BUS(x) (((x) >> 8) & 0xff)
/* Protection domain flags */
#define PD_DMA_OPS_MASK (1UL << 0) /* domain used for dma_ops */
#define PD_DEFAULT_MASK (1UL << 1) /* domain is a default dma_ops
domain for an IOMMU */
/*
* This structure contains generic data for IOMMU protection domains
* independent of their use.
*/
struct protection_domain {
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
int mode; /* paging mode (0-6 levels) */
u64 *pt_root; /* page table root pointer */
void *priv; /* private data */
spinlock_t lock; /* mostly used to lock the page table*/
u16 id; /* the domain id written to the device table */
int mode; /* paging mode (0-6 levels) */
u64 *pt_root; /* page table root pointer */
unsigned long flags; /* flags to find out type of domain */
unsigned dev_cnt; /* devices assigned to this domain */
void *priv; /* private data */
};
/*
@ -295,7 +302,7 @@ struct amd_iommu {
bool int_enabled;
/* if true, we need to send a completion wait command */
int need_sync;
bool need_sync;
/* default dma_ops domain for that IOMMU */
struct dma_ops_domain *default_dom;
@ -374,7 +381,7 @@ extern struct protection_domain **amd_iommu_pd_table;
extern unsigned long *amd_iommu_pd_alloc_bitmap;
/* will be true if device isolation is enabled */
extern int amd_iommu_isolate;
extern bool amd_iommu_isolate;
/*
* If true, the addresses will be flushed on unmap time, not when
@ -382,18 +389,6 @@ extern int amd_iommu_isolate;
*/
extern bool amd_iommu_unmap_flush;
/* takes a PCI device id and prints it out in a readable form */
static inline void print_devid(u16 devid, int nl)
{
int bus = devid >> 8;
int dev = devid >> 3 & 0x1f;
int fn = devid & 0x07;
printk("%02x:%02x.%x", bus, dev, fn);
if (nl)
printk("\n");
}
/* takes bus and device/function and returns the device id
* FIXME: should that be in generic PCI code? */
static inline u16 calc_devid(u8 bus, u8 devfn)
@ -401,4 +396,32 @@ static inline u16 calc_devid(u8 bus, u8 devfn)
return (((u16)bus) << 8) | devfn;
}
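As a quick illustration of the devid encoding (hypothetical values, not part of this patch): a device at PCI address 00:14.3 packs into a 16-bit requester ID like this, using PCI_DEVFN() from the generic PCI headers together with calc_devid() above:

	/* Hypothetical example: PCI device 00:14.3 (bus 0, slot 0x14, function 3) */
	u8  bus   = 0x00;
	u8  devfn = PCI_DEVFN(0x14, 0x03);   /* (0x14 << 3) | 0x03 == 0xa3 */
	u16 devid = calc_devid(bus, devfn);  /* (0x00 << 8) | 0xa3 == 0x00a3 */
	/* devid is the index into amd_iommu_dev_table[], the alias table
	 * and the rlookup table used throughout the driver */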
#ifdef CONFIG_AMD_IOMMU_STATS
struct __iommu_counter {
char *name;
struct dentry *dent;
u64 value;
};
#define DECLARE_STATS_COUNTER(nm) \
static struct __iommu_counter nm = { \
.name = #nm, \
}
#define INC_STATS_COUNTER(name) name.value += 1
#define ADD_STATS_COUNTER(name, x) name.value += (x)
#define SUB_STATS_COUNTER(name, x) name.value -= (x)
#else /* CONFIG_AMD_IOMMU_STATS */
#define DECLARE_STATS_COUNTER(name)
#define INC_STATS_COUNTER(name)
#define ADD_STATS_COUNTER(name, x)
#define SUB_STATS_COUNTER(name, x)
static inline void amd_iommu_stats_init(void) { }
#endif /* CONFIG_AMD_IOMMU_STATS */
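The point of the twin definitions is that counters cost nothing when the option is off: with CONFIG_AMD_IOMMU_STATS unset both the declaration and the update macros expand to nothing. A minimal usage sketch (the counter name is made up for illustration; a real counter is also registered in amd_iommu_stats_init() so it shows up in debugfs):

	/* Hypothetical counter, for illustration only */
	DECLARE_STATS_COUNTER(cnt_example_op);

	static void example_op(void)
	{
		/* becomes cnt_example_op.value += 1 with stats enabled,
		 * and disappears completely when CONFIG_AMD_IOMMU_STATS
		 * is not set */
		INC_STATS_COUNTER(cnt_example_op);
	}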
#endif /* _ASM_X86_AMD_IOMMU_TYPES_H */

View File

@ -20,8 +20,12 @@
#include <linux/pci.h>
#include <linux/gfp.h>
#include <linux/bitops.h>
#include <linux/debugfs.h>
#include <linux/scatterlist.h>
#include <linux/iommu-helper.h>
#ifdef CONFIG_IOMMU_API
#include <linux/iommu.h>
#endif
#include <asm/proto.h>
#include <asm/iommu.h>
#include <asm/gart.h>
@ -38,6 +42,10 @@ static DEFINE_RWLOCK(amd_iommu_devtable_lock);
static LIST_HEAD(iommu_pd_list);
static DEFINE_SPINLOCK(iommu_pd_list_lock);
#ifdef CONFIG_IOMMU_API
static struct iommu_ops amd_iommu_ops;
#endif
/*
* general struct to manage commands sent to an IOMMU
*/
@ -47,6 +55,68 @@ struct iommu_cmd {
static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
struct unity_map_entry *e);
static struct dma_ops_domain *find_protection_domain(u16 devid);
#ifdef CONFIG_AMD_IOMMU_STATS
/*
* Initialization code for statistics collection
*/
DECLARE_STATS_COUNTER(compl_wait);
DECLARE_STATS_COUNTER(cnt_map_single);
DECLARE_STATS_COUNTER(cnt_unmap_single);
DECLARE_STATS_COUNTER(cnt_map_sg);
DECLARE_STATS_COUNTER(cnt_unmap_sg);
DECLARE_STATS_COUNTER(cnt_alloc_coherent);
DECLARE_STATS_COUNTER(cnt_free_coherent);
DECLARE_STATS_COUNTER(cross_page);
DECLARE_STATS_COUNTER(domain_flush_single);
DECLARE_STATS_COUNTER(domain_flush_all);
DECLARE_STATS_COUNTER(alloced_io_mem);
DECLARE_STATS_COUNTER(total_map_requests);
static struct dentry *stats_dir;
static struct dentry *de_isolate;
static struct dentry *de_fflush;
static void amd_iommu_stats_add(struct __iommu_counter *cnt)
{
if (stats_dir == NULL)
return;
cnt->dent = debugfs_create_u64(cnt->name, 0444, stats_dir,
&cnt->value);
}
static void amd_iommu_stats_init(void)
{
stats_dir = debugfs_create_dir("amd-iommu", NULL);
if (stats_dir == NULL)
return;
de_isolate = debugfs_create_bool("isolation", 0444, stats_dir,
(u32 *)&amd_iommu_isolate);
de_fflush = debugfs_create_bool("fullflush", 0444, stats_dir,
(u32 *)&amd_iommu_unmap_flush);
amd_iommu_stats_add(&compl_wait);
amd_iommu_stats_add(&cnt_map_single);
amd_iommu_stats_add(&cnt_unmap_single);
amd_iommu_stats_add(&cnt_map_sg);
amd_iommu_stats_add(&cnt_unmap_sg);
amd_iommu_stats_add(&cnt_alloc_coherent);
amd_iommu_stats_add(&cnt_free_coherent);
amd_iommu_stats_add(&cross_page);
amd_iommu_stats_add(&domain_flush_single);
amd_iommu_stats_add(&domain_flush_all);
amd_iommu_stats_add(&alloced_io_mem);
amd_iommu_stats_add(&total_map_requests);
}
#endif
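With the option enabled, every registered counter appears as a read-only u64 file under the "amd-iommu" debugfs directory. A small userspace sketch for reading one of them, assuming debugfs is mounted at the conventional /sys/kernel/debug:

	/* Userspace sketch: read one AMD IOMMU statistics counter from debugfs.
	 * Assumes debugfs is mounted at /sys/kernel/debug (the usual path).
	 */
	#include <stdio.h>

	int main(void)
	{
		unsigned long long value;
		FILE *f = fopen("/sys/kernel/debug/amd-iommu/total_map_requests", "r");

		if (!f) {
			perror("fopen");
			return 1;
		}
		if (fscanf(f, "%llu", &value) == 1)
			printf("total_map_requests: %llu\n", value);
		fclose(f);
		return 0;
	}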
/* returns !0 if the IOMMU is caching non-present entries in its TLB */
static int iommu_has_npcache(struct amd_iommu *iommu)
@ -189,41 +259,23 @@ static int iommu_queue_command(struct amd_iommu *iommu, struct iommu_cmd *cmd)
spin_lock_irqsave(&iommu->lock, flags);
ret = __iommu_queue_command(iommu, cmd);
if (!ret)
iommu->need_sync = 1;
iommu->need_sync = true;
spin_unlock_irqrestore(&iommu->lock, flags);
return ret;
}
/*
* This function is called whenever we need to ensure that the IOMMU has
* completed execution of all commands we sent. It sends a
* COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
* us about that by writing a value to a physical address we pass with
* the command.
* This function waits until an IOMMU has completed a completion
* wait command
*/
static int iommu_completion_wait(struct amd_iommu *iommu)
static void __iommu_wait_for_completion(struct amd_iommu *iommu)
{
int ret = 0, ready = 0;
int ready = 0;
unsigned status = 0;
struct iommu_cmd cmd;
unsigned long flags, i = 0;
unsigned long i = 0;
memset(&cmd, 0, sizeof(cmd));
cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
spin_lock_irqsave(&iommu->lock, flags);
if (!iommu->need_sync)
goto out;
iommu->need_sync = 0;
ret = __iommu_queue_command(iommu, &cmd);
if (ret)
goto out;
INC_STATS_COUNTER(compl_wait);
while (!ready && (i < EXIT_LOOP_COUNT)) {
++i;
@ -238,6 +290,48 @@ static int iommu_completion_wait(struct amd_iommu *iommu)
if (unlikely(i == EXIT_LOOP_COUNT))
panic("AMD IOMMU: Completion wait loop failed\n");
}
/*
* This function queues a completion wait command into the command
* buffer of an IOMMU
*/
static int __iommu_completion_wait(struct amd_iommu *iommu)
{
struct iommu_cmd cmd;
memset(&cmd, 0, sizeof(cmd));
cmd.data[0] = CMD_COMPL_WAIT_INT_MASK;
CMD_SET_TYPE(&cmd, CMD_COMPL_WAIT);
return __iommu_queue_command(iommu, &cmd);
}
/*
* This function is called whenever we need to ensure that the IOMMU has
* completed execution of all commands we sent. It sends a
* COMPLETION_WAIT command and waits for it to finish. The IOMMU informs
* us about that by writing a value to a physical address we pass with
* the command.
*/
static int iommu_completion_wait(struct amd_iommu *iommu)
{
int ret = 0;
unsigned long flags;
spin_lock_irqsave(&iommu->lock, flags);
if (!iommu->need_sync)
goto out;
ret = __iommu_completion_wait(iommu);
iommu->need_sync = false;
if (ret)
goto out;
__iommu_wait_for_completion(iommu);
out:
spin_unlock_irqrestore(&iommu->lock, flags);
@ -264,6 +358,21 @@ static int iommu_queue_inv_dev_entry(struct amd_iommu *iommu, u16 devid)
return ret;
}
static void __iommu_build_inv_iommu_pages(struct iommu_cmd *cmd, u64 address,
u16 domid, int pde, int s)
{
memset(cmd, 0, sizeof(*cmd));
address &= PAGE_MASK;
CMD_SET_TYPE(cmd, CMD_INV_IOMMU_PAGES);
cmd->data[1] |= domid;
cmd->data[2] = lower_32_bits(address);
cmd->data[3] = upper_32_bits(address);
if (s) /* size bit - we flush more than one 4kb page */
cmd->data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
cmd->data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
}
/*
* Generic command send function for invalidating TLB entries
*/
@ -273,16 +382,7 @@ static int iommu_queue_inv_iommu_pages(struct amd_iommu *iommu,
struct iommu_cmd cmd;
int ret;
memset(&cmd, 0, sizeof(cmd));
address &= PAGE_MASK;
CMD_SET_TYPE(&cmd, CMD_INV_IOMMU_PAGES);
cmd.data[1] |= domid;
cmd.data[2] = lower_32_bits(address);
cmd.data[3] = upper_32_bits(address);
if (s) /* size bit - we flush more than one 4kb page */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_SIZE_MASK;
if (pde) /* PDE bit - we want to flush everything, not only the PTEs */
cmd.data[2] |= CMD_INV_IOMMU_PAGES_PDE_MASK;
__iommu_build_inv_iommu_pages(&cmd, address, domid, pde, s);
ret = iommu_queue_command(iommu, &cmd);
@ -321,9 +421,37 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
{
u64 address = CMD_INV_IOMMU_ALL_PAGES_ADDRESS;
INC_STATS_COUNTER(domain_flush_single);
iommu_queue_inv_iommu_pages(iommu, address, domid, 0, 1);
}
#ifdef CONFIG_IOMMU_API
/*
* This function is used to flush the IO/TLB for a given protection domain
* on every IOMMU in the system
*/
static void iommu_flush_domain(u16 domid)
{
unsigned long flags;
struct amd_iommu *iommu;
struct iommu_cmd cmd;
INC_STATS_COUNTER(domain_flush_all);
__iommu_build_inv_iommu_pages(&cmd, CMD_INV_IOMMU_ALL_PAGES_ADDRESS,
domid, 1, 1);
list_for_each_entry(iommu, &amd_iommu_list, list) {
spin_lock_irqsave(&iommu->lock, flags);
__iommu_queue_command(iommu, &cmd);
__iommu_completion_wait(iommu);
__iommu_wait_for_completion(iommu);
spin_unlock_irqrestore(&iommu->lock, flags);
}
}
#endif
/****************************************************************************
*
* The functions below are used to create the page table mappings for
@ -338,10 +466,10 @@ static void iommu_flush_tlb(struct amd_iommu *iommu, u16 domid)
* supporting all features of AMD IOMMU page tables like level skipping
* and full 64 bit address spaces.
*/
static int iommu_map(struct protection_domain *dom,
unsigned long bus_addr,
unsigned long phys_addr,
int prot)
static int iommu_map_page(struct protection_domain *dom,
unsigned long bus_addr,
unsigned long phys_addr,
int prot)
{
u64 __pte, *pte, *page;
@ -388,6 +516,30 @@ static int iommu_map(struct protection_domain *dom,
return 0;
}
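For orientation, the IOMMU_PTE_L{2,1,0}_INDEX() lookups above split the IO virtual address like a 3-level page table with 4 KB pages and 512 entries per level; one L2 entry then spans 1 GB and one L1 entry 2 MB. A sketch of that decomposition, with shift and mask values that are assumptions standing in for the real macros from amd_iommu_types.h:

	/* Sketch of the address split behind iommu_map_page()/iommu_unmap_page().
	 * Shifts and masks are assumed (9 index bits per level, 12-bit page
	 * offset) and stand in for the IOMMU_PTE_L{2,1,0}_INDEX() macros.
	 */
	unsigned long bus_addr = 0x12345000UL;           /* arbitrary example IOVA */
	unsigned long l2  = (bus_addr >> 30) & 0x1ffUL;  /* index into dom->pt_root */
	unsigned long l1  = (bus_addr >> 21) & 0x1ffUL;  /* index into the L1 table */
	unsigned long l0  = (bus_addr >> 12) & 0x1ffUL;  /* index of the final PTE  */
	unsigned long off =  bus_addr        & 0xfffUL;  /* offset within the page  */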
#ifdef CONFIG_IOMMU_API
static void iommu_unmap_page(struct protection_domain *dom,
unsigned long bus_addr)
{
u64 *pte;
pte = &dom->pt_root[IOMMU_PTE_L2_INDEX(bus_addr)];
if (!IOMMU_PTE_PRESENT(*pte))
return;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(bus_addr)];
if (!IOMMU_PTE_PRESENT(*pte))
return;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L0_INDEX(bus_addr)];
*pte = 0;
}
#endif
/*
* This function checks if a specific unity mapping entry is needed for
* this specific IOMMU.
@ -440,7 +592,7 @@ static int dma_ops_unity_map(struct dma_ops_domain *dma_dom,
for (addr = e->address_start; addr < e->address_end;
addr += PAGE_SIZE) {
ret = iommu_map(&dma_dom->domain, addr, addr, e->prot);
ret = iommu_map_page(&dma_dom->domain, addr, addr, e->prot);
if (ret)
return ret;
/*
@ -571,6 +723,18 @@ static u16 domain_id_alloc(void)
return id;
}
#ifdef CONFIG_IOMMU_API
static void domain_id_free(int id)
{
unsigned long flags;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
if (id > 0 && id < MAX_DOMAIN_ID)
__clear_bit(id, amd_iommu_pd_alloc_bitmap);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
#endif
/*
* Used to reserve address ranges in the aperture (e.g. for exclusion
ranges).
@ -587,12 +751,12 @@ static void dma_ops_reserve_addresses(struct dma_ops_domain *dom,
iommu_area_reserve(dom->bitmap, start_page, pages);
}
static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
static void free_pagetable(struct protection_domain *domain)
{
int i, j;
u64 *p1, *p2, *p3;
p1 = dma_dom->domain.pt_root;
p1 = domain->pt_root;
if (!p1)
return;
@ -613,6 +777,8 @@ static void dma_ops_free_pagetable(struct dma_ops_domain *dma_dom)
}
free_page((unsigned long)p1);
domain->pt_root = NULL;
}
/*
@ -624,7 +790,7 @@ static void dma_ops_domain_free(struct dma_ops_domain *dom)
if (!dom)
return;
dma_ops_free_pagetable(dom);
free_pagetable(&dom->domain);
kfree(dom->pte_pages);
@ -663,6 +829,7 @@ static struct dma_ops_domain *dma_ops_domain_alloc(struct amd_iommu *iommu,
goto free_dma_dom;
dma_dom->domain.mode = PAGE_MODE_3_LEVEL;
dma_dom->domain.pt_root = (void *)get_zeroed_page(GFP_KERNEL);
dma_dom->domain.flags = PD_DMA_OPS_MASK;
dma_dom->domain.priv = dma_dom;
if (!dma_dom->domain.pt_root)
goto free_dma_dom;
@ -724,6 +891,15 @@ free_dma_dom:
return NULL;
}
/*
* little helper function to check whether a given protection domain is a
* dma_ops domain
*/
static bool dma_ops_domain(struct protection_domain *domain)
{
return domain->flags & PD_DMA_OPS_MASK;
}
/*
* Find out the protection domain structure for a given PCI device. This
* will give us the pointer to the page table root for example.
@ -744,14 +920,15 @@ static struct protection_domain *domain_for_device(u16 devid)
* If a device is not yet associated with a domain, this function assigns
* it to a domain and makes that assignment visible to the hardware
*/
static void set_device_domain(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
static void attach_device(struct amd_iommu *iommu,
struct protection_domain *domain,
u16 devid)
{
unsigned long flags;
u64 pte_root = virt_to_phys(domain->pt_root);
domain->dev_cnt += 1;
pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK)
<< DEV_ENTRY_MODE_SHIFT;
pte_root |= IOMMU_PTE_IR | IOMMU_PTE_IW | IOMMU_PTE_P | IOMMU_PTE_TV;
@ -767,6 +944,116 @@ static void set_device_domain(struct amd_iommu *iommu,
iommu_queue_inv_dev_entry(iommu, devid);
}
/*
* Removes a device from a protection domain (unlocked)
*/
static void __detach_device(struct protection_domain *domain, u16 devid)
{
/* lock domain */
spin_lock(&domain->lock);
/* remove domain from the lookup table */
amd_iommu_pd_table[devid] = NULL;
/* remove entry from the device table seen by the hardware */
amd_iommu_dev_table[devid].data[0] = IOMMU_PTE_P | IOMMU_PTE_TV;
amd_iommu_dev_table[devid].data[1] = 0;
amd_iommu_dev_table[devid].data[2] = 0;
/* decrease reference counter */
domain->dev_cnt -= 1;
/* ready */
spin_unlock(&domain->lock);
}
/*
* Removes a device from a protection domain (with devtable_lock held)
*/
static void detach_device(struct protection_domain *domain, u16 devid)
{
unsigned long flags;
/* lock device table */
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
__detach_device(domain, devid);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
static int device_change_notifier(struct notifier_block *nb,
unsigned long action, void *data)
{
struct device *dev = data;
struct pci_dev *pdev = to_pci_dev(dev);
u16 devid = calc_devid(pdev->bus->number, pdev->devfn);
struct protection_domain *domain;
struct dma_ops_domain *dma_domain;
struct amd_iommu *iommu;
int order = amd_iommu_aperture_order;
unsigned long flags;
if (devid > amd_iommu_last_bdf)
goto out;
devid = amd_iommu_alias_table[devid];
iommu = amd_iommu_rlookup_table[devid];
if (iommu == NULL)
goto out;
domain = domain_for_device(devid);
if (domain && !dma_ops_domain(domain))
WARN_ONCE(1, "AMD IOMMU WARNING: device %s already bound "
"to a non-dma-ops domain\n", dev_name(dev));
switch (action) {
case BUS_NOTIFY_BOUND_DRIVER:
if (domain)
goto out;
dma_domain = find_protection_domain(devid);
if (!dma_domain)
dma_domain = iommu->default_dom;
attach_device(iommu, &dma_domain->domain, devid);
printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
"device %s\n", dma_domain->domain.id, dev_name(dev));
break;
case BUS_NOTIFY_UNBIND_DRIVER:
if (!domain)
goto out;
detach_device(domain, devid);
break;
case BUS_NOTIFY_ADD_DEVICE:
/* allocate a protection domain if a device is added */
dma_domain = find_protection_domain(devid);
if (dma_domain)
goto out;
dma_domain = dma_ops_domain_alloc(iommu, order);
if (!dma_domain)
goto out;
dma_domain->target_dev = devid;
spin_lock_irqsave(&iommu_pd_list_lock, flags);
list_add_tail(&dma_domain->list, &iommu_pd_list);
spin_unlock_irqrestore(&iommu_pd_list_lock, flags);
break;
default:
goto out;
}
iommu_queue_inv_dev_entry(iommu, devid);
iommu_completion_wait(iommu);
out:
return 0;
}
struct notifier_block device_nb = {
.notifier_call = device_change_notifier,
};
/*****************************************************************************
*
* The next functions belong to the dma_ops mapping/unmapping code.
@ -802,7 +1089,6 @@ static struct dma_ops_domain *find_protection_domain(u16 devid)
list_for_each_entry(entry, &iommu_pd_list, list) {
if (entry->target_dev == devid) {
ret = entry;
list_del(&ret->list);
break;
}
}
@ -853,14 +1139,13 @@ static int get_device_resources(struct device *dev,
if (!dma_dom)
dma_dom = (*iommu)->default_dom;
*domain = &dma_dom->domain;
set_device_domain(*iommu, *domain, *bdf);
attach_device(*iommu, *domain, *bdf);
printk(KERN_INFO "AMD IOMMU: Using protection domain %d for "
"device ", (*domain)->id);
print_devid(_bdf, 1);
"device %s\n", (*domain)->id, dev_name(dev));
}
if (domain_for_device(_bdf) == NULL)
set_device_domain(*iommu, *domain, _bdf);
attach_device(*iommu, *domain, _bdf);
return 1;
}
@ -946,6 +1231,11 @@ static dma_addr_t __map_single(struct device *dev,
pages = iommu_num_pages(paddr, size, PAGE_SIZE);
paddr &= PAGE_MASK;
INC_STATS_COUNTER(total_map_requests);
if (pages > 1)
INC_STATS_COUNTER(cross_page);
if (align)
align_mask = (1UL << get_order(size)) - 1;
@ -962,6 +1252,8 @@ static dma_addr_t __map_single(struct device *dev,
}
address += offset;
ADD_STATS_COUNTER(alloced_io_mem, size);
if (unlikely(dma_dom->need_flush && !amd_iommu_unmap_flush)) {
iommu_flush_tlb(iommu, dma_dom->domain.id);
dma_dom->need_flush = false;
@ -998,6 +1290,8 @@ static void __unmap_single(struct amd_iommu *iommu,
start += PAGE_SIZE;
}
SUB_STATS_COUNTER(alloced_io_mem, size);
dma_ops_free_addresses(dma_dom, dma_addr, pages);
if (amd_iommu_unmap_flush || dma_dom->need_flush) {
@ -1019,6 +1313,8 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
dma_addr_t addr;
u64 dma_mask;
INC_STATS_COUNTER(cnt_map_single);
if (!check_device(dev))
return bad_dma_address;
@ -1030,6 +1326,9 @@ static dma_addr_t map_single(struct device *dev, phys_addr_t paddr,
/* device not handled by any AMD IOMMU */
return (dma_addr_t)paddr;
if (!dma_ops_domain(domain))
return bad_dma_address;
spin_lock_irqsave(&domain->lock, flags);
addr = __map_single(dev, iommu, domain->priv, paddr, size, dir, false,
dma_mask);
@ -1055,11 +1354,16 @@ static void unmap_single(struct device *dev, dma_addr_t dma_addr,
struct protection_domain *domain;
u16 devid;
INC_STATS_COUNTER(cnt_unmap_single);
if (!check_device(dev) ||
!get_device_resources(dev, &iommu, &domain, &devid))
/* device not handled by any AMD IOMMU */
return;
if (!dma_ops_domain(domain))
return;
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(iommu, domain->priv, dma_addr, size, dir);
@ -1104,6 +1408,8 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
int mapped_elems = 0;
u64 dma_mask;
INC_STATS_COUNTER(cnt_map_sg);
if (!check_device(dev))
return 0;
@ -1114,6 +1420,9 @@ static int map_sg(struct device *dev, struct scatterlist *sglist,
if (!iommu || !domain)
return map_sg_no_iommu(dev, sglist, nelems, dir);
if (!dma_ops_domain(domain))
return 0;
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
@ -1163,10 +1472,15 @@ static void unmap_sg(struct device *dev, struct scatterlist *sglist,
u16 devid;
int i;
INC_STATS_COUNTER(cnt_unmap_sg);
if (!check_device(dev) ||
!get_device_resources(dev, &iommu, &domain, &devid))
return;
if (!dma_ops_domain(domain))
return;
spin_lock_irqsave(&domain->lock, flags);
for_each_sg(sglist, s, nelems, i) {
@ -1194,6 +1508,8 @@ static void *alloc_coherent(struct device *dev, size_t size,
phys_addr_t paddr;
u64 dma_mask = dev->coherent_dma_mask;
INC_STATS_COUNTER(cnt_alloc_coherent);
if (!check_device(dev))
return NULL;
@ -1212,6 +1528,9 @@ static void *alloc_coherent(struct device *dev, size_t size,
return virt_addr;
}
if (!dma_ops_domain(domain))
goto out_free;
if (!dma_mask)
dma_mask = *dev->dma_mask;
@ -1220,18 +1539,20 @@ static void *alloc_coherent(struct device *dev, size_t size,
*dma_addr = __map_single(dev, iommu, domain->priv, paddr,
size, DMA_BIDIRECTIONAL, true, dma_mask);
if (*dma_addr == bad_dma_address) {
free_pages((unsigned long)virt_addr, get_order(size));
virt_addr = NULL;
goto out;
}
if (*dma_addr == bad_dma_address)
goto out_free;
iommu_completion_wait(iommu);
out:
spin_unlock_irqrestore(&domain->lock, flags);
return virt_addr;
out_free:
free_pages((unsigned long)virt_addr, get_order(size));
return NULL;
}
/*
@ -1245,6 +1566,8 @@ static void free_coherent(struct device *dev, size_t size,
struct protection_domain *domain;
u16 devid;
INC_STATS_COUNTER(cnt_free_coherent);
if (!check_device(dev))
return;
@ -1253,6 +1576,9 @@ static void free_coherent(struct device *dev, size_t size,
if (!iommu || !domain)
goto free_mem;
if (!dma_ops_domain(domain))
goto free_mem;
spin_lock_irqsave(&domain->lock, flags);
__unmap_single(iommu, domain->priv, dma_addr, size, DMA_BIDIRECTIONAL);
@ -1296,7 +1622,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask)
* we don't need to preallocate the protection domains anymore.
* For now we have to.
*/
void prealloc_protection_domains(void)
static void prealloc_protection_domains(void)
{
struct pci_dev *dev = NULL;
struct dma_ops_domain *dma_dom;
@ -1305,7 +1631,7 @@ void prealloc_protection_domains(void)
u16 devid;
while ((dev = pci_get_device(PCI_ANY_ID, PCI_ANY_ID, dev)) != NULL) {
devid = (dev->bus->number << 8) | dev->devfn;
devid = calc_devid(dev->bus->number, dev->devfn);
if (devid > amd_iommu_last_bdf)
continue;
devid = amd_iommu_alias_table[devid];
@ -1352,6 +1678,7 @@ int __init amd_iommu_init_dma_ops(void)
iommu->default_dom = dma_ops_domain_alloc(iommu, order);
if (iommu->default_dom == NULL)
return -ENOMEM;
iommu->default_dom->domain.flags |= PD_DEFAULT_MASK;
ret = iommu_init_unity_mappings(iommu);
if (ret)
goto free_domains;
@ -1375,6 +1702,14 @@ int __init amd_iommu_init_dma_ops(void)
/* Make the driver finally visible to the drivers */
dma_ops = &amd_iommu_dma_ops;
#ifdef CONFIG_IOMMU_API
register_iommu(&amd_iommu_ops);
#endif
bus_register_notifier(&pci_bus_type, &device_nb);
amd_iommu_stats_init();
return 0;
free_domains:
@ -1386,3 +1721,227 @@ free_domains:
return ret;
}
/*****************************************************************************
*
* The following functions belong to the exported interface of AMD IOMMU
*
* This interface allows access to lower level functions of the IOMMU
* like protection domain handling and assignment of devices to domains
* which is not possible with the dma_ops interface.
*
*****************************************************************************/
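For context, a consumer of this interface (a virtualization backend, for instance) goes through the linux/iommu.h frontend rather than calling the amd_iommu_* callbacks directly. A rough sketch under that assumption; the frontend names used here (iommu_domain_alloc(), iommu_attach_device(), iommu_map_range(), ...) reflect the API of this era and dispatch 1:1 onto the amd_iommu_ops callbacks defined below:

	/* Rough consumer sketch against the linux/iommu.h frontend; function
	 * names are assumptions based on this era of the API. 'dev' is a
	 * struct device * for some PCI device, error handling trimmed.
	 */
	#include <linux/iommu.h>

	static int example_use_iommu_api(struct device *dev,
					 phys_addr_t paddr, size_t size)
	{
		struct iommu_domain *dom;
		unsigned long iova = 0;	/* IO virtual address chosen by the caller */
		int ret;

		dom = iommu_domain_alloc();		/* -> amd_iommu_domain_init() */
		if (!dom)
			return -ENOMEM;

		ret = iommu_attach_device(dom, dev);	/* -> amd_iommu_attach_device() */
		if (ret)
			goto out_free;

		ret = iommu_map_range(dom, iova, paddr, size,
				      IOMMU_READ | IOMMU_WRITE);
		if (ret)
			goto out_detach;

		/* the mapping can be verified with iommu_iova_to_phys(dom, iova) */

		iommu_unmap_range(dom, iova, size);	/* -> amd_iommu_unmap_range() */
	out_detach:
		iommu_detach_device(dom, dev);		/* -> amd_iommu_detach_device() */
	out_free:
		iommu_domain_free(dom);			/* -> amd_iommu_domain_destroy() */
		return ret;
	}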
#ifdef CONFIG_IOMMU_API
static void cleanup_domain(struct protection_domain *domain)
{
unsigned long flags;
u16 devid;
write_lock_irqsave(&amd_iommu_devtable_lock, flags);
for (devid = 0; devid <= amd_iommu_last_bdf; ++devid)
if (amd_iommu_pd_table[devid] == domain)
__detach_device(domain, devid);
write_unlock_irqrestore(&amd_iommu_devtable_lock, flags);
}
static int amd_iommu_domain_init(struct iommu_domain *dom)
{
struct protection_domain *domain;
domain = kzalloc(sizeof(*domain), GFP_KERNEL);
if (!domain)
return -ENOMEM;
spin_lock_init(&domain->lock);
domain->mode = PAGE_MODE_3_LEVEL;
domain->id = domain_id_alloc();
if (!domain->id)
goto out_free;
domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL);
if (!domain->pt_root)
goto out_free;
dom->priv = domain;
return 0;
out_free:
kfree(domain);
return -ENOMEM;
}
static void amd_iommu_domain_destroy(struct iommu_domain *dom)
{
struct protection_domain *domain = dom->priv;
if (!domain)
return;
if (domain->dev_cnt > 0)
cleanup_domain(domain);
BUG_ON(domain->dev_cnt != 0);
free_pagetable(domain);
domain_id_free(domain->id);
kfree(domain);
dom->priv = NULL;
}
static void amd_iommu_detach_device(struct iommu_domain *dom,
struct device *dev)
{
struct protection_domain *domain = dom->priv;
struct amd_iommu *iommu;
struct pci_dev *pdev;
u16 devid;
if (dev->bus != &pci_bus_type)
return;
pdev = to_pci_dev(dev);
devid = calc_devid(pdev->bus->number, pdev->devfn);
if (devid > 0)
detach_device(domain, devid);
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
return;
iommu_queue_inv_dev_entry(iommu, devid);
iommu_completion_wait(iommu);
}
static int amd_iommu_attach_device(struct iommu_domain *dom,
struct device *dev)
{
struct protection_domain *domain = dom->priv;
struct protection_domain *old_domain;
struct amd_iommu *iommu;
struct pci_dev *pdev;
u16 devid;
if (dev->bus != &pci_bus_type)
return -EINVAL;
pdev = to_pci_dev(dev);
devid = calc_devid(pdev->bus->number, pdev->devfn);
if (devid > amd_iommu_last_bdf ||
devid != amd_iommu_alias_table[devid])
return -EINVAL;
iommu = amd_iommu_rlookup_table[devid];
if (!iommu)
return -EINVAL;
old_domain = domain_for_device(devid);
if (old_domain)
return -EBUSY;
attach_device(iommu, domain, devid);
iommu_completion_wait(iommu);
return 0;
}
static int amd_iommu_map_range(struct iommu_domain *dom,
unsigned long iova, phys_addr_t paddr,
size_t size, int iommu_prot)
{
struct protection_domain *domain = dom->priv;
unsigned long i, npages = iommu_num_pages(paddr, size, PAGE_SIZE);
int prot = 0;
int ret;
if (iommu_prot & IOMMU_READ)
prot |= IOMMU_PROT_IR;
if (iommu_prot & IOMMU_WRITE)
prot |= IOMMU_PROT_IW;
iova &= PAGE_MASK;
paddr &= PAGE_MASK;
for (i = 0; i < npages; ++i) {
ret = iommu_map_page(domain, iova, paddr, prot);
if (ret)
return ret;
iova += PAGE_SIZE;
paddr += PAGE_SIZE;
}
return 0;
}
static void amd_iommu_unmap_range(struct iommu_domain *dom,
unsigned long iova, size_t size)
{
struct protection_domain *domain = dom->priv;
unsigned long i, npages = iommu_num_pages(iova, size, PAGE_SIZE);
iova &= PAGE_MASK;
for (i = 0; i < npages; ++i) {
iommu_unmap_page(domain, iova);
iova += PAGE_SIZE;
}
iommu_flush_domain(domain->id);
}
static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom,
unsigned long iova)
{
struct protection_domain *domain = dom->priv;
unsigned long offset = iova & ~PAGE_MASK;
phys_addr_t paddr;
u64 *pte;
pte = &domain->pt_root[IOMMU_PTE_L2_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
return 0;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L1_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
return 0;
pte = IOMMU_PTE_PAGE(*pte);
pte = &pte[IOMMU_PTE_L0_INDEX(iova)];
if (!IOMMU_PTE_PRESENT(*pte))
return 0;
paddr = *pte & IOMMU_PAGE_MASK;
paddr |= offset;
return paddr;
}
static struct iommu_ops amd_iommu_ops = {
.domain_init = amd_iommu_domain_init,
.domain_destroy = amd_iommu_domain_destroy,
.attach_dev = amd_iommu_attach_device,
.detach_dev = amd_iommu_detach_device,
.map = amd_iommu_map_range,
.unmap = amd_iommu_unmap_range,
.iova_to_phys = amd_iommu_iova_to_phys,
};
#endif

View File

@ -122,7 +122,8 @@ u16 amd_iommu_last_bdf; /* largest PCI device id we have
LIST_HEAD(amd_iommu_unity_map); /* a list of required unity mappings
we find in ACPI */
unsigned amd_iommu_aperture_order = 26; /* size of aperture in power of 2 */
int amd_iommu_isolate = 1; /* if 1, device isolation is enabled */
bool amd_iommu_isolate = true; /* if true, device isolation is
enabled */
bool amd_iommu_unmap_flush; /* if true, flush on every unmap */
LIST_HEAD(amd_iommu_list); /* list of all AMD IOMMUs in the
@ -245,12 +246,8 @@ static void __init iommu_feature_disable(struct amd_iommu *iommu, u8 bit)
/* Function to enable the hardware */
void __init iommu_enable(struct amd_iommu *iommu)
{
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU "
"at %02x:%02x.%x cap 0x%hx\n",
iommu->dev->bus->number,
PCI_SLOT(iommu->dev->devfn),
PCI_FUNC(iommu->dev->devfn),
iommu->cap_ptr);
printk(KERN_INFO "AMD IOMMU: Enabling IOMMU at %s cap 0x%hx\n",
dev_name(&iommu->dev->dev), iommu->cap_ptr);
iommu_feature_enable(iommu, CONTROL_IOMMU_EN);
}
@ -1218,9 +1215,9 @@ static int __init parse_amd_iommu_options(char *str)
{
for (; *str; ++str) {
if (strncmp(str, "isolate", 7) == 0)
amd_iommu_isolate = 1;
amd_iommu_isolate = true;
if (strncmp(str, "share", 5) == 0)
amd_iommu_isolate = 0;
amd_iommu_isolate = false;
if (strncmp(str, "fullflush", 9) == 0)
amd_iommu_unmap_flush = true;
}