diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 20cce366e951..1dc3718560d0 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -101,6 +101,8 @@ struct kmem_cache *amd_iommu_irq_cache; static void update_domain(struct protection_domain *domain); static int protection_domain_init(struct protection_domain *domain); static void detach_device(struct device *dev); +static void update_and_flush_device_table(struct protection_domain *domain, + struct domain_pgtable *pgtable); /**************************************************************************** * @@ -151,6 +153,26 @@ static struct protection_domain *to_pdomain(struct iommu_domain *dom) return container_of(dom, struct protection_domain, domain); } +static void amd_iommu_domain_get_pgtable(struct protection_domain *domain, + struct domain_pgtable *pgtable) +{ + u64 pt_root = atomic64_read(&domain->pt_root); + + pgtable->root = (u64 *)(pt_root & PAGE_MASK); + pgtable->mode = pt_root & 7; /* lowest 3 bits encode pgtable mode */ +} + +static u64 amd_iommu_domain_encode_pgtable(u64 *root, int mode) +{ + u64 pt_root; + + /* lowest 3 bits encode pgtable mode */ + pt_root = mode & 7; + pt_root |= (u64)root; + + return pt_root; +} + static struct iommu_dev_data *alloc_dev_data(u16 devid) { struct iommu_dev_data *dev_data; @@ -1397,13 +1419,18 @@ static struct page *free_sub_pt(unsigned long root, int mode, static void free_pagetable(struct protection_domain *domain) { - unsigned long root = (unsigned long)domain->pt_root; + struct domain_pgtable pgtable; struct page *freelist = NULL; + unsigned long root; - BUG_ON(domain->mode < PAGE_MODE_NONE || - domain->mode > PAGE_MODE_6_LEVEL); + amd_iommu_domain_get_pgtable(domain, &pgtable); + atomic64_set(&domain->pt_root, 0); - freelist = free_sub_pt(root, domain->mode, freelist); + BUG_ON(pgtable.mode < PAGE_MODE_NONE || + pgtable.mode > PAGE_MODE_6_LEVEL); + + root = (unsigned long)pgtable.root; + freelist = free_sub_pt(root, pgtable.mode, freelist); free_page_list(freelist); } @@ -1417,24 +1444,39 @@ static bool increase_address_space(struct protection_domain *domain, unsigned long address, gfp_t gfp) { + struct domain_pgtable pgtable; unsigned long flags; - bool ret = false; - u64 *pte; + bool ret = true; + u64 *pte, root; spin_lock_irqsave(&domain->lock, flags); - if (address <= PM_LEVEL_SIZE(domain->mode) || - WARN_ON_ONCE(domain->mode == PAGE_MODE_6_LEVEL)) + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (address <= PM_LEVEL_SIZE(pgtable.mode)) + goto out; + + ret = false; + if (WARN_ON_ONCE(pgtable.mode == PAGE_MODE_6_LEVEL)) goto out; pte = (void *)get_zeroed_page(gfp); if (!pte) goto out; - *pte = PM_LEVEL_PDE(domain->mode, - iommu_virt_to_phys(domain->pt_root)); - domain->pt_root = pte; - domain->mode += 1; + *pte = PM_LEVEL_PDE(pgtable.mode, iommu_virt_to_phys(pgtable.root)); + + pgtable.root = pte; + pgtable.mode += 1; + update_and_flush_device_table(domain, &pgtable); + domain_flush_complete(domain); + + /* + * Device Table needs to be updated and flushed before the new root can + * be published. + */ + root = amd_iommu_domain_encode_pgtable(pte, pgtable.mode); + atomic64_set(&domain->pt_root, root); ret = true; @@ -1451,16 +1493,29 @@ static u64 *alloc_pte(struct protection_domain *domain, gfp_t gfp, bool *updated) { + struct domain_pgtable pgtable; int level, end_lvl; u64 *pte, *page; BUG_ON(!is_power_of_2(page_size)); - while (address > PM_LEVEL_SIZE(domain->mode)) - *updated = increase_address_space(domain, address, gfp) || *updated; + amd_iommu_domain_get_pgtable(domain, &pgtable); - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + while (address > PM_LEVEL_SIZE(pgtable.mode)) { + /* + * Return an error if there is no memory to update the + * page-table. + */ + if (!increase_address_space(domain, address, gfp)) + return NULL; + + /* Read new values to check if update was successful */ + amd_iommu_domain_get_pgtable(domain, &pgtable); + } + + + level = pgtable.mode - 1; + pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; address = PAGE_SIZE_ALIGN(address, page_size); end_lvl = PAGE_SIZE_LEVEL(page_size); @@ -1536,16 +1591,19 @@ static u64 *fetch_pte(struct protection_domain *domain, unsigned long address, unsigned long *page_size) { + struct domain_pgtable pgtable; int level; u64 *pte; *page_size = 0; - if (address > PM_LEVEL_SIZE(domain->mode)) + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (address > PM_LEVEL_SIZE(pgtable.mode)) return NULL; - level = domain->mode - 1; - pte = &domain->pt_root[PM_LEVEL_INDEX(level, address)]; + level = pgtable.mode - 1; + pte = &pgtable.root[PM_LEVEL_INDEX(level, address)]; *page_size = PTE_LEVEL_PAGE_SIZE(level); while (level > 0) { @@ -1660,7 +1718,13 @@ out: unsigned long flags; spin_lock_irqsave(&dom->lock, flags); - update_domain(dom); + /* + * Flush domain TLB(s) and wait for completion. Any Device-Table + * Updates and flushing already happened in + * increase_address_space(). + */ + domain_flush_tlb_pde(dom); + domain_flush_complete(dom); spin_unlock_irqrestore(&dom->lock, flags); } @@ -1806,6 +1870,7 @@ static void dma_ops_domain_free(struct protection_domain *domain) static struct protection_domain *dma_ops_domain_alloc(void) { struct protection_domain *domain; + u64 *pt_root, root; domain = kzalloc(sizeof(struct protection_domain), GFP_KERNEL); if (!domain) @@ -1814,12 +1879,14 @@ static struct protection_domain *dma_ops_domain_alloc(void) if (protection_domain_init(domain)) goto free_domain; - domain->mode = PAGE_MODE_3_LEVEL; - domain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); - domain->flags = PD_DMA_OPS_MASK; - if (!domain->pt_root) + pt_root = (void *)get_zeroed_page(GFP_KERNEL); + if (!pt_root) goto free_domain; + root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL); + atomic64_set(&domain->pt_root, root); + domain->flags = PD_DMA_OPS_MASK; + if (iommu_get_dma_cookie(&domain->domain) == -ENOMEM) goto free_domain; @@ -1841,16 +1908,17 @@ static bool dma_ops_domain(struct protection_domain *domain) } static void set_dte_entry(u16 devid, struct protection_domain *domain, + struct domain_pgtable *pgtable, bool ats, bool ppr) { u64 pte_root = 0; u64 flags = 0; u32 old_domid; - if (domain->mode != PAGE_MODE_NONE) - pte_root = iommu_virt_to_phys(domain->pt_root); + if (pgtable->mode != PAGE_MODE_NONE) + pte_root = iommu_virt_to_phys(pgtable->root); - pte_root |= (domain->mode & DEV_ENTRY_MODE_MASK) + pte_root |= (pgtable->mode & DEV_ENTRY_MODE_MASK) << DEV_ENTRY_MODE_SHIFT; pte_root |= DTE_FLAG_IR | DTE_FLAG_IW | DTE_FLAG_V | DTE_FLAG_TV; @@ -1923,6 +1991,7 @@ static void clear_dte_entry(u16 devid) static void do_attach(struct iommu_dev_data *dev_data, struct protection_domain *domain) { + struct domain_pgtable pgtable; struct amd_iommu *iommu; bool ats; @@ -1938,7 +2007,9 @@ static void do_attach(struct iommu_dev_data *dev_data, domain->dev_cnt += 1; /* Update device table */ - set_dte_entry(dev_data->devid, domain, ats, dev_data->iommu_v2); + amd_iommu_domain_get_pgtable(domain, &pgtable); + set_dte_entry(dev_data->devid, domain, &pgtable, + ats, dev_data->iommu_v2); clone_aliases(dev_data->pdev); device_flush_dte(dev_data); @@ -2249,23 +2320,36 @@ static int amd_iommu_domain_get_attr(struct iommu_domain *domain, * *****************************************************************************/ -static void update_device_table(struct protection_domain *domain) +static void update_device_table(struct protection_domain *domain, + struct domain_pgtable *pgtable) { struct iommu_dev_data *dev_data; list_for_each_entry(dev_data, &domain->dev_list, list) { - set_dte_entry(dev_data->devid, domain, dev_data->ats.enabled, - dev_data->iommu_v2); + set_dte_entry(dev_data->devid, domain, pgtable, + dev_data->ats.enabled, dev_data->iommu_v2); clone_aliases(dev_data->pdev); } } +static void update_and_flush_device_table(struct protection_domain *domain, + struct domain_pgtable *pgtable) +{ + update_device_table(domain, pgtable); + domain_flush_devices(domain); +} + static void update_domain(struct protection_domain *domain) { - update_device_table(domain); + struct domain_pgtable pgtable; - domain_flush_devices(domain); + /* Update device table */ + amd_iommu_domain_get_pgtable(domain, &pgtable); + update_and_flush_device_table(domain, &pgtable); + + /* Flush domain TLB(s) and wait for completion */ domain_flush_tlb_pde(domain); + domain_flush_complete(domain); } int __init amd_iommu_init_api(void) @@ -2375,6 +2459,7 @@ out_err: static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) { struct protection_domain *pdomain; + u64 *pt_root, root; switch (type) { case IOMMU_DOMAIN_UNMANAGED: @@ -2382,13 +2467,15 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) if (!pdomain) return NULL; - pdomain->mode = PAGE_MODE_3_LEVEL; - pdomain->pt_root = (void *)get_zeroed_page(GFP_KERNEL); - if (!pdomain->pt_root) { + pt_root = (void *)get_zeroed_page(GFP_KERNEL); + if (!pt_root) { protection_domain_free(pdomain); return NULL; } + root = amd_iommu_domain_encode_pgtable(pt_root, PAGE_MODE_3_LEVEL); + atomic64_set(&pdomain->pt_root, root); + pdomain->domain.geometry.aperture_start = 0; pdomain->domain.geometry.aperture_end = ~0ULL; pdomain->domain.geometry.force_aperture = true; @@ -2406,7 +2493,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) if (!pdomain) return NULL; - pdomain->mode = PAGE_MODE_NONE; + atomic64_set(&pdomain->pt_root, PAGE_MODE_NONE); break; default: return NULL; @@ -2418,6 +2505,7 @@ static struct iommu_domain *amd_iommu_domain_alloc(unsigned type) static void amd_iommu_domain_free(struct iommu_domain *dom) { struct protection_domain *domain; + struct domain_pgtable pgtable; domain = to_pdomain(dom); @@ -2435,7 +2523,9 @@ static void amd_iommu_domain_free(struct iommu_domain *dom) dma_ops_domain_free(domain); break; default: - if (domain->mode != PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + + if (pgtable.mode != PAGE_MODE_NONE) free_pagetable(domain); if (domain->flags & PD_IOMMUV2_MASK) @@ -2518,10 +2608,12 @@ static int amd_iommu_map(struct iommu_domain *dom, unsigned long iova, gfp_t gfp) { struct protection_domain *domain = to_pdomain(dom); + struct domain_pgtable pgtable; int prot = 0; int ret; - if (domain->mode == PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode == PAGE_MODE_NONE) return -EINVAL; if (iommu_prot & IOMMU_READ) @@ -2541,8 +2633,10 @@ static size_t amd_iommu_unmap(struct iommu_domain *dom, unsigned long iova, struct iommu_iotlb_gather *gather) { struct protection_domain *domain = to_pdomain(dom); + struct domain_pgtable pgtable; - if (domain->mode == PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode == PAGE_MODE_NONE) return 0; return iommu_unmap_page(domain, iova, page_size); @@ -2553,9 +2647,11 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, { struct protection_domain *domain = to_pdomain(dom); unsigned long offset_mask, pte_pgsize; + struct domain_pgtable pgtable; u64 *pte, __pte; - if (domain->mode == PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode == PAGE_MODE_NONE) return iova; pte = fetch_pte(domain, iova, &pte_pgsize); @@ -2708,16 +2804,26 @@ EXPORT_SYMBOL(amd_iommu_unregister_ppr_notifier); void amd_iommu_domain_direct_map(struct iommu_domain *dom) { struct protection_domain *domain = to_pdomain(dom); + struct domain_pgtable pgtable; unsigned long flags; + u64 pt_root; spin_lock_irqsave(&domain->lock, flags); + /* First save pgtable configuration*/ + amd_iommu_domain_get_pgtable(domain, &pgtable); + /* Update data structure */ - domain->mode = PAGE_MODE_NONE; + pt_root = amd_iommu_domain_encode_pgtable(NULL, PAGE_MODE_NONE); + atomic64_set(&domain->pt_root, pt_root); /* Make changes visible to IOMMUs */ update_domain(domain); + /* Restore old pgtable in domain->ptroot to free page-table */ + pt_root = amd_iommu_domain_encode_pgtable(pgtable.root, pgtable.mode); + atomic64_set(&domain->pt_root, pt_root); + /* Page-table is not visible to IOMMU anymore, so free it */ free_pagetable(domain); @@ -2908,9 +3014,11 @@ static u64 *__get_gcr3_pte(u64 *root, int level, int pasid, bool alloc) static int __set_gcr3(struct protection_domain *domain, int pasid, unsigned long cr3) { + struct domain_pgtable pgtable; u64 *pte; - if (domain->mode != PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, true); @@ -2924,9 +3032,11 @@ static int __set_gcr3(struct protection_domain *domain, int pasid, static int __clear_gcr3(struct protection_domain *domain, int pasid) { + struct domain_pgtable pgtable; u64 *pte; - if (domain->mode != PAGE_MODE_NONE) + amd_iommu_domain_get_pgtable(domain, &pgtable); + if (pgtable.mode != PAGE_MODE_NONE) return -EINVAL; pte = __get_gcr3_pte(domain->gcr3_tbl, domain->glx, pasid, false); diff --git a/drivers/iommu/amd_iommu_types.h b/drivers/iommu/amd_iommu_types.h index ca8c4522045b..7a8fdec138bd 100644 --- a/drivers/iommu/amd_iommu_types.h +++ b/drivers/iommu/amd_iommu_types.h @@ -468,8 +468,7 @@ struct protection_domain { iommu core code */ spinlock_t lock; /* mostly used to lock the page table*/ u16 id; /* the domain id written to the device table */ - int mode; /* paging mode (0-6 levels) */ - u64 *pt_root; /* page table root pointer */ + atomic64_t pt_root; /* pgtable root and pgtable mode */ int glx; /* Number of levels for GCR3 table */ u64 *gcr3_tbl; /* Guest CR3 table */ unsigned long flags; /* flags to find out type of domain */ @@ -477,6 +476,12 @@ struct protection_domain { unsigned dev_iommu[MAX_IOMMUS]; /* per-IOMMU reference count */ }; +/* For decocded pt_root */ +struct domain_pgtable { + int mode; + u64 *root; +}; + /* * Structure where we save information about one hardware AMD IOMMU in the * system. diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index d5cac4f46ca5..4e1d11af23c8 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -453,7 +453,7 @@ static int viommu_add_resv_mem(struct viommu_endpoint *vdev, if (!region) return -ENOMEM; - list_add(&vdev->resv_regions, ®ion->list); + list_add(®ion->list, &vdev->resv_regions); return 0; }