From 21bb9d64c5adc0a87c3736bc9a2d386023b88a5b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Apr 2018 10:46:01 +0200 Subject: [PATCH 01/10] swiotlb: remove a pointless comment This comment describes an aspect of the map_sg interface that isn't even exploited by swiotlb. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Konrad Rzeszutek Wilk --- kernel/dma/swiotlb.c | 6 ------ 1 file changed, 6 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 4f8a6dbf0b60..9062b14bc7f4 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -925,12 +925,6 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, * appropriate dma address and length. They are obtained via * sg_dma_{address,length}(SG). * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * * Device ownership issues as mentioned above for swiotlb_map_page are the * same here. 
*/ From b65125c6acf38388d3342b37c18c3b6cc97eeb75 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Apr 2018 14:49:23 +0200 Subject: [PATCH 02/10] swiotlb: mark is_swiotlb_buffer static Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Konrad Rzeszutek Wilk --- include/linux/swiotlb.h | 1 - kernel/dma/swiotlb.c | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 965be92c33b5..7ef541ce8f34 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -121,7 +121,6 @@ static inline unsigned int swiotlb_max_segment(void) { return 0; } #endif extern void swiotlb_print_info(void); -extern int is_swiotlb_buffer(phys_addr_t paddr); extern void swiotlb_set_max_segment(unsigned int); extern const struct dma_map_ops swiotlb_dma_ops; diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 9062b14bc7f4..26d3af52956f 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -429,7 +429,7 @@ void __init swiotlb_exit(void) max_segment = 0; } -int is_swiotlb_buffer(phys_addr_t paddr) +static int is_swiotlb_buffer(phys_addr_t paddr) { return paddr >= io_tlb_start && paddr < io_tlb_end; } From 8088546832aa2c0d8f99dd56edf6384f8a9b63b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Apr 2018 10:38:08 +0200 Subject: [PATCH 03/10] swiotlb: do not panic on mapping failures All properly written drivers now have error handling in the dma_map_single / dma_map_page callers. As swiotlb_tbl_map_single already prints a useful warning when running out of swiotlb pool space we can also remove swiotlb_full entirely as it serves no purpose now. 
Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- kernel/dma/swiotlb.c | 33 +-------------------------------- 1 file changed, 1 insertion(+), 32 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 26d3af52956f..69bf305ee5f8 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -761,34 +761,6 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size, return true; } -static void -swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, - int do_panic) -{ - if (swiotlb_force == SWIOTLB_NO_FORCE) - return; - - /* - * Ran out of IOMMU space for this operation. This is very bad. - * Unfortunately the drivers cannot handle this operation properly. - * unless they check for dma_mapping_error (most don't) - * When the mapping is small enough return a static buffer to limit - * the damage, or panic when the transfer is too big. - */ - dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n", - size); - - if (size <= io_tlb_overflow || !do_panic) - return; - - if (dir == DMA_BIDIRECTIONAL) - panic("DMA: Random memory could be DMA accessed\n"); - if (dir == DMA_FROM_DEVICE) - panic("DMA: Random memory could be DMA written\n"); - if (dir == DMA_TO_DEVICE) - panic("DMA: Random memory could be DMA read\n"); -} - /* * Map a single buffer of the indicated size for DMA in streaming mode. The * physical address to use is returned. @@ -817,10 +789,8 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, /* Oh well, have to allocate and map a bounce buffer. 
*/ map = map_single(dev, phys, size, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { - swiotlb_full(dev, size, dir, 1); + if (map == SWIOTLB_MAP_ERROR) return __phys_to_dma(dev, io_tlb_overflow_buffer); - } dev_addr = __phys_to_dma(dev, map); @@ -948,7 +918,6 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, if (map == SWIOTLB_MAP_ERROR) { /* Don't panic here, we expect map_sg users to do proper error handling. */ - swiotlb_full(hwdev, sg->length, dir, 0); attrs |= DMA_ATTR_SKIP_CPU_SYNC; swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, attrs); From dff8d6c1ed584de65aac40494d3e7468c50980c3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Aug 2018 15:30:39 +0300 Subject: [PATCH 04/10] swiotlb: remove the overflow buffer Like all other dma mapping drivers just return an error code instead of an actual memory buffer. The reason for the overflow buffer was that at the time swiotlb was invented there was no way to check for dma mapping errors, but this has long been fixed. 
Signed-off-by: Christoph Hellwig Acked-by: Catalin Marinas Reviewed-by: Robin Murphy Reviewed-by: Konrad Rzeszutek Wilk --- arch/arm64/mm/dma-mapping.c | 2 +- arch/powerpc/kernel/dma-swiotlb.c | 4 +-- include/linux/dma-direct.h | 2 ++ include/linux/swiotlb.h | 3 -- kernel/dma/direct.c | 2 -- kernel/dma/swiotlb.c | 59 ++----------------------------- 6 files changed, 8 insertions(+), 64 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 072c51fb07d7..8d91b927e09e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -324,7 +324,7 @@ static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr) { if (swiotlb) - return swiotlb_dma_mapping_error(hwdev, addr); + return dma_direct_mapping_error(hwdev, addr); return 0; } diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index 88f3963ca30f..5fc335f4d9cd 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -11,7 +11,7 @@ * */ -#include +#include #include #include #include @@ -59,7 +59,7 @@ const struct dma_map_ops powerpc_swiotlb_dma_ops = { .sync_single_for_device = swiotlb_sync_single_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, .sync_sg_for_device = swiotlb_sync_sg_for_device, - .mapping_error = swiotlb_dma_mapping_error, + .mapping_error = dma_direct_mapping_error, .get_required_mask = swiotlb_powerpc_get_required, }; diff --git a/include/linux/dma-direct.h b/include/linux/dma-direct.h index fbca184ff5a0..bd73e7a91410 100644 --- a/include/linux/dma-direct.h +++ b/include/linux/dma-direct.h @@ -5,6 +5,8 @@ #include #include +#define DIRECT_MAPPING_ERROR 0 + #ifdef CONFIG_ARCH_HAS_PHYS_TO_DMA #include #else diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index 7ef541ce8f34..f847c1b265c4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -106,9 +106,6 @@ extern void 
swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, int nelems, enum dma_data_direction dir); -extern int -swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr); - extern int swiotlb_dma_supported(struct device *hwdev, u64 mask); diff --git a/kernel/dma/direct.c b/kernel/dma/direct.c index 87a6bc2a96c0..f14c376937e5 100644 --- a/kernel/dma/direct.c +++ b/kernel/dma/direct.c @@ -14,8 +14,6 @@ #include #include -#define DIRECT_MAPPING_ERROR 0 - /* * Most architectures use ZONE_DMA for the first 16 Megabytes, but * some use it for entirely different regions: diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 69bf305ee5f8..11dbcd80b4a6 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -72,13 +72,6 @@ static phys_addr_t io_tlb_start, io_tlb_end; */ static unsigned long io_tlb_nslabs; -/* - * When the IOMMU overflows we return a fallback buffer. This sets the size. - */ -static unsigned long io_tlb_overflow = 32*1024; - -static phys_addr_t io_tlb_overflow_buffer; - /* * This is a free list describing the number of free entries available from * each index @@ -126,7 +119,6 @@ setup_io_tlb_npages(char *str) return 0; } early_param("swiotlb", setup_io_tlb_npages); -/* make io_tlb_overflow tunable too? 
*/ unsigned long swiotlb_nr_tbl(void) { @@ -194,16 +186,10 @@ void __init swiotlb_update_mem_attributes(void) bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); memset(vaddr, 0, bytes); - - vaddr = phys_to_virt(io_tlb_overflow_buffer); - bytes = PAGE_ALIGN(io_tlb_overflow); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - memset(vaddr, 0, bytes); } int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) { - void *v_overflow_buffer; unsigned long i, bytes; bytes = nslabs << IO_TLB_SHIFT; @@ -212,17 +198,6 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) io_tlb_start = __pa(tlb); io_tlb_end = io_tlb_start + bytes; - /* - * Get the overflow emergency buffer - */ - v_overflow_buffer = memblock_virt_alloc_low_nopanic( - PAGE_ALIGN(io_tlb_overflow), - PAGE_SIZE); - if (!v_overflow_buffer) - return -ENOMEM; - - io_tlb_overflow_buffer = __pa(v_overflow_buffer); - /* * Allocate and initialize the free list array. This array is used * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE @@ -330,7 +305,6 @@ int swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) { unsigned long i, bytes; - unsigned char *v_overflow_buffer; bytes = nslabs << IO_TLB_SHIFT; @@ -341,19 +315,6 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); memset(tlb, 0, bytes); - /* - * Get the overflow emergency buffer - */ - v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, - get_order(io_tlb_overflow)); - if (!v_overflow_buffer) - goto cleanup2; - - set_memory_decrypted((unsigned long)v_overflow_buffer, - io_tlb_overflow >> PAGE_SHIFT); - memset(v_overflow_buffer, 0, io_tlb_overflow); - io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); - /* * Allocate and initialize the free list array. 
This array is used * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE @@ -390,10 +351,6 @@ cleanup4: sizeof(int))); io_tlb_list = NULL; cleanup3: - free_pages((unsigned long)v_overflow_buffer, - get_order(io_tlb_overflow)); - io_tlb_overflow_buffer = 0; -cleanup2: io_tlb_end = 0; io_tlb_start = 0; io_tlb_nslabs = 0; @@ -407,8 +364,6 @@ void __init swiotlb_exit(void) return; if (late_alloc) { - free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), - get_order(io_tlb_overflow)); free_pages((unsigned long)io_tlb_orig_addr, get_order(io_tlb_nslabs * sizeof(phys_addr_t))); free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * @@ -416,8 +371,6 @@ void __init swiotlb_exit(void) free_pages((unsigned long)phys_to_virt(io_tlb_start), get_order(io_tlb_nslabs << IO_TLB_SHIFT)); } else { - memblock_free_late(io_tlb_overflow_buffer, - PAGE_ALIGN(io_tlb_overflow)); memblock_free_late(__pa(io_tlb_orig_addr), PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); memblock_free_late(__pa(io_tlb_list), @@ -790,7 +743,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, /* Oh well, have to allocate and map a bounce buffer. 
*/ map = map_single(dev, phys, size, dir, attrs); if (map == SWIOTLB_MAP_ERROR) - return __phys_to_dma(dev, io_tlb_overflow_buffer); + return DIRECT_MAPPING_ERROR; dev_addr = __phys_to_dma(dev, map); @@ -801,7 +754,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, attrs |= DMA_ATTR_SKIP_CPU_SYNC; swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - return __phys_to_dma(dev, io_tlb_overflow_buffer); + return DIRECT_MAPPING_ERROR; } /* @@ -985,12 +938,6 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); } -int -swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) -{ - return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); -} - /* * Return whether the given device DMA address mask can be supported * properly. For example, if your device can only drive the low 24-bits @@ -1033,7 +980,7 @@ void swiotlb_free(struct device *dev, size_t size, void *vaddr, } const struct dma_map_ops swiotlb_dma_ops = { - .mapping_error = swiotlb_dma_mapping_error, + .mapping_error = dma_direct_mapping_error, .alloc = swiotlb_alloc, .free = swiotlb_free, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, From 27744e0077f4c8b40aaa3126256708c21e56655d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 12 Apr 2018 09:56:56 +0200 Subject: [PATCH 05/10] swiotlb: merge swiotlb_unmap_page and unmap_single Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Konrad Rzeszutek Wilk --- kernel/dma/swiotlb.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 11dbcd80b4a6..15335f3a1bf3 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -765,9 +765,9 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * After this call, reads by the cpu to the buffer are guaranteed to see * whatever the device wrote there. 
*/ -static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) +void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, + size_t size, enum dma_data_direction dir, + unsigned long attrs) { phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); @@ -790,13 +790,6 @@ static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, dma_mark_clean(phys_to_virt(paddr), size); } -void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - unmap_single(hwdev, dev_addr, size, dir, attrs); -} - /* * Make physical memory consistent for a single streaming mode DMA translation * after a transfer. @@ -900,7 +893,7 @@ swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, BUG_ON(dir == DMA_NONE); for_each_sg(sgl, sg, nelems, i) - unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, + swiotlb_unmap_page(hwdev, sg->dma_address, sg_dma_len(sg), dir, attrs); } From 4803b44e68fc08e76f00dec90074d199a11ad6f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Aug 2018 15:56:05 +0200 Subject: [PATCH 06/10] swiotlb: use swiotlb_map_page in swiotlb_map_sg_attrs No need to duplicate the code - map_sg is equivalent to map_page for each page in the scatterlist. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Konrad Rzeszutek Wilk --- kernel/dma/swiotlb.c | 34 ++++++++++++---------------------- 1 file changed, 12 insertions(+), 22 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 15335f3a1bf3..15755d7a5242 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -845,37 +845,27 @@ swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, * same here. 
*/ int -swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, +swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, unsigned long attrs) { struct scatterlist *sg; int i; - BUG_ON(dir == DMA_NONE); - for_each_sg(sgl, sg, nelems, i) { - phys_addr_t paddr = sg_phys(sg); - dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - - if (swiotlb_force == SWIOTLB_FORCE || - !dma_capable(hwdev, dev_addr, sg->length)) { - phys_addr_t map = map_single(hwdev, sg_phys(sg), - sg->length, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { - /* Don't panic here, we expect map_sg users - to do proper error handling. */ - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, - attrs); - sg_dma_len(sgl) = 0; - return 0; - } - sg->dma_address = __phys_to_dma(hwdev, map); - } else - sg->dma_address = dev_addr; + sg->dma_address = swiotlb_map_page(dev, sg_page(sg), sg->offset, + sg->length, dir, attrs); + if (sg->dma_address == DIRECT_MAPPING_ERROR) + goto out_error; sg_dma_len(sg) = sg->length; } + return nelems; + +out_error: + swiotlb_unmap_sg_attrs(dev, sgl, i, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); + sg_dma_len(sgl) = 0; + return 0; } /* From c4dae366925f929749b2a26efa53b561904a9a4f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Aug 2018 16:21:10 +0200 Subject: [PATCH 07/10] swiotlb: refactor swiotlb_map_page Remove the somewhat useless map_single function, and replace it with a swiotlb_bounce_page handler that handles everything related to actually bouncing a page. 
Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy Reviewed-by: Konrad Rzeszutek Wilk --- kernel/dma/swiotlb.c | 67 ++++++++++++++++++++------------------------ 1 file changed, 30 insertions(+), 37 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 15755d7a5242..57507b18caa4 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -543,26 +543,6 @@ found: return tlb_addr; } -/* - * Allocates bounce buffer and returns its physical address. - */ -static phys_addr_t -map_single(struct device *hwdev, phys_addr_t phys, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t start_dma_addr; - - if (swiotlb_force == SWIOTLB_NO_FORCE) { - dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", - &phys); - return SWIOTLB_MAP_ERROR; - } - - start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); - return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, - dir, attrs); -} - /* * tlb_addr is the physical address of the bounce buffer to unmap. */ @@ -714,6 +694,34 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size, return true; } +static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys, + size_t size, enum dma_data_direction dir, unsigned long attrs) +{ + dma_addr_t dma_addr; + + if (unlikely(swiotlb_force == SWIOTLB_NO_FORCE)) { + dev_warn_ratelimited(dev, + "Cannot do DMA to address %pa\n", phys); + return DIRECT_MAPPING_ERROR; + } + + /* Oh well, have to allocate and map a bounce buffer. 
*/ + *phys = swiotlb_tbl_map_single(dev, __phys_to_dma(dev, io_tlb_start), + *phys, size, dir, attrs); + if (*phys == SWIOTLB_MAP_ERROR) + return DIRECT_MAPPING_ERROR; + + /* Ensure that the address returned is DMA'ble */ + dma_addr = __phys_to_dma(dev, *phys); + if (unlikely(!dma_capable(dev, dma_addr, size))) { + swiotlb_tbl_unmap_single(dev, *phys, size, dir, + attrs | DMA_ATTR_SKIP_CPU_SYNC); + return DIRECT_MAPPING_ERROR; + } + + return dma_addr; +} + /* * Map a single buffer of the indicated size for DMA in streaming mode. The * physical address to use is returned. @@ -726,7 +734,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - phys_addr_t map, phys = page_to_phys(page) + offset; + phys_addr_t phys = page_to_phys(page) + offset; dma_addr_t dev_addr = phys_to_dma(dev, phys); BUG_ON(dir == DMA_NONE); @@ -739,22 +747,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, return dev_addr; trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); - - /* Oh well, have to allocate and map a bounce buffer. */ - map = map_single(dev, phys, size, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) - return DIRECT_MAPPING_ERROR; - - dev_addr = __phys_to_dma(dev, map); - - /* Ensure that the address returned is DMA'ble */ - if (dma_capable(dev, dev_addr, size)) - return dev_addr; - - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - - return DIRECT_MAPPING_ERROR; + return swiotlb_bounce_page(dev, &phys, size, dir, attrs); } /* From fafadcd16595c1df82df399f62421718ec9bf70a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 30 Sep 2018 16:13:33 -0700 Subject: [PATCH 08/10] swiotlb: don't dip into swiotlb pool for coherent allocations All architectures that support swiotlb also have a zone that backs up these less than full addressing allocations (usually ZONE_DMA32). 
Because of that it is rather pointless to fall back to the global swiotlb buffer if the normal dma direct allocation failed - the only thing this will do is to eat up bounce buffers that would be more useful to serve streaming mappings. Signed-off-by: Christoph Hellwig Acked-by: Catalin Marinas Acked-by: Konrad Rzeszutek Wilk --- arch/arm64/mm/dma-mapping.c | 6 +-- include/linux/swiotlb.h | 5 -- kernel/dma/swiotlb.c | 105 +----------------------------------- 3 files changed, 5 insertions(+), 111 deletions(-) diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 8d91b927e09e..eee6cfcfde9e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -112,7 +112,7 @@ static void *__dma_alloc(struct device *dev, size_t size, return addr; } - ptr = swiotlb_alloc(dev, size, dma_handle, flags, attrs); + ptr = dma_direct_alloc_pages(dev, size, dma_handle, flags, attrs); if (!ptr) goto no_mem; @@ -133,7 +133,7 @@ static void *__dma_alloc(struct device *dev, size_t size, return coherent_ptr; no_map: - swiotlb_free(dev, size, ptr, *dma_handle, attrs); + dma_direct_free_pages(dev, size, ptr, *dma_handle, attrs); no_mem: return NULL; } @@ -151,7 +151,7 @@ static void __dma_free(struct device *dev, size_t size, return; vunmap(vaddr); } - swiotlb_free(dev, size, swiotlb_addr, dma_handle, attrs); + dma_direct_free_pages(dev, size, swiotlb_addr, dma_handle, attrs); } static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, diff --git a/include/linux/swiotlb.h b/include/linux/swiotlb.h index f847c1b265c4..a387b59640a4 100644 --- a/include/linux/swiotlb.h +++ b/include/linux/swiotlb.h @@ -67,11 +67,6 @@ extern void swiotlb_tbl_sync_single(struct device *hwdev, /* Accessory functions. 
*/ -void *swiotlb_alloc(struct device *hwdev, size_t size, dma_addr_t *dma_handle, - gfp_t flags, unsigned long attrs); -void swiotlb_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs); - extern dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, unsigned long offset, size_t size, enum dma_data_direction dir, diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 57507b18caa4..1a01b0ac0a5e 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -622,78 +622,6 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, } } -static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, - size_t size) -{ - u64 mask = DMA_BIT_MASK(32); - - if (dev && dev->coherent_dma_mask) - mask = dev->coherent_dma_mask; - return addr + size - 1 <= mask; -} - -static void * -swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned long attrs) -{ - phys_addr_t phys_addr; - - if (swiotlb_force == SWIOTLB_NO_FORCE) - goto out_warn; - - phys_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), - 0, size, DMA_FROM_DEVICE, attrs); - if (phys_addr == SWIOTLB_MAP_ERROR) - goto out_warn; - - *dma_handle = __phys_to_dma(dev, phys_addr); - if (!dma_coherent_ok(dev, *dma_handle, size)) - goto out_unmap; - - memset(phys_to_virt(phys_addr), 0, size); - return phys_to_virt(phys_addr); - -out_unmap: - dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)dev->coherent_dma_mask, - (unsigned long long)*dma_handle); - - /* - * DMA_TO_DEVICE to avoid memcpy in unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. 
- */ - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); -out_warn: - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { - dev_warn(dev, - "swiotlb: coherent allocation failed, size=%zu\n", - size); - dump_stack(); - } - return NULL; -} - -static bool swiotlb_free_buffer(struct device *dev, size_t size, - dma_addr_t dma_addr) -{ - phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); - - WARN_ON_ONCE(irqs_disabled()); - - if (!is_swiotlb_buffer(phys_addr)) - return false; - - /* - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - return true; -} - static dma_addr_t swiotlb_bounce_page(struct device *dev, phys_addr_t *phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { @@ -926,39 +854,10 @@ swiotlb_dma_supported(struct device *hwdev, u64 mask) return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; } -void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - void *vaddr; - - /* temporary workaround: */ - if (gfp & __GFP_NOWARN) - attrs |= DMA_ATTR_NO_WARN; - - /* - * Don't print a warning when the first allocation attempt fails. - * swiotlb_alloc_coherent() will print a warning when the DMA memory - * allocation ultimately failed. 
- */ - gfp |= __GFP_NOWARN; - - vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); - if (!vaddr) - vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); - return vaddr; -} - -void swiotlb_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - if (!swiotlb_free_buffer(dev, size, dma_addr)) - dma_direct_free(dev, size, vaddr, dma_addr, attrs); -} - const struct dma_map_ops swiotlb_dma_ops = { .mapping_error = dma_direct_mapping_error, - .alloc = swiotlb_alloc, - .free = swiotlb_free, + .alloc = dma_direct_alloc, + .free = dma_direct_free, .sync_single_for_cpu = swiotlb_sync_single_for_cpu, .sync_single_for_device = swiotlb_sync_single_for_device, .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, From a4a4330db46a17289cf2ca5f9fb153d536267b97 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 19 Oct 2018 08:51:53 +0200 Subject: [PATCH 09/10] swiotlb: add support for non-coherent DMA Handle architectures that are not cache coherent directly in the main swiotlb code by calling arch_sync_dma_for_{device,cpu} in all the right places from the various dma_map/unmap/sync methods when the device is non-coherent. Because swiotlb now uses dma_direct_alloc for the coherent allocation that side is already taken care of by the dma-direct code calling into arch_dma_{alloc,free} for devices that are non-coherent. Signed-off-by: Christoph Hellwig Acked-by: Konrad Rzeszutek Wilk --- kernel/dma/swiotlb.c | 33 +++++++++++++++++++++++---------- 1 file changed, 23 insertions(+), 10 deletions(-) diff --git a/kernel/dma/swiotlb.c b/kernel/dma/swiotlb.c index 1a01b0ac0a5e..ebecaf255ea2 100644 --- a/kernel/dma/swiotlb.c +++ b/kernel/dma/swiotlb.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include @@ -671,11 +672,17 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * we can safely return the device addr and not worry about bounce * buffering it. 
*/ - if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) - return dev_addr; + if (!dma_capable(dev, dev_addr, size) || + swiotlb_force == SWIOTLB_FORCE) { + trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); + dev_addr = swiotlb_bounce_page(dev, &phys, size, dir, attrs); + } - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); - return swiotlb_bounce_page(dev, &phys, size, dir, attrs); + if (!dev_is_dma_coherent(dev) && + (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) + arch_sync_dma_for_device(dev, phys, size, dir); + + return dev_addr; } /* @@ -694,6 +701,10 @@ void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, BUG_ON(dir == DMA_NONE); + if (!dev_is_dma_coherent(hwdev) && + (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) + arch_sync_dma_for_cpu(hwdev, paddr, size, dir); + if (is_swiotlb_buffer(paddr)) { swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); return; @@ -730,15 +741,17 @@ swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, BUG_ON(dir == DMA_NONE); - if (is_swiotlb_buffer(paddr)) { + if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_CPU) + arch_sync_dma_for_cpu(hwdev, paddr, size, dir); + + if (is_swiotlb_buffer(paddr)) swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); - return; - } - if (dir != DMA_FROM_DEVICE) - return; + if (!dev_is_dma_coherent(hwdev) && target == SYNC_FOR_DEVICE) + arch_sync_dma_for_device(hwdev, paddr, size, dir); - dma_mark_clean(phys_to_virt(paddr), size); + if (!is_swiotlb_buffer(paddr) && dir == DMA_FROM_DEVICE) + dma_mark_clean(phys_to_virt(paddr), size); } void From 886643b766321f15f63bd950be618cbb6dd22bbc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 8 Oct 2018 09:12:01 +0200 Subject: [PATCH 10/10] arm64: use the generic swiotlb_dma_ops Now that the generic swiotlb code supports non-coherent DMA we can switch to it for arm64. 
For that we need to refactor the existing alloc/free/mmap/pgprot helpers to be used as the architecture hooks, and implement the standard arch_sync_dma_for_{device,cpu} hooks for cache maintenance in the streaming dma hooks, which also implies using the generic dma_coherent flag in struct device. Note that we need to keep the old is_device_dma_coherent function around for now, so that the shared arm/arm64 Xen code keeps working. Signed-off-by: Christoph Hellwig Acked-by: Catalin Marinas --- arch/arm64/Kconfig | 4 + arch/arm64/include/asm/device.h | 1 - arch/arm64/include/asm/dma-mapping.h | 7 +- arch/arm64/mm/dma-mapping.c | 263 ++++++--------------------- 4 files changed, 60 insertions(+), 215 deletions(-) diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 1b1a0e95c751..c4db5131d837 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -11,6 +11,8 @@ config ARM64 select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED + select ARCH_HAS_DMA_COHERENT_TO_PFN + select ARCH_HAS_DMA_MMAP_PGPROT select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER @@ -24,6 +26,8 @@ config ARM64 select ARCH_HAS_SG_CHAIN select ARCH_HAS_STRICT_KERNEL_RWX select ARCH_HAS_STRICT_MODULE_RWX + select ARCH_HAS_SYNC_DMA_FOR_DEVICE + select ARCH_HAS_SYNC_DMA_FOR_CPU select ARCH_HAS_SYSCALL_WRAPPER select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 5a5fa47a6b18..3dd3d664c5c5 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -23,7 +23,6 @@ struct dev_archdata { #ifdef CONFIG_XEN const struct dma_map_ops *dev_dma_ops; #endif - bool dma_coherent; }; struct pdev_archdata { diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index b7847eb8a7bb..c41f3fb1446c 100644 --- 
a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -44,10 +44,13 @@ void arch_teardown_dma_ops(struct device *dev); #define arch_teardown_dma_ops arch_teardown_dma_ops #endif -/* do not use this function in a driver */ +/* + * Do not use this function in a driver, it is only provided for + * arch/arm/mm/xen.c, which is used by arm64 as well. + */ static inline bool is_device_dma_coherent(struct device *dev) { - return dev->archdata.dma_coherent; + return dev->dma_coherent; } #endif /* __KERNEL__ */ diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index eee6cfcfde9e..5a5d7990e980 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include @@ -32,16 +33,6 @@ #include -static int swiotlb __ro_after_init; - -static pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot, - bool coherent) -{ - if (!coherent || (attrs & DMA_ATTR_WRITE_COMBINE)) - return pgprot_writecombine(prot); - return prot; -} - static struct gen_pool *atomic_pool __ro_after_init; #define DEFAULT_DMA_COHERENT_POOL_SIZE SZ_256K @@ -91,18 +82,16 @@ static int __free_from_pool(void *start, size_t size) return 1; } -static void *__dma_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t flags, - unsigned long attrs) +void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, + gfp_t flags, unsigned long attrs) { struct page *page; void *ptr, *coherent_ptr; - bool coherent = is_device_dma_coherent(dev); - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, false); + pgprot_t prot = pgprot_writecombine(PAGE_KERNEL); size = PAGE_ALIGN(size); - if (!coherent && !gfpflags_allow_blocking(flags)) { + if (!gfpflags_allow_blocking(flags)) { struct page *page = NULL; void *addr = __alloc_from_pool(size, &page, flags); @@ -116,10 +105,6 @@ static void *__dma_alloc(struct device *dev, size_t size, if (!ptr) goto no_mem; - 
/* no need for non-cacheable mapping if coherent */ - if (coherent) - return ptr; - /* remove any dirty cache lines on the kernel alias */ __dma_flush_area(ptr, size); @@ -138,127 +123,54 @@ no_mem: return NULL; } -static void __dma_free(struct device *dev, size_t size, - void *vaddr, dma_addr_t dma_handle, - unsigned long attrs) +void arch_dma_free(struct device *dev, size_t size, void *vaddr, + dma_addr_t dma_handle, unsigned long attrs) { - void *swiotlb_addr = phys_to_virt(dma_to_phys(dev, dma_handle)); + if (!__free_from_pool(vaddr, PAGE_ALIGN(size))) { + void *kaddr = phys_to_virt(dma_to_phys(dev, dma_handle)); - size = PAGE_ALIGN(size); - - if (!is_device_dma_coherent(dev)) { - if (__free_from_pool(vaddr, size)) - return; vunmap(vaddr); + dma_direct_free_pages(dev, size, kaddr, dma_handle, attrs); } - dma_direct_free_pages(dev, size, swiotlb_addr, dma_handle, attrs); } -static dma_addr_t __swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) +long arch_dma_coherent_to_pfn(struct device *dev, void *cpu_addr, + dma_addr_t dma_addr) { - dma_addr_t dev_addr; - - dev_addr = swiotlb_map_page(dev, page, offset, size, dir, attrs); - if (!is_device_dma_coherent(dev) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); - - return dev_addr; + return __phys_to_pfn(dma_to_phys(dev, dma_addr)); } - -static void __swiotlb_unmap_page(struct device *dev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) +pgprot_t arch_dma_mmap_pgprot(struct device *dev, pgprot_t prot, + unsigned long attrs) { - if (!is_device_dma_coherent(dev) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); - swiotlb_unmap_page(dev, dev_addr, size, dir, attrs); + if (!dev_is_dma_coherent(dev) || (attrs & DMA_ATTR_WRITE_COMBINE)) + return 
pgprot_writecombine(prot); + return prot; } -static int __swiotlb_map_sg_attrs(struct device *dev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs) +void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) { - struct scatterlist *sg; - int i, ret; + __dma_map_area(phys_to_virt(paddr), size, dir); +} - ret = swiotlb_map_sg_attrs(dev, sgl, nelems, dir, attrs); - if (!is_device_dma_coherent(dev) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - for_each_sg(sgl, sg, ret, i) - __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); +void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, + size_t size, enum dma_data_direction dir) +{ + __dma_unmap_area(phys_to_virt(paddr), size, dir); +} + +static int __swiotlb_get_sgtable_page(struct sg_table *sgt, + struct page *page, size_t size) +{ + int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); + + if (!ret) + sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); return ret; } -static void __swiotlb_unmap_sg_attrs(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - if (!is_device_dma_coherent(dev) && - (attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) - for_each_sg(sgl, sg, nelems, i) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); - swiotlb_unmap_sg_attrs(dev, sgl, nelems, dir, attrs); -} - -static void __swiotlb_sync_single_for_cpu(struct device *dev, - dma_addr_t dev_addr, size_t size, - enum dma_data_direction dir) -{ - if (!is_device_dma_coherent(dev)) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); - swiotlb_sync_single_for_cpu(dev, dev_addr, size, dir); -} - -static void __swiotlb_sync_single_for_device(struct device *dev, - dma_addr_t dev_addr, size_t size, - enum dma_data_direction dir) -{ - swiotlb_sync_single_for_device(dev, dev_addr, size, 
dir); - if (!is_device_dma_coherent(dev)) - __dma_map_area(phys_to_virt(dma_to_phys(dev, dev_addr)), size, dir); -} - -static void __swiotlb_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - if (!is_device_dma_coherent(dev)) - for_each_sg(sgl, sg, nelems, i) - __dma_unmap_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); - swiotlb_sync_sg_for_cpu(dev, sgl, nelems, dir); -} - -static void __swiotlb_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nelems, - enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - swiotlb_sync_sg_for_device(dev, sgl, nelems, dir); - if (!is_device_dma_coherent(dev)) - for_each_sg(sgl, sg, nelems, i) - __dma_map_area(phys_to_virt(dma_to_phys(dev, sg->dma_address)), - sg->length, dir); -} - static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, unsigned long pfn, size_t size) { @@ -277,74 +189,6 @@ static int __swiotlb_mmap_pfn(struct vm_area_struct *vma, return ret; } -static int __swiotlb_mmap(struct device *dev, - struct vm_area_struct *vma, - void *cpu_addr, dma_addr_t dma_addr, size_t size, - unsigned long attrs) -{ - int ret; - unsigned long pfn = dma_to_phys(dev, dma_addr) >> PAGE_SHIFT; - - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, - is_device_dma_coherent(dev)); - - if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) - return ret; - - return __swiotlb_mmap_pfn(vma, pfn, size); -} - -static int __swiotlb_get_sgtable_page(struct sg_table *sgt, - struct page *page, size_t size) -{ - int ret = sg_alloc_table(sgt, 1, GFP_KERNEL); - - if (!ret) - sg_set_page(sgt->sgl, page, PAGE_ALIGN(size), 0); - - return ret; -} - -static int __swiotlb_get_sgtable(struct device *dev, struct sg_table *sgt, - void *cpu_addr, dma_addr_t handle, size_t size, - unsigned long attrs) -{ - struct page *page = phys_to_page(dma_to_phys(dev, handle)); - - return 
__swiotlb_get_sgtable_page(sgt, page, size); -} - -static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) -{ - if (swiotlb) - return swiotlb_dma_supported(hwdev, mask); - return 1; -} - -static int __swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t addr) -{ - if (swiotlb) - return dma_direct_mapping_error(hwdev, addr); - return 0; -} - -static const struct dma_map_ops arm64_swiotlb_dma_ops = { - .alloc = __dma_alloc, - .free = __dma_free, - .mmap = __swiotlb_mmap, - .get_sgtable = __swiotlb_get_sgtable, - .map_page = __swiotlb_map_page, - .unmap_page = __swiotlb_unmap_page, - .map_sg = __swiotlb_map_sg_attrs, - .unmap_sg = __swiotlb_unmap_sg_attrs, - .sync_single_for_cpu = __swiotlb_sync_single_for_cpu, - .sync_single_for_device = __swiotlb_sync_single_for_device, - .sync_sg_for_cpu = __swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = __swiotlb_sync_sg_for_device, - .dma_supported = __swiotlb_dma_supported, - .mapping_error = __swiotlb_dma_mapping_error, -}; - static int __init atomic_pool_init(void) { pgprot_t prot = __pgprot(PROT_NORMAL_NC); @@ -500,10 +344,6 @@ EXPORT_SYMBOL(dummy_dma_ops); static int __init arm64_dma_init(void) { - if (swiotlb_force == SWIOTLB_FORCE || - max_pfn > (arm64_dma_phys_limit >> PAGE_SHIFT)) - swiotlb = 1; - WARN_TAINT(ARCH_DMA_MINALIGN < cache_line_size(), TAINT_CPU_OUT_OF_SPEC, "ARCH_DMA_MINALIGN smaller than CTR_EL0.CWG (%d < %d)", @@ -528,7 +368,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, dma_addr_t *handle, gfp_t gfp, unsigned long attrs) { - bool coherent = is_device_dma_coherent(dev); + bool coherent = dev_is_dma_coherent(dev); int ioprot = dma_info_to_prot(DMA_BIDIRECTIONAL, coherent, attrs); size_t iosize = size; void *addr; @@ -569,7 +409,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, addr = NULL; } } else if (attrs & DMA_ATTR_FORCE_CONTIGUOUS) { - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + pgprot_t prot = 
arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); struct page *page; page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, @@ -596,7 +436,7 @@ static void *__iommu_alloc_attrs(struct device *dev, size_t size, size >> PAGE_SHIFT); } } else { - pgprot_t prot = __get_dma_pgprot(attrs, PAGE_KERNEL, coherent); + pgprot_t prot = arch_dma_mmap_pgprot(dev, PAGE_KERNEL, attrs); struct page **pages; pages = iommu_dma_alloc(dev, iosize, gfp, attrs, ioprot, @@ -658,8 +498,7 @@ static int __iommu_mmap_attrs(struct device *dev, struct vm_area_struct *vma, struct vm_struct *area; int ret; - vma->vm_page_prot = __get_dma_pgprot(attrs, vma->vm_page_prot, - is_device_dma_coherent(dev)); + vma->vm_page_prot = arch_dma_mmap_pgprot(dev, vma->vm_page_prot, attrs); if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) return ret; @@ -709,11 +548,11 @@ static void __iommu_sync_single_for_cpu(struct device *dev, { phys_addr_t phys; - if (is_device_dma_coherent(dev)) + if (dev_is_dma_coherent(dev)) return; phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); - __dma_unmap_area(phys_to_virt(phys), size, dir); + arch_sync_dma_for_cpu(dev, phys, size, dir); } static void __iommu_sync_single_for_device(struct device *dev, @@ -722,11 +561,11 @@ static void __iommu_sync_single_for_device(struct device *dev, { phys_addr_t phys; - if (is_device_dma_coherent(dev)) + if (dev_is_dma_coherent(dev)) return; phys = iommu_iova_to_phys(iommu_get_domain_for_dev(dev), dev_addr); - __dma_map_area(phys_to_virt(phys), size, dir); + arch_sync_dma_for_device(dev, phys, size, dir); } static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, @@ -734,7 +573,7 @@ static dma_addr_t __iommu_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - bool coherent = is_device_dma_coherent(dev); + bool coherent = dev_is_dma_coherent(dev); int prot = dma_info_to_prot(dir, coherent, attrs); dma_addr_t dev_addr = iommu_dma_map_page(dev, 
page, offset, size, prot); @@ -762,11 +601,11 @@ static void __iommu_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg; int i; - if (is_device_dma_coherent(dev)) + if (dev_is_dma_coherent(dev)) return; for_each_sg(sgl, sg, nelems, i) - __dma_unmap_area(sg_virt(sg), sg->length, dir); + arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); } static void __iommu_sync_sg_for_device(struct device *dev, @@ -776,18 +615,18 @@ static void __iommu_sync_sg_for_device(struct device *dev, struct scatterlist *sg; int i; - if (is_device_dma_coherent(dev)) + if (dev_is_dma_coherent(dev)) return; for_each_sg(sgl, sg, nelems, i) - __dma_map_area(sg_virt(sg), sg->length, dir); + arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); } static int __iommu_map_sg_attrs(struct device *dev, struct scatterlist *sgl, int nelems, enum dma_data_direction dir, unsigned long attrs) { - bool coherent = is_device_dma_coherent(dev); + bool coherent = dev_is_dma_coherent(dev); if ((attrs & DMA_ATTR_SKIP_CPU_SYNC) == 0) __iommu_sync_sg_for_device(dev, sgl, nelems, dir); @@ -879,9 +718,9 @@ void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { if (!dev->dma_ops) - dev->dma_ops = &arm64_swiotlb_dma_ops; + dev->dma_ops = &swiotlb_dma_ops; - dev->archdata.dma_coherent = coherent; + dev->dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); #ifdef CONFIG_XEN