From f9820a46dd7888b05a36e81166fb1abcc47dcc3f Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 29 Nov 2010 13:52:18 -0500
Subject: [PATCH 1/6] ttm: Introduce a placeholder for DMA (bus) addresses.

This is right now limited to only non-pool constructs.

[v2: Fixed indentation issues, add review-by tag]

Reviewed-by: Thomas Hellstrom <thomas@shipmail.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Ian Campbell <ian.campbell@citrix.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc.c |  8 +++++---
 drivers/gpu/drm/ttm/ttm_tt.c         | 10 ++++++++--
 include/drm/ttm/ttm_bo_driver.h      |  2 ++
 include/drm/ttm/ttm_page_alloc.h     |  8 ++++++--
 4 files changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index b1e02fffd3cc..9d9d92945f8c 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -38,6 +38,7 @@
 #include <linux/mm.h>
 #include <linux/seq_file.h> /* for seq_printf */
 #include <linux/slab.h>
+#include <linux/dma-mapping.h>
 
 #include <asm/atomic.h>
 
@@ -662,7 +663,8 @@ out:
  * cached pages.
  */
 int ttm_get_pages(struct list_head *pages, int flags,
-		enum ttm_caching_state cstate, unsigned count)
+		  enum ttm_caching_state cstate, unsigned count,
+		  dma_addr_t *dma_address)
 {
 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
 	struct page *p = NULL;
@@ -720,7 +722,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
 			printk(KERN_ERR TTM_PFX
 			       "Failed to allocate extra pages "
 			       "for large request.");
-			ttm_put_pages(pages, 0, flags, cstate);
+			ttm_put_pages(pages, 0, flags, cstate, NULL);
 			return r;
 		}
 	}
@@ -731,7 +733,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
 
 /* Put all pages in pages list to correct pool to wait for reuse */
 void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
-		enum ttm_caching_state cstate)
+		   enum ttm_caching_state cstate, dma_addr_t *dma_address)
 {
 	unsigned long irq_flags;
 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index af789dc869b9..0d39001259fb 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -49,12 +49,16 @@ static int ttm_tt_swapin(struct ttm_tt *ttm);
 static void ttm_tt_alloc_page_directory(struct ttm_tt *ttm)
 {
 	ttm->pages = drm_calloc_large(ttm->num_pages, sizeof(*ttm->pages));
+	ttm->dma_address = drm_calloc_large(ttm->num_pages,
+					    sizeof(*ttm->dma_address));
 }
 
 static void ttm_tt_free_page_directory(struct ttm_tt *ttm)
 {
 	drm_free_large(ttm->pages);
 	ttm->pages = NULL;
+	drm_free_large(ttm->dma_address);
+	ttm->dma_address = NULL;
 }
 
 static void ttm_tt_free_user_pages(struct ttm_tt *ttm)
@@ -105,7 +109,8 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
 
 		INIT_LIST_HEAD(&h);
 
-		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1);
+		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
+				    &ttm->dma_address[index]);
 
 		if (ret != 0)
 			return NULL;
@@ -298,7 +303,8 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
 			count++;
 		}
 	}
-	ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state);
+	ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
+		      ttm->dma_address);
 	ttm->state = tt_unpopulated;
 	ttm->first_himem_page = ttm->num_pages;
 	ttm->last_lomem_page = -1;
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 8e0c848326b6..6dc4fccda73c 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -149,6 +149,7 @@ enum ttm_caching_state {
  * @swap_storage: Pointer to shmem struct file for swap storage.
  * @caching_state: The current caching state of the pages.
  * @state: The current binding state of the pages.
+ * @dma_address: The DMA (bus) addresses of the pages (if TTM_PAGE_FLAG_DMA32)
  *
  * This is a structure holding the pages, caching- and aperture binding
  * status for a buffer object that isn't backed by fixed (VRAM / AGP)
@@ -173,6 +174,7 @@ struct ttm_tt {
 		tt_unbound,
 		tt_unpopulated,
 	} state;
+	dma_addr_t *dma_address;
 };
 
 #define TTM_MEMTYPE_FLAG_FIXED         (1 << 0)	/* Fixed (on-card) PCI memory */
diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
index 116821448c38..8062890f725e 100644
--- a/include/drm/ttm/ttm_page_alloc.h
+++ b/include/drm/ttm/ttm_page_alloc.h
@@ -36,11 +36,13 @@
  * @flags: ttm flags for page allocation.
  * @cstate: ttm caching state for the page.
  * @count: number of pages to allocate.
+ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
  */
 int ttm_get_pages(struct list_head *pages,
 		  int flags,
 		  enum ttm_caching_state cstate,
-		  unsigned count);
+		  unsigned count,
+		  dma_addr_t *dma_address);
 /**
  * Put linked list of pages to pool.
  *
@@ -49,11 +51,13 @@ int ttm_get_pages(struct list_head *pages,
  * count.
  * @flags: ttm flags for page allocation.
  * @cstate: ttm caching state.
+ * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
  */
 void ttm_put_pages(struct list_head *pages,
 		   unsigned page_count,
 		   int flags,
-		   enum ttm_caching_state cstate);
+		   enum ttm_caching_state cstate,
+		   dma_addr_t *dma_address);
 /**
  * Initialize pool allocator.
  */

From 69a07f0b117a40fcc1a479358d8e1f41793617f2 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Mon, 29 Nov 2010 14:03:30 -0500
Subject: [PATCH 2/6] ttm: Utilize the DMA API for pages that have
 TTM_PAGE_FLAG_DMA32 set.

For pages that have the TTM_PAGE_FLAG_DMA32 flag set we
use the DMA API. We save the bus address in our array which we
use to program the GART (see "radeon/ttm/PCIe: Use dma_addr if TTM
has set it." and "nouveau/ttm/PCIe: Use dma_addr if TTM has set it.").

The reason behind using the DMA API is that under Xen we would
end up programming the GART with the bounce buffer (SWIOTLB)
DMA address instead of the physical DMA address of the TTM page.
The reason being that alloc_page with GFP_DMA32 does not allocate
pages under the 4GB mark when running under the Xen hypervisor.

Under baremetal this means we do the DMA API call earlier instead
of when we program the GART.

For details please refer to:
https://lkml.org/lkml/2011/1/7/251

[v2: Fixed indentation, revised desc, added Reviewed-by]
Reviewed-by: Thomas Hellstrom <thomas@shipmail.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Ian Campbell <ian.campbell@citrix.com>
---
 drivers/gpu/drm/ttm/ttm_page_alloc.c | 26 +++++++++++++++++++++++---
 1 file changed, 23 insertions(+), 3 deletions(-)

diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 9d9d92945f8c..737a2a2e46a5 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -683,14 +683,22 @@ int ttm_get_pages(struct list_head *pages, int flags,
 			gfp_flags |= GFP_HIGHUSER;
 
 		for (r = 0; r < count; ++r) {
-			p = alloc_page(gfp_flags);
+			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
+				void *addr;
+				addr = dma_alloc_coherent(NULL, PAGE_SIZE,
+							  &dma_address[r],
+							  gfp_flags);
+				if (addr == NULL)
+					return -ENOMEM;
+				p = virt_to_page(addr);
+			} else
+				p = alloc_page(gfp_flags);
 			if (!p) {
 
 				printk(KERN_ERR TTM_PFX
 				       "Unable to allocate page.");
 				return -ENOMEM;
 			}
-
 			list_add(&p->lru, pages);
 		}
 		return 0;
@@ -738,12 +746,24 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
 	unsigned long irq_flags;
 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
 	struct page *p, *tmp;
+	unsigned r;
 
 	if (pool == NULL) {
 		/* No pool for this memory type so free the pages */
 
+		r = page_count-1;
 		list_for_each_entry_safe(p, tmp, pages, lru) {
-			__free_page(p);
+			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
+				void *addr = page_address(p);
+				WARN_ON(!addr || !dma_address[r]);
+				if (addr)
+					dma_free_coherent(NULL, PAGE_SIZE,
+							  addr,
+							  dma_address[r]);
+				dma_address[r] = 0;
+			} else
+				__free_page(p);
+			r--;
 		}
 		/* Make the pages list empty */
 		INIT_LIST_HEAD(pages);

From 27e8b237944af967e0a808580278d432cb028455 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 2 Dec 2010 10:24:13 -0500
Subject: [PATCH 3/6] ttm: Expand (*populate) to support an array of DMA
 addresses.

We pass in the array of ttm pages to be populated in the GART/MM
of the card (or AGP). Patch titled: "ttm: Utilize the DMA API for
pages that have TTM_PAGE_FLAG_DMA32 set." uses the DMA API to make
those pages have proper DMA addresses (in the situation where
page_to_phys or virt_to_phys do not give us the DMA (bus) address).

Since we are using the DMA API on those pages, we should pass in the
DMA address to this function so it can save it in its proper fields
(later patches use it).

[v2: Added reviewed-by tag]

Reviewed-by: Thomas Hellstrom <thellstrom@shipmail.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Ian Campbell <ian.campbell@citrix.com>
---
 drivers/gpu/drm/nouveau/nouveau_sgdma.c | 3 ++-
 drivers/gpu/drm/radeon/radeon_ttm.c     | 3 ++-
 drivers/gpu/drm/ttm/ttm_agp_backend.c   | 3 ++-
 drivers/gpu/drm/ttm/ttm_tt.c            | 2 +-
 drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c  | 3 ++-
 include/drm/ttm/ttm_bo_driver.h         | 4 +++-
 6 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index 288bacac7e5a..edc140ab4df1 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -20,7 +20,8 @@ struct nouveau_sgdma_be {
 
 static int
 nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
-		       struct page **pages, struct page *dummy_read_page)
+		       struct page **pages, struct page *dummy_read_page,
+		       dma_addr_t *dma_addrs)
 {
 	struct nouveau_sgdma_be *nvbe = (struct nouveau_sgdma_be *)be;
 	struct drm_device *dev = nvbe->dev;
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 01c2c736a1da..6f156e9d3f31 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -655,7 +655,8 @@ struct radeon_ttm_backend {
 static int radeon_ttm_backend_populate(struct ttm_backend *backend,
 				       unsigned long num_pages,
 				       struct page **pages,
-				       struct page *dummy_read_page)
+				       struct page *dummy_read_page,
+				       dma_addr_t *dma_addrs)
 {
 	struct radeon_ttm_backend *gtt;
 
diff --git a/drivers/gpu/drm/ttm/ttm_agp_backend.c b/drivers/gpu/drm/ttm/ttm_agp_backend.c
index f999e36f30b4..1c4a72f681c1 100644
--- a/drivers/gpu/drm/ttm/ttm_agp_backend.c
+++ b/drivers/gpu/drm/ttm/ttm_agp_backend.c
@@ -47,7 +47,8 @@ struct ttm_agp_backend {
 
 static int ttm_agp_populate(struct ttm_backend *backend,
 			    unsigned long num_pages, struct page **pages,
-			    struct page *dummy_read_page)
+			    struct page *dummy_read_page,
+			    dma_addr_t *dma_addrs)
 {
 	struct ttm_agp_backend *agp_be =
 	    container_of(backend, struct ttm_agp_backend, backend);
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 0d39001259fb..86d5b1745a45 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -169,7 +169,7 @@ int ttm_tt_populate(struct ttm_tt *ttm)
 	}
 
 	be->func->populate(be, ttm->num_pages, ttm->pages,
-			   ttm->dummy_read_page);
+			   ttm->dummy_read_page, ttm->dma_address);
 	ttm->state = tt_unbound;
 	return 0;
 }
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
index 80bc37b274e7..87e43e0733bf 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_buffer.c
@@ -102,7 +102,8 @@ struct vmw_ttm_backend {
 
 static int vmw_ttm_populate(struct ttm_backend *backend,
 			    unsigned long num_pages, struct page **pages,
-			    struct page *dummy_read_page)
+			    struct page *dummy_read_page,
+			    dma_addr_t *dma_addrs)
 {
 	struct vmw_ttm_backend *vmw_be =
 	    container_of(backend, struct vmw_ttm_backend, backend);
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index 6dc4fccda73c..ebcd3dd7203b 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -50,13 +50,15 @@ struct ttm_backend_func {
 	 * @pages: Array of pointers to ttm pages.
 	 * @dummy_read_page: Page to be used instead of NULL pages in the
 	 * array @pages.
+	 * @dma_addrs: Array of DMA (bus) address of the ttm pages.
 	 *
 	 * Populate the backend with ttm pages. Depending on the backend,
 	 * it may or may not copy the @pages array.
 	 */
 	int (*populate) (struct ttm_backend *backend,
 			 unsigned long num_pages, struct page **pages,
-			 struct page *dummy_read_page);
+			 struct page *dummy_read_page,
+			 dma_addr_t *dma_addrs);
 	/**
 	 * struct ttm_backend_func member clear
 	 *

From c39d35161e87f1d7c0628af6907ac66a8c77f63f Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 2 Dec 2010 11:04:29 -0500
Subject: [PATCH 4/6] radeon/ttm/PCIe: Use dma_addr if TTM has set it.

If the TTM layer has used the DMA API to set up pages that are
TTM_PAGE_FLAG_DMA32 (look at patch titled: "ttm: Utilize the DMA API
for pages that have TTM_PAGE_FLAG_DMA32 set."), let's use it
when programming the GART in the PCIe type cards.

This patch skips doing the pci_map_page (and pci_unmap_page) if
there is a DMA address passed in for that page. If the dma_address
is zero (or DMA_ERROR_CODE), then we continue on with our old
behaviour.

[v2: Fixed an indentation problem, added reviewed-by tag]
[v3: Added Acked-by Jerome]

Acked-by: Jerome Glisse <j.glisse@gmail.com>
Reviewed-by: Thomas Hellstrom <thomas@shipmail.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Ian Campbell <ian.campbell@citrix.com>
---
 drivers/gpu/drm/radeon/radeon.h      |  4 +++-
 drivers/gpu/drm/radeon/radeon_gart.c | 36 ++++++++++++++++++++--------
 drivers/gpu/drm/radeon/radeon_ttm.c  |  5 +++-
 3 files changed, 33 insertions(+), 12 deletions(-)

diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h
index 73f600d39ad4..c9bbab921e61 100644
--- a/drivers/gpu/drm/radeon/radeon.h
+++ b/drivers/gpu/drm/radeon/radeon.h
@@ -317,6 +317,7 @@ struct radeon_gart {
 	union radeon_gart_table		table;
 	struct page			**pages;
 	dma_addr_t			*pages_addr;
+	bool				*ttm_alloced;
 	bool				ready;
 };
 
@@ -329,7 +330,8 @@ void radeon_gart_fini(struct radeon_device *rdev);
 void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 			int pages);
 int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
-		     int pages, struct page **pagelist);
+		     int pages, struct page **pagelist,
+		     dma_addr_t *dma_addr);
 
 
 /*
diff --git a/drivers/gpu/drm/radeon/radeon_gart.c b/drivers/gpu/drm/radeon/radeon_gart.c
index e65b90317fab..5214bc29d9b3 100644
--- a/drivers/gpu/drm/radeon/radeon_gart.c
+++ b/drivers/gpu/drm/radeon/radeon_gart.c
@@ -149,8 +149,9 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
 	for (i = 0; i < pages; i++, p++) {
 		if (rdev->gart.pages[p]) {
-			pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
-				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
+			if (!rdev->gart.ttm_alloced[p])
+				pci_unmap_page(rdev->pdev, rdev->gart.pages_addr[p],
+				       		PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 			rdev->gart.pages[p] = NULL;
 			rdev->gart.pages_addr[p] = rdev->dummy_page.addr;
 			page_base = rdev->gart.pages_addr[p];
@@ -165,7 +166,7 @@ void radeon_gart_unbind(struct radeon_device *rdev, unsigned offset,
 }
 
 int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
-		     int pages, struct page **pagelist)
+		     int pages, struct page **pagelist, dma_addr_t *dma_addr)
 {
 	unsigned t;
 	unsigned p;
@@ -180,15 +181,22 @@ int radeon_gart_bind(struct radeon_device *rdev, unsigned offset,
 	p = t / (PAGE_SIZE / RADEON_GPU_PAGE_SIZE);
 
 	for (i = 0; i < pages; i++, p++) {
-		/* we need to support large memory configurations */
-		/* assume that unbind have already been call on the range */
-		rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
+		/* On TTM path, we only use the DMA API if TTM_PAGE_FLAG_DMA32
+		 * is requested. */
+		if (dma_addr[i] != DMA_ERROR_CODE) {
+			rdev->gart.ttm_alloced[p] = true;
+			rdev->gart.pages_addr[p] = dma_addr[i];
+		} else {
+			/* we need to support large memory configurations */
+			/* assume that unbind have already been call on the range */
+			rdev->gart.pages_addr[p] = pci_map_page(rdev->pdev, pagelist[i],
 							0, PAGE_SIZE,
 							PCI_DMA_BIDIRECTIONAL);
-		if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
-			/* FIXME: failed to map page (return -ENOMEM?) */
-			radeon_gart_unbind(rdev, offset, pages);
-			return -ENOMEM;
+			if (pci_dma_mapping_error(rdev->pdev, rdev->gart.pages_addr[p])) {
+				/* FIXME: failed to map page (return -ENOMEM?) */
+				radeon_gart_unbind(rdev, offset, pages);
+				return -ENOMEM;
+			}
 		}
 		rdev->gart.pages[p] = pagelist[i];
 		page_base = rdev->gart.pages_addr[p];
@@ -251,6 +259,12 @@ int radeon_gart_init(struct radeon_device *rdev)
 		radeon_gart_fini(rdev);
 		return -ENOMEM;
 	}
+	rdev->gart.ttm_alloced = kzalloc(sizeof(bool) *
+					 rdev->gart.num_cpu_pages, GFP_KERNEL);
+	if (rdev->gart.ttm_alloced == NULL) {
+		radeon_gart_fini(rdev);
+		return -ENOMEM;
+	}
 	/* set GART entry to point to the dummy page by default */
 	for (i = 0; i < rdev->gart.num_cpu_pages; i++) {
 		rdev->gart.pages_addr[i] = rdev->dummy_page.addr;
@@ -267,6 +281,8 @@ void radeon_gart_fini(struct radeon_device *rdev)
 	rdev->gart.ready = false;
 	kfree(rdev->gart.pages);
 	kfree(rdev->gart.pages_addr);
+	kfree(rdev->gart.ttm_alloced);
 	rdev->gart.pages = NULL;
 	rdev->gart.pages_addr = NULL;
+	rdev->gart.ttm_alloced = NULL;
 }
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index 6f156e9d3f31..ca045058e498 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -647,6 +647,7 @@ struct radeon_ttm_backend {
 	unsigned long			num_pages;
 	struct page			**pages;
 	struct page			*dummy_read_page;
+	dma_addr_t			*dma_addrs;
 	bool				populated;
 	bool				bound;
 	unsigned			offset;
@@ -662,6 +663,7 @@ static int radeon_ttm_backend_populate(struct ttm_backend *backend,
 
 	gtt = container_of(backend, struct radeon_ttm_backend, backend);
 	gtt->pages = pages;
+	gtt->dma_addrs = dma_addrs;
 	gtt->num_pages = num_pages;
 	gtt->dummy_read_page = dummy_read_page;
 	gtt->populated = true;
@@ -674,6 +676,7 @@ static void radeon_ttm_backend_clear(struct ttm_backend *backend)
 
 	gtt = container_of(backend, struct radeon_ttm_backend, backend);
 	gtt->pages = NULL;
+	gtt->dma_addrs = NULL;
 	gtt->num_pages = 0;
 	gtt->dummy_read_page = NULL;
 	gtt->populated = false;
@@ -694,7 +697,7 @@ static int radeon_ttm_backend_bind(struct ttm_backend *backend,
 		     gtt->num_pages, bo_mem, backend);
 	}
 	r = radeon_gart_bind(gtt->rdev, gtt->offset,
-			     gtt->num_pages, gtt->pages);
+			     gtt->num_pages, gtt->pages, gtt->dma_addrs);
 	if (r) {
 		DRM_ERROR("failed to bind %lu pages at 0x%08X\n",
 			  gtt->num_pages, gtt->offset);

From e0138c26cdeee8c033256ccd9e07d66db3c998be Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Thu, 2 Dec 2010 11:36:24 -0500
Subject: [PATCH 5/6] nouveau/ttm/PCIe: Use dma_addr if TTM has set it.

If the TTM layer has used the DMA API to set up pages that are
TTM_PAGE_FLAG_DMA32 (look at patch titled: "ttm: Utilize the
DMA API for pages that have TTM_PAGE_FLAG_DMA32 set"), let's
use it when programming the GART in the PCIe type cards.

This patch skips doing the pci_map_page (and pci_unmap_page) if
there is a DMA address passed in for that page. If the dma_address
is zero (or DMA_ERROR_CODE), then we continue on with our old
behaviour.

[v2: Added a review-by tag]

Reviewed-by: Thomas Hellstrom <thomas@shipmail.org>
Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Tested-by: Ian Campbell <ian.campbell@citrix.com>
---
 drivers/gpu/drm/nouveau/nouveau_sgdma.c | 28 ++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_sgdma.c b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
index edc140ab4df1..bbdd982cbb3e 100644
--- a/drivers/gpu/drm/nouveau/nouveau_sgdma.c
+++ b/drivers/gpu/drm/nouveau/nouveau_sgdma.c
@@ -12,6 +12,7 @@ struct nouveau_sgdma_be {
 	struct drm_device *dev;
 
 	dma_addr_t *pages;
+	bool *ttm_alloced;
 	unsigned nr_pages;
 
 	unsigned pte_start;
@@ -35,15 +36,25 @@ nouveau_sgdma_populate(struct ttm_backend *be, unsigned long num_pages,
 	if (!nvbe->pages)
 		return -ENOMEM;
 
+	nvbe->ttm_alloced = kmalloc(sizeof(bool) * num_pages, GFP_KERNEL);
+	if (!nvbe->ttm_alloced)
+		return -ENOMEM;
+
 	nvbe->nr_pages = 0;
 	while (num_pages--) {
-		nvbe->pages[nvbe->nr_pages] =
-			pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
+		if (dma_addrs[nvbe->nr_pages] != DMA_ERROR_CODE) {
+			nvbe->pages[nvbe->nr_pages] =
+					dma_addrs[nvbe->nr_pages];
+		 	nvbe->ttm_alloced[nvbe->nr_pages] = true;
+		} else {
+			nvbe->pages[nvbe->nr_pages] =
+				pci_map_page(dev->pdev, pages[nvbe->nr_pages], 0,
 				     PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
-		if (pci_dma_mapping_error(dev->pdev,
-					  nvbe->pages[nvbe->nr_pages])) {
-			be->func->clear(be);
-			return -EFAULT;
+			if (pci_dma_mapping_error(dev->pdev,
+						  nvbe->pages[nvbe->nr_pages])) {
+				be->func->clear(be);
+				return -EFAULT;
+			}
 		}
 
 		nvbe->nr_pages++;
@@ -66,11 +77,14 @@ nouveau_sgdma_clear(struct ttm_backend *be)
 			be->func->unbind(be);
 
 		while (nvbe->nr_pages--) {
-			pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
+			if (!nvbe->ttm_alloced[nvbe->nr_pages])
+				pci_unmap_page(dev->pdev, nvbe->pages[nvbe->nr_pages],
 				       PAGE_SIZE, PCI_DMA_BIDIRECTIONAL);
 		}
 		kfree(nvbe->pages);
+		kfree(nvbe->ttm_alloced);
 		nvbe->pages = NULL;
+		nvbe->ttm_alloced = NULL;
 		nvbe->nr_pages = 0;
 	}
 }

From 5a893fc28f0393adb7c885a871b8c59e623fd528 Mon Sep 17 00:00:00 2001
From: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
Date: Tue, 22 Feb 2011 13:24:32 -0500
Subject: [PATCH 6/6] ttm: Include the 'struct dev' when using the DMA API.

This makes the accounting when using 'debug_dma_dump_mappings()'
and CONFIG_DMA_API_DEBUG=y be assigned to the correct device
instead of 'fallback'.

No functional change - just cosmetic.

Signed-off-by: Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
---
 drivers/gpu/drm/nouveau/nouveau_mem.c |  1 +
 drivers/gpu/drm/radeon/radeon_ttm.c   |  1 +
 drivers/gpu/drm/ttm/ttm_page_alloc.c  | 11 ++++++-----
 drivers/gpu/drm/ttm/ttm_tt.c          |  4 ++--
 drivers/gpu/drm/vmwgfx/vmwgfx_drv.c   |  2 +-
 include/drm/ttm/ttm_bo_driver.h       |  1 +
 include/drm/ttm/ttm_page_alloc.h      |  8 ++++++--
 7 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/drivers/gpu/drm/nouveau/nouveau_mem.c b/drivers/gpu/drm/nouveau/nouveau_mem.c
index a163c7c612e7..931b22142ed2 100644
--- a/drivers/gpu/drm/nouveau/nouveau_mem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_mem.c
@@ -559,6 +559,7 @@ nouveau_mem_vram_init(struct drm_device *dev)
 	if (ret)
 		return ret;
 
+	dev_priv->ttm.bdev.dev = dev->dev;
 	ret = ttm_bo_device_init(&dev_priv->ttm.bdev,
 				 dev_priv->ttm.bo_global_ref.ref.object,
 				 &nouveau_bo_driver, DRM_FILE_PAGE_OFFSET,
diff --git a/drivers/gpu/drm/radeon/radeon_ttm.c b/drivers/gpu/drm/radeon/radeon_ttm.c
index ca045058e498..cfe223f22394 100644
--- a/drivers/gpu/drm/radeon/radeon_ttm.c
+++ b/drivers/gpu/drm/radeon/radeon_ttm.c
@@ -513,6 +513,7 @@ int radeon_ttm_init(struct radeon_device *rdev)
 	if (r) {
 		return r;
 	}
+	rdev->mman.bdev.dev = rdev->dev;
 	/* No others user of address space so set it to 0 */
 	r = ttm_bo_device_init(&rdev->mman.bdev,
 			       rdev->mman.bo_global_ref.ref.object,
diff --git a/drivers/gpu/drm/ttm/ttm_page_alloc.c b/drivers/gpu/drm/ttm/ttm_page_alloc.c
index 737a2a2e46a5..35849dbf3ab5 100644
--- a/drivers/gpu/drm/ttm/ttm_page_alloc.c
+++ b/drivers/gpu/drm/ttm/ttm_page_alloc.c
@@ -664,7 +664,7 @@ out:
  */
 int ttm_get_pages(struct list_head *pages, int flags,
 		  enum ttm_caching_state cstate, unsigned count,
-		  dma_addr_t *dma_address)
+		  dma_addr_t *dma_address, struct device *dev)
 {
 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
 	struct page *p = NULL;
@@ -685,7 +685,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
 		for (r = 0; r < count; ++r) {
 			if ((flags & TTM_PAGE_FLAG_DMA32) && dma_address) {
 				void *addr;
-				addr = dma_alloc_coherent(NULL, PAGE_SIZE,
+				addr = dma_alloc_coherent(dev, PAGE_SIZE,
 							  &dma_address[r],
 							  gfp_flags);
 				if (addr == NULL)
@@ -730,7 +730,7 @@ int ttm_get_pages(struct list_head *pages, int flags,
 			printk(KERN_ERR TTM_PFX
 			       "Failed to allocate extra pages "
 			       "for large request.");
-			ttm_put_pages(pages, 0, flags, cstate, NULL);
+			ttm_put_pages(pages, 0, flags, cstate, NULL, NULL);
 			return r;
 		}
 	}
@@ -741,7 +741,8 @@ int ttm_get_pages(struct list_head *pages, int flags,
 
 /* Put all pages in pages list to correct pool to wait for reuse */
 void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
-		   enum ttm_caching_state cstate, dma_addr_t *dma_address)
+		   enum ttm_caching_state cstate, dma_addr_t *dma_address,
+		   struct device *dev)
 {
 	unsigned long irq_flags;
 	struct ttm_page_pool *pool = ttm_get_pool(flags, cstate);
@@ -757,7 +758,7 @@ void ttm_put_pages(struct list_head *pages, unsigned page_count, int flags,
 				void *addr = page_address(p);
 				WARN_ON(!addr || !dma_address[r]);
 				if (addr)
-					dma_free_coherent(NULL, PAGE_SIZE,
+					dma_free_coherent(dev, PAGE_SIZE,
 							  addr,
 							  dma_address[r]);
 				dma_address[r] = 0;
diff --git a/drivers/gpu/drm/ttm/ttm_tt.c b/drivers/gpu/drm/ttm/ttm_tt.c
index 86d5b1745a45..0f8fc9ff0c53 100644
--- a/drivers/gpu/drm/ttm/ttm_tt.c
+++ b/drivers/gpu/drm/ttm/ttm_tt.c
@@ -110,7 +110,7 @@ static struct page *__ttm_tt_get_page(struct ttm_tt *ttm, int index)
 		INIT_LIST_HEAD(&h);
 
 		ret = ttm_get_pages(&h, ttm->page_flags, ttm->caching_state, 1,
-				    &ttm->dma_address[index]);
+				    &ttm->dma_address[index], ttm->be->bdev->dev);
 
 		if (ret != 0)
 			return NULL;
@@ -304,7 +304,7 @@ static void ttm_tt_free_alloced_pages(struct ttm_tt *ttm)
 		}
 	}
 	ttm_put_pages(&h, count, ttm->page_flags, ttm->caching_state,
-		      ttm->dma_address);
+		      ttm->dma_address, ttm->be->bdev->dev);
 	ttm->state = tt_unpopulated;
 	ttm->first_himem_page = ttm->num_pages;
 	ttm->last_lomem_page = -1;
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
index 10ca97ee0206..4a8c7893e8ff 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.c
@@ -322,7 +322,7 @@ static int vmw_driver_load(struct drm_device *dev, unsigned long chipset)
 	ttm_lock_set_kill(&dev_priv->fbdev_master.lock, false, SIGTERM);
 	dev_priv->active_master = &dev_priv->fbdev_master;
 
-
+	dev_priv->bdev.dev = dev->dev;
 	ret = ttm_bo_device_init(&dev_priv->bdev,
 				 dev_priv->bo_global_ref.ref.object,
 				 &vmw_bo_driver, VMWGFX_FILE_PAGE_OFFSET,
diff --git a/include/drm/ttm/ttm_bo_driver.h b/include/drm/ttm/ttm_bo_driver.h
index ebcd3dd7203b..4d97014e8c8d 100644
--- a/include/drm/ttm/ttm_bo_driver.h
+++ b/include/drm/ttm/ttm_bo_driver.h
@@ -533,6 +533,7 @@ struct ttm_bo_device {
 	struct list_head device_list;
 	struct ttm_bo_global *glob;
 	struct ttm_bo_driver *driver;
+	struct device *dev;
 	rwlock_t vm_lock;
 	struct ttm_mem_type_manager man[TTM_NUM_MEM_TYPES];
 	/*
diff --git a/include/drm/ttm/ttm_page_alloc.h b/include/drm/ttm/ttm_page_alloc.h
index 8062890f725e..ccb6b7a240e2 100644
--- a/include/drm/ttm/ttm_page_alloc.h
+++ b/include/drm/ttm/ttm_page_alloc.h
@@ -37,12 +37,14 @@
  * @cstate: ttm caching state for the page.
  * @count: number of pages to allocate.
  * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
+ * @dev: struct device for appropriate DMA accounting.
  */
 int ttm_get_pages(struct list_head *pages,
 		  int flags,
 		  enum ttm_caching_state cstate,
 		  unsigned count,
-		  dma_addr_t *dma_address);
+		  dma_addr_t *dma_address,
+		  struct device *dev);
 /**
  * Put linked list of pages to pool.
  *
@@ -52,12 +54,14 @@ int ttm_get_pages(struct list_head *pages,
  * @flags: ttm flags for page allocation.
  * @cstate: ttm caching state.
  * @dma_address: The DMA (bus) address of pages (if TTM_PAGE_FLAG_DMA32 set).
+ * @dev: struct device for appropriate DMA accounting.
  */
 void ttm_put_pages(struct list_head *pages,
 		   unsigned page_count,
 		   int flags,
 		   enum ttm_caching_state cstate,
-		   dma_addr_t *dma_address);
+		   dma_addr_t *dma_address,
+		   struct device *dev);
 /**
  * Initialize pool allocator.
  */