author    Alexandre Courbot <acourbot@nvidia.com>  2014-07-16 17:42:06 +0900
committer Alexandre Courbot <acourbot@nvidia.com>  2014-11-04 11:30:55 +0900
commit    fccca859a42540e08ea40941e4e99b8adb774bf6 (patch)
tree      77b74642388874e49945f2c1e7d93bd11fdaf09d /nvkm
parent    e51342b86a94c6d2f49a1d0e81440ad9081f2d4b (diff)

Experiments with large pages (gk20a_new)
Diffstat (limited to 'nvkm')

 -rw-r--r--  nvkm/subdev/fb/ramgk20a.c | 22
 -rw-r--r--  nvkm/subdev/vm/base.c     | 16

 2 files changed, 30 insertions(+), 8 deletions(-)
diff --git a/nvkm/subdev/fb/ramgk20a.c b/nvkm/subdev/fb/ramgk20a.c
index 06dfdeced..873b1ebfd 100644
--- a/nvkm/subdev/fb/ramgk20a.c
+++ b/nvkm/subdev/fb/ramgk20a.c
@@ -53,8 +53,8 @@ gk20a_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 	u32 npages, order;
 	int i;
 
-	nv_debug(pfb, "%s: size: %llx align: %x, ncmin: %x\n", __func__, size,
-		 align, ncmin);
+	nv_debug(pfb, "%s: size: 0x%llx align: 0x%x, ncmin: 0x%x\n", __func__,
+		 size, align, ncmin);
 
 	npages = size >> PAGE_SHIFT;
 	if (npages == 0)
@@ -73,14 +73,26 @@ gk20a_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 	/* ensure returned address is correctly aligned */
 	npages = max(align, npages);
 
+	/* use big pages if we can, since our memory is always contiguous */
+	if (ncmin == 0 && npages % 0x20 == 0)
+		ncmin = 0x20000;
+	else if (ncmin == 0)
+		ncmin = 0x1000;
+	ncmin >>= PAGE_SHIFT;
+
+	/* ensure size is a multiple of ncmin */
+	npages = roundup(npages, ncmin);
+
 	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
 	if (!mem)
 		return -ENOMEM;
 
 	mem->base.size = npages;
 	mem->base.memtype = type;
+	mem->base.page_shift = fls(ncmin << PAGE_SHIFT) - 1;
 
-	mem->base.pages = kzalloc(sizeof(dma_addr_t) * npages, GFP_KERNEL);
+	mem->base.pages = kzalloc(sizeof(dma_addr_t) * npages / ncmin,
+				  GFP_KERNEL);
 	if (!mem->base.pages) {
 		kfree(mem);
 		return -ENOMEM;
@@ -106,8 +118,8 @@ gk20a_ram_get(struct nouveau_fb *pfb, u64 size, u32 align, u32 ncmin,
 	nv_debug(pfb, "alloc size: 0x%x, align: 0x%x, paddr: %pad, vaddr: %p\n",
 		 npages << PAGE_SHIFT, align, &mem->handle, mem->cpuaddr);
 
-	for (i = 0; i < npages; i++)
-		mem->base.pages[i] = mem->handle + (PAGE_SIZE * i);
+	for (i = 0; i < npages / ncmin; i++)
+		mem->base.pages[i] = mem->handle + (PAGE_SIZE * i * ncmin);
 
 	mem->base.offset = (u64)mem->base.pages[0];
 
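
For reference, the new sizing logic can be exercised outside the kernel. The
sketch below mirrors the arithmetic added to gk20a_ram_get() above, assuming
4 KiB small pages (PAGE_SHIFT = 12); fls32() and roundup32() are userspace
stand-ins for the kernel's fls() and roundup(), and the request size is an
arbitrary example, not a value from this patch.

	/*
	 * Userspace sketch of the chunk-size arithmetic added to
	 * gk20a_ram_get() above. Not driver code.
	 */
	#include <stdio.h>

	#define PAGE_SHIFT 12

	static unsigned int fls32(unsigned int x)	/* like the kernel's fls() */
	{
		unsigned int r = 0;
		while (x) {
			r++;
			x >>= 1;
		}
		return r;
	}

	static unsigned int roundup32(unsigned int x, unsigned int m)
	{
		return ((x + m - 1) / m) * m;	/* like the kernel's roundup() */
	}

	int main(void)
	{
		unsigned int npages = 0x40;	/* example: 256 KiB = 64 small pages */
		unsigned int ncmin = 0;		/* caller did not force a chunk size */

		/* big (128 KiB) pages need a multiple of 32 small pages */
		if (ncmin == 0 && npages % 0x20 == 0)
			ncmin = 0x20000;
		else if (ncmin == 0)
			ncmin = 0x1000;
		ncmin >>= PAGE_SHIFT;		/* bytes -> count of small pages */

		/* round the allocation up to a whole number of chunks */
		npages = roundup32(npages, ncmin);

		/* log2 of the chunk size in bytes: 17 for big, 12 for small */
		unsigned int page_shift = fls32(ncmin << PAGE_SHIFT) - 1;

		printf("ncmin=%u small pages, page_shift=%u, pages[] entries=%u\n",
		       ncmin, page_shift, npages / ncmin);
		/* -> ncmin=32 small pages, page_shift=17, pages[] entries=2 */
		return 0;
	}

With 128 KiB chunks the pages[] array shrinks by a factor of 32, and each
entry now addresses one chunk rather than one CPU page, which is what the
PAGE_SIZE * i * ncmin stride in the loop above expresses.
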
diff --git a/nvkm/subdev/vm/base.c b/nvkm/subdev/vm/base.c
index f75a683bd..d4a758013 100644
--- a/nvkm/subdev/vm/base.c
+++ b/nvkm/subdev/vm/base.c
@@ -136,16 +136,26 @@ nouveau_vm_map_sg(struct nouveau_vma *vma, u64 delta, u64 length,
 		  struct nouveau_mem *mem)
 {
 	struct nouveau_vm *vm = vma->vm;
 	struct nouveau_vmmgr *vmm = vm->vmm;
+	/* these pages are always PAGE_SIZE (should be mem->page_shift) sized */
 	dma_addr_t *list = mem->pages;
-	int big = vma->node->type != vmm->spg_shift;
+	/* whether the VMA type matches the small page type or not */
+	/* if not, it should always match the big page type */
+	int big = mem->page_shift != vmm->spg_shift;
+	/* first VMA page to map */
 	u32 offset = vma->node->offset + (delta >> 12);
-	u32 bits = vma->node->type - 12;
-	u32 num = length >> vma->node->type;
+	/* difference between size of node pages and small pages of VMM */
+	u32 bits = mem->page_shift - vmm->spg_shift;
+	/* number of VMA pages to map */
+	u32 num = length >> mem->page_shift;
+	/* first pde */
 	u32 pde = (offset >> vmm->pgt_bits) - vm->fpde;
+	/* first pte */
 	u32 pte = (offset & ((1 << vmm->pgt_bits) - 1)) >> bits;
+	/* last pte before switching to the next pde */
 	u32 max = 1 << (vmm->pgt_bits - bits);
 	u32 end, len;
 
+	//printk("%s %d %d %d\n", __func__, big, vma->node->type, mem->page_shift);
 	while (num) {
 		struct nouveau_gpuobj *pgt = vm->pgt[pde].obj[big];
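
To make the rewritten declarations concrete, here is a minimal sketch of the
same index math with illustrative numbers. The constants (spg_shift = 12,
pgt_bits = 15, fpde = 0, a 128 KiB big-page mem object, a 4 MiB mapping) are
assumptions chosen for the example, not values taken from this patch.

	/*
	 * Sketch of the index arithmetic in nouveau_vm_map_sg() after this
	 * patch. All concrete values are illustrative assumptions.
	 */
	#include <stdio.h>
	#include <stdint.h>

	int main(void)
	{
		uint32_t spg_shift  = 12;	/* small pages: 4 KiB */
		uint32_t page_shift = 17;	/* mem object uses 128 KiB pages */
		uint32_t pgt_bits   = 15;	/* small-page PTEs per page table */
		uint32_t fpde       = 0;	/* first PDE of this VM */

		uint64_t delta  = 0;		/* map from the start of the VMA */
		uint64_t length = 0x400000;	/* 4 MiB to map */
		uint32_t node_offset = 0x8000;	/* VMA start, in small-page units */

		/* big-page path iff the memory's page size differs from spg */
		int big = page_shift != spg_shift;			/* 1 */
		uint32_t offset = node_offset + (uint32_t)(delta >> 12);
		/* difference between node page size and small page size */
		uint32_t bits = page_shift - spg_shift;			/* 5 */
		/* number of pages to map */
		uint32_t num = (uint32_t)(length >> page_shift);	/* 32 */
		/* first pde, first pte, and ptes per pde */
		uint32_t pde = (offset >> pgt_bits) - fpde;		/* 1 */
		uint32_t pte = (offset & ((1 << pgt_bits) - 1)) >> bits; /* 0 */
		uint32_t max = 1 << (pgt_bits - bits);			/* 1024 */

		printf("big=%d num=%u pde=%u pte=%u max=%u\n",
		       big, num, pde, pte, max);
		return 0;
	}

The substantive change is that big, bits and num are now derived from
mem->page_shift instead of vma->node->type: the mapping granularity follows
the memory object rather than the VMA it is mapped into.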