author     Ben Skeggs <skeggsb@gmail.com>  2008-03-12 23:37:29 +1100
committer  Ben Skeggs <skeggsb@gmail.com>  2008-03-13 00:23:52 +1100
commit     1766e1c07b03c6ccf545469663334be762c0bddf (patch)
tree       e60ee35ad5f843a882a1f4b65700ab66eea76c60 /shared-core
parent     88bd1e4a350d011ec44f6786e0bfdf8fb386800c (diff)
nv50: force channel vram access through vm
If we ever want to be able to use the 3D engine we have no choice: it
appears that the tiling setup (required for 3D on G8x) lives in the page tables.
The immediate benefit of this change, however, is that it is no longer possible
for a client to use the GPU to render over the top of important engine setup
tables, which also live in VRAM.
G8x VRAM size is limited to 512MiB for the moment, as we use a 1:1 mapping
of real VRAM pages to their offset from the start of a channel's VRAM
DMA object, and only populate a single PDE for VRAM use.
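For reference, the 512MiB cap falls straight out of that layout: with the 64KiB
VM page size and 8-byte (two 32-bit word) PTEs this patch uses, one PDE's worth
of page table spans exactly 512MiB. The standalone sketch below is illustrative
only (not part of the commit); it just mirrors the
((512 * 1024 * 1024) / 65536) * 8 size calculation done in nouveau_mem_init().

    /*
     * Arithmetic sketch only, not driver code: shows why a single PDE and a
     * 64KiB page table are sufficient exactly while VRAM is clamped to 512MiB.
     */
    #include <stdio.h>

    int main(void)
    {
            const unsigned long vm_page  = 64 * 1024;          /* GPU VM page size used by the patch */
            const unsigned long pte_size = 8;                  /* two 32-bit words per PTE */
            const unsigned long vram_max = 512 * 1024 * 1024;  /* clamp applied in nouveau_mem_init() */

            unsigned long ptes     = vram_max / vm_page;       /* 8192 entries */
            unsigned long pt_bytes = ptes * pte_size;          /* 65536 bytes, the gpuobj size allocated */

            printf("PTEs per PDE: %lu, page table size: %lu bytes\n", ptes, pt_bytes);
            return 0;
    }
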
Diffstat (limited to 'shared-core')
-rw-r--r--  shared-core/nouveau_drm.h    |  4
-rw-r--r--  shared-core/nouveau_drv.h    |  4
-rw-r--r--  shared-core/nouveau_mem.c    | 78
-rw-r--r--  shared-core/nouveau_object.c | 26
-rw-r--r--  shared-core/nouveau_state.c  |  1
-rw-r--r--  shared-core/nv50_instmem.c   |  3
6 files changed, 114 insertions(+), 2 deletions(-)
diff --git a/shared-core/nouveau_drm.h b/shared-core/nouveau_drm.h
index 5f07fcbc..cf762052 100644
--- a/shared-core/nouveau_drm.h
+++ b/shared-core/nouveau_drm.h
@@ -87,6 +87,10 @@ struct drm_nouveau_gpuobj_free {
 #define NOUVEAU_MEM_MAPPED		0x00000100
 #define NOUVEAU_MEM_INSTANCE		0x00000200 /* internal */
 #define NOUVEAU_MEM_NOTIFIER		0x00000400 /* internal */
+#define NOUVEAU_MEM_NOVM		0x00000800 /* internal */
+#define NOUVEAU_MEM_INTERNAL (NOUVEAU_MEM_INSTANCE | \
+			      NOUVEAU_MEM_NOTIFIER | \
+			      NOUVEAU_MEM_NOVM)
 
 struct drm_nouveau_mem_alloc {
 	int flags;
diff --git a/shared-core/nouveau_drv.h b/shared-core/nouveau_drv.h
index 4184aa5b..a51e552c 100644
--- a/shared-core/nouveau_drv.h
+++ b/shared-core/nouveau_drv.h
@@ -136,6 +136,7 @@ struct nouveau_channel
 	/* NV50 VM */
 	struct nouveau_gpuobj *vm_pd;
 	struct nouveau_gpuobj_ref *vm_gart_pt;
+	struct nouveau_gpuobj_ref *vm_vram_pt;
 
 	/* Objects */
 	struct nouveau_gpuobj_ref *ramin; /* Private instmem */
@@ -290,6 +291,9 @@ struct drm_nouveau_private {
 		unsigned long sg_handle;
 	} gart_info;
 
+	/* G8x global VRAM page table */
+	struct nouveau_gpuobj *vm_vram_pt;
+
 	/* the mtrr covering the FB */
 	int fb_mtrr;
 
diff --git a/shared-core/nouveau_mem.c b/shared-core/nouveau_mem.c
index 4e80ca46..2cf8807d 100644
--- a/shared-core/nouveau_mem.c
+++ b/shared-core/nouveau_mem.c
@@ -468,6 +468,11 @@ int nouveau_mem_init(struct drm_device *dev)
 	/* Init FB */
 	dev_priv->fb_phys=drm_get_resource_start(dev,1);
 	fb_size = nouveau_mem_fb_amount(dev);
+	/* On G80, limit VRAM to 512MiB temporarily due to limits in how
+	 * we handle VRAM page tables.
+	 */
+	if (dev_priv->card_type >= NV_50 && fb_size > (512 * 1024 * 1024))
+		fb_size = (512 * 1024 * 1024);
 	/* On at least NV40, RAMIN is actually at the end of vram.
 	 * We don't want to allocate this... */
 	if (dev_priv->card_type >= NV_40)
@@ -540,6 +545,21 @@ int nouveau_mem_init(struct drm_device *dev)
 		}
 	}
 
+	/* G8x: Allocate shared page table to map real VRAM pages into */
+	if (dev_priv->card_type >= NV_50) {
+		unsigned size = ((512 * 1024 * 1024) / 65536) * 8;
+
+		ret = nouveau_gpuobj_new(dev, NULL, size, 0,
+					 NVOBJ_FLAG_ZERO_ALLOC |
+					 NVOBJ_FLAG_ALLOW_NO_REFS,
+					 &dev_priv->vm_vram_pt);
+		if (ret) {
+			DRM_ERROR("Error creating VRAM page table: %d\n", ret);
+			return ret;
+		}
+	}
+
 	return 0;
 }
 
@@ -558,6 +578,12 @@ struct mem_block* nouveau_mem_alloc(struct drm_device *dev, int alignment,
 	if (alignment < PAGE_SHIFT)
 		alignment = PAGE_SHIFT;
 
+	/* Align allocation sizes to 64KiB blocks on G8x.  We use a 64KiB
+	 * page size in the GPU VM.
+	 */
+	if (flags & NOUVEAU_MEM_FB && dev_priv->card_type >= NV_50)
+		size = (size + (64 * 1024)) & ~((64 * 1024) - 1);
+
 	/*
 	 * Warn about 0 sized allocations, but let it go through. It'll
 	 * return 1 page
@@ -612,6 +638,30 @@ struct mem_block* nouveau_mem_alloc(struct drm_device *dev, int alignment,
 alloc_ok:
 	block->flags=type;
 
+	/* On G8x, map memory into VM */
+	if (block->flags & NOUVEAU_MEM_FB && dev_priv->card_type >= NV_50 &&
+	    !(flags & NOUVEAU_MEM_NOVM)) {
+		struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt;
+		unsigned offset = block->start;
+		unsigned count = block->size / 65536;
+
+		if (!pt) {
+			DRM_ERROR("vm alloc without vm pt\n");
+			nouveau_mem_free_block(block);
+			return NULL;
+		}
+
+		while (count--) {
+			unsigned pte = offset / 65536;
+
+			INSTANCE_WR(pt, (pte * 2) + 0, offset | 1);
+			INSTANCE_WR(pt, (pte * 2) + 1, 0x00000000);
+			offset += 65536;
+		}
+	} else {
+		block->flags |= NOUVEAU_MEM_NOVM;
+	}
+
 	if (flags&NOUVEAU_MEM_MAPPED)
 	{
 		struct drm_map_list *entry;
@@ -653,9 +703,34 @@ alloc_ok:
 
 void nouveau_mem_free(struct drm_device* dev, struct mem_block* block)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
 	DRM_DEBUG("freeing 0x%llx type=0x%08x\n", block->start, block->flags);
+
 	if (block->flags&NOUVEAU_MEM_MAPPED)
 		drm_rmmap(dev, block->map);
+
+	/* G8x: Remove pages from vm */
+	if (block->flags & NOUVEAU_MEM_FB && dev_priv->card_type >= NV_50 &&
+	    !(block->flags & NOUVEAU_MEM_NOVM)) {
+		struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt;
+		unsigned offset = block->start;
+		unsigned count = block->size / 65536;
+
+		if (!pt) {
+			DRM_ERROR("vm free without vm pt\n");
+			goto out_free;
+		}
+
+		while (count--) {
+			unsigned pte = offset / 65536;
+			INSTANCE_WR(pt, (pte * 2) + 0, 0);
+			INSTANCE_WR(pt, (pte * 2) + 1, 0);
+			offset += 65536;
+		}
+	}
+
+out_free:
 	nouveau_mem_free_block(block);
 }
 
@@ -670,6 +745,9 @@ int nouveau_ioctl_mem_alloc(struct drm_device *dev, void *data, struct drm_file
 
 	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
 
+	if (alloc->flags & NOUVEAU_MEM_INTERNAL)
+		return -EINVAL;
+
 	block=nouveau_mem_alloc(dev, alloc->alignment, alloc->size,
 					alloc->flags, file_priv);
 	if (!block)
diff --git a/shared-core/nouveau_object.c b/shared-core/nouveau_object.c
index b6bf759d..09f9027a 100644
--- a/shared-core/nouveau_object.c
+++ b/shared-core/nouveau_object.c
@@ -983,7 +983,11 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 			return ret;
 	}
 
-	/* NV50 VM, point offset 0-512MiB at shared PCIEGART table */
+	/* NV50 VM
+	 *  - Allocate per-channel page-directory
+	 *  - Point offset 0-512MiB at shared PCIEGART table
+	 *  - Point offset 512-1024MiB at shared VRAM table
+	 */
 	if (dev_priv->card_type >= NV_50) {
 		uint32_t vm_offset;
 
@@ -1004,6 +1008,14 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 		INSTANCE_WR(chan->vm_pd, (0+0)/4,
 			    chan->vm_gart_pt->instance | 0x03);
 		INSTANCE_WR(chan->vm_pd, (0+4)/4, 0x00000000);
+
+		if ((ret = nouveau_gpuobj_ref_add(dev, NULL, 0,
+						  dev_priv->vm_vram_pt,
+						  &chan->vm_vram_pt)))
+			return ret;
+		INSTANCE_WR(chan->vm_pd, (8+0)/4,
+			    chan->vm_vram_pt->instance | 0x61);
+		INSTANCE_WR(chan->vm_pd, (8+4)/4, 0x00000000);
 	}
 
 	/* RAMHT */
@@ -1022,6 +1034,17 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 	}
 
 	/* VRAM ctxdma */
+	if (dev_priv->card_type >= NV_50) {
+		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
+					     512*1024*1024,
+					     dev_priv->fb_available_size,
+					     NV_DMA_ACCESS_RW,
+					     NV_DMA_TARGET_AGP, &vram);
+		if (ret) {
+			DRM_ERROR("Error creating VRAM ctxdma: %d\n", ret);
+			return ret;
+		}
+	} else
 	if ((ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY, 0,
 					  dev_priv->fb_available_size,
 					  NV_DMA_ACCESS_RW,
@@ -1084,6 +1107,7 @@ nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan)
 
 	nouveau_gpuobj_del(dev, &chan->vm_pd);
 	nouveau_gpuobj_ref_del(dev, &chan->vm_gart_pt);
+	nouveau_gpuobj_ref_del(dev, &chan->vm_vram_pt);
 
 	if (chan->ramin_heap)
 		nouveau_mem_takedown(&chan->ramin_heap);
diff --git a/shared-core/nouveau_state.c b/shared-core/nouveau_state.c
index 12162167..5ed16d7a 100644
--- a/shared-core/nouveau_state.c
+++ b/shared-core/nouveau_state.c
@@ -384,6 +384,7 @@ static void nouveau_card_takedown(struct drm_device *dev)
 		nouveau_sgdma_takedown(dev);
 
 		nouveau_gpuobj_takedown(dev);
+		nouveau_gpuobj_del(dev, &dev_priv->vm_vram_pt);
 
 		nouveau_mem_close(dev);
 		engine->instmem.takedown(dev);
diff --git a/shared-core/nv50_instmem.c b/shared-core/nv50_instmem.c
index 9687ecbb..b7a51f09 100644
--- a/shared-core/nv50_instmem.c
+++ b/shared-core/nv50_instmem.c
@@ -243,7 +243,8 @@ nv50_instmem_populate(struct drm_device *dev, struct nouveau_gpuobj *gpuobj, uin
 		return -EINVAL;
 
 	gpuobj->im_backing = nouveau_mem_alloc(dev, NV50_INSTMEM_PAGE_SIZE,
-					       *sz, NOUVEAU_MEM_FB,
+					       *sz, NOUVEAU_MEM_FB |
+					       NOUVEAU_MEM_NOVM,
 					       (struct drm_file *)-2);
 	if (!gpuobj->im_backing) {
 		DRM_ERROR("Couldn't allocate vram to back PRAMIN pages\n");
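For readers following the nouveau_gpuobj_channel_init() hunks above, here is a
simplified, self-contained restatement of the two page-directory writes this
patch makes for each channel. It is illustrative only: struct fake_pd and
write_pde() are made-up stand-ins, while the real code performs the equivalent
INSTANCE_WR() stores on chan->vm_pd, with the 0x03 and 0x61 flag values taken
straight from the diff.

    #include <stdint.h>
    #include <stdio.h>

    /* Hypothetical stand-in for the start of a channel's page directory. */
    struct fake_pd { uint32_t word[4]; };

    /* Hypothetical helper: one PDE is two 32-bit words, low word = page-table
     * instance address OR'd with flags, high word = 0. */
    static void write_pde(struct fake_pd *pd, int idx, uint32_t pt_inst, uint32_t flags)
    {
            pd->word[idx * 2 + 0] = pt_inst | flags;
            pd->word[idx * 2 + 1] = 0x00000000;
    }

    int main(void)
    {
            struct fake_pd pd = { { 0 } };
            uint32_t gart_pt_inst = 0x00010000;  /* placeholder instance addresses */
            uint32_t vram_pt_inst = 0x00020000;

            /* PDE 0: GPU virtual 0-512MiB -> shared PCIE GART page table */
            write_pde(&pd, 0, gart_pt_inst, 0x03);
            /* PDE 1: GPU virtual 512-1024MiB -> shared VRAM page table,
             * which is why the NV50 VRAM ctxdma now starts at 512MiB. */
            write_pde(&pd, 1, vram_pt_inst, 0x61);

            printf("PDE0 = 0x%08x, PDE1 = 0x%08x\n",
                   (unsigned)pd.word[0], (unsigned)pd.word[2]);
            return 0;
    }
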