6 files changed, 114 insertions, 2 deletions
diff --git a/shared-core/nouveau_drm.h b/shared-core/nouveau_drm.h
index 5f07fcbc..cf762052 100644
--- a/shared-core/nouveau_drm.h
+++ b/shared-core/nouveau_drm.h
@@ -87,6 +87,10 @@ struct drm_nouveau_gpuobj_free {
 #define NOUVEAU_MEM_MAPPED		0x00000100
 #define NOUVEAU_MEM_INSTANCE		0x00000200 /* internal */
 #define NOUVEAU_MEM_NOTIFIER            0x00000400 /* internal */
+#define NOUVEAU_MEM_NOVM		0x00000800 /* internal */
+#define NOUVEAU_MEM_INTERNAL (NOUVEAU_MEM_INSTANCE | \
+			      NOUVEAU_MEM_NOTIFIER | \
+			      NOUVEAU_MEM_NOVM)
 
 struct drm_nouveau_mem_alloc {
 	int flags;
diff --git a/shared-core/nouveau_drv.h b/shared-core/nouveau_drv.h
index 4184aa5b..a51e552c 100644
--- a/shared-core/nouveau_drv.h
+++ b/shared-core/nouveau_drv.h
@@ -136,6 +136,7 @@ struct nouveau_channel
 	/* NV50 VM */
 	struct nouveau_gpuobj     *vm_pd;
 	struct nouveau_gpuobj_ref *vm_gart_pt;
+	struct nouveau_gpuobj_ref *vm_vram_pt;
 
 	/* Objects */
 	struct nouveau_gpuobj_ref *ramin; /* Private instmem */
@@ -290,6 +291,9 @@ struct drm_nouveau_private {
 		unsigned long sg_handle;
 	} gart_info;
 
+	/* G8x global VRAM page table */
+	struct nouveau_gpuobj *vm_vram_pt;
+
 	/* the mtrr covering the FB */
 	int fb_mtrr;
 
diff --git a/shared-core/nouveau_mem.c b/shared-core/nouveau_mem.c
index 4e80ca46..2cf8807d 100644
--- a/shared-core/nouveau_mem.c
+++ b/shared-core/nouveau_mem.c
@@ -468,6 +468,11 @@ int nouveau_mem_init(struct drm_device *dev)
 	/* Init FB */
 	dev_priv->fb_phys=drm_get_resource_start(dev,1);
 	fb_size = nouveau_mem_fb_amount(dev);
+	/* On G80, limit VRAM to 512MiB temporarily due to limits in how
+	 * we handle VRAM page tables.
+	 */
+	if (dev_priv->card_type >= NV_50 && fb_size > (512 * 1024 * 1024))
+		fb_size = (512 * 1024 * 1024);
 	/* On at least NV40, RAMIN is actually at the end of vram.
 	 * We don't want to allocate this... */
 	if (dev_priv->card_type >= NV_40)
@@ -540,6 +545,21 @@ int nouveau_mem_init(struct drm_device *dev)
 		}
 	}
 
+	/* G8x: Allocate shared page table to map real VRAM pages into */
+	if (dev_priv->card_type >= NV_50) {
+		unsigned size = ((512 * 1024 * 1024) / 65536) * 8;
+
+		ret = nouveau_gpuobj_new(dev, NULL, size, 0,
+					 NVOBJ_FLAG_ZERO_ALLOC |
+					 NVOBJ_FLAG_ALLOW_NO_REFS,
+					 &dev_priv->vm_vram_pt);
+		if (ret) {
+			DRM_ERROR("Error creating VRAM page table: %d\n", ret);
+			return ret;
+		}
+	}
+
+
 	return 0;
 }
 
@@ -558,6 +578,12 @@ struct mem_block* nouveau_mem_alloc(struct drm_device *dev, int alignment,
 	if (alignment < PAGE_SHIFT)
 		alignment = PAGE_SHIFT;
 
+	/* Align allocation sizes to 64KiB blocks on G8x.  We use a 64KiB
+	 * page size in the GPU VM.
+	 */
+	if (flags & NOUVEAU_MEM_FB && dev_priv->card_type >= NV_50)
+		size = (size + (64 * 1024)) & ~((64 * 1024) - 1);
+
 	/*
 	 * Warn about 0 sized allocations, but let it go through. It'll return 1 page
 	 */
@@ -612,6 +638,30 @@ struct mem_block* nouveau_mem_alloc(struct drm_device *dev, int alignment,
 alloc_ok:
 	block->flags=type;
 
+	/* On G8x, map memory into VM */
+	if (block->flags & NOUVEAU_MEM_FB && dev_priv->card_type >= NV_50 &&
+	    !(flags & NOUVEAU_MEM_NOVM)) {
+		struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt;
+		unsigned offset = block->start;
+		unsigned count = block->size / 65536;
+
+		if (!pt) {
+			DRM_ERROR("vm alloc without vm pt\n");
+			nouveau_mem_free_block(block);
+			return NULL;
+		}
+
+		while (count--) {
+			unsigned pte = offset / 65536;
+
+			INSTANCE_WR(pt, (pte * 2) + 0, offset | 1);
+			INSTANCE_WR(pt, (pte * 2) + 1, 0x00000000);
+			offset += 65536;
+		}
+	} else {
+		block->flags |= NOUVEAU_MEM_NOVM;
+	}	
+
 	if (flags&NOUVEAU_MEM_MAPPED)
 	{
 		struct drm_map_list *entry;
@@ -653,9 +703,34 @@ alloc_ok:
 
 void nouveau_mem_free(struct drm_device* dev, struct mem_block* block)
 {
+	struct drm_nouveau_private *dev_priv = dev->dev_private;
+
 	DRM_DEBUG("freeing 0x%llx type=0x%08x\n", block->start, block->flags);
+
 	if (block->flags&NOUVEAU_MEM_MAPPED)
 		drm_rmmap(dev, block->map);
+
+	/* G8x: Remove pages from vm */
+	if (block->flags & NOUVEAU_MEM_FB && dev_priv->card_type >= NV_50 &&
+	    !(block->flags & NOUVEAU_MEM_NOVM)) {
+		struct nouveau_gpuobj *pt = dev_priv->vm_vram_pt;
+		unsigned offset = block->start;
+		unsigned count = block->size / 65536;
+
+		if (!pt) {
+			DRM_ERROR("vm free without vm pt\n");
+			goto out_free;
+		}
+
+		while (count--) {
+			unsigned pte = offset / 65536;
+			INSTANCE_WR(pt, (pte * 2) + 0, 0);
+			INSTANCE_WR(pt, (pte * 2) + 1, 0);
+			offset += 65536;
+		}
+	}
+
+out_free:
 	nouveau_mem_free_block(block);
 }
 
@@ -670,6 +745,9 @@ int nouveau_ioctl_mem_alloc(struct drm_device *dev, void *data, struct drm_file
 
 	NOUVEAU_CHECK_INITIALISED_WITH_RETURN;
 
+	if (alloc->flags & NOUVEAU_MEM_INTERNAL)
+		return -EINVAL;
+
 	block=nouveau_mem_alloc(dev, alloc->alignment, alloc->size,
 				alloc->flags, file_priv);
 	if (!block)
diff --git a/shared-core/nouveau_object.c b/shared-core/nouveau_object.c
index b6bf759d..09f9027a 100644
--- a/shared-core/nouveau_object.c
+++ b/shared-core/nouveau_object.c
@@ -983,7 +983,11 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 			return ret;
 	}
 
-	/* NV50 VM, point offset 0-512MiB at shared PCIEGART table  */
+	/* NV50 VM
+	 *  - Allocate per-channel page-directory
+	 *  - Point offset 0-512MiB at shared PCIEGART table
+	 *  - Point offset 512-1024MiB at shared VRAM table
+	 */
 	if (dev_priv->card_type >= NV_50) {
 		uint32_t vm_offset;
 
@@ -1004,6 +1008,14 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 		INSTANCE_WR(chan->vm_pd, (0+0)/4,
 			    chan->vm_gart_pt->instance | 0x03);
 		INSTANCE_WR(chan->vm_pd, (0+4)/4, 0x00000000);
+
+		if ((ret = nouveau_gpuobj_ref_add(dev, NULL, 0,
+						  dev_priv->vm_vram_pt,
+						  &chan->vm_vram_pt)))
+			return ret;
+		INSTANCE_WR(chan->vm_pd, (8+0)/4,
+			    chan->vm_vram_pt->instance | 0x61);
+		INSTANCE_WR(chan->vm_pd, (8+4)/4, 0x00000000);
 	}
 
 	/* RAMHT */
@@ -1022,6 +1034,17 @@ nouveau_gpuobj_channel_init(struct nouveau_channel *chan,
 	}
 
 	/* VRAM ctxdma */
+	if (dev_priv->card_type >= NV_50) {
+		ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
+					     512*1024*1024,
+					     dev_priv->fb_available_size,
+					     NV_DMA_ACCESS_RW,
+					     NV_DMA_TARGET_AGP, &vram);
+		if (ret) {
+			DRM_ERROR("Error creating VRAM ctxdma: %d\n", ret);
+			return ret;
+		}
+	} else
 	if ((ret = nouveau_gpuobj_dma_new(chan, NV_CLASS_DMA_IN_MEMORY,
 					  0, dev_priv->fb_available_size,
 					  NV_DMA_ACCESS_RW,
@@ -1084,6 +1107,7 @@ nouveau_gpuobj_channel_takedown(struct nouveau_channel *chan)
 
 	nouveau_gpuobj_del(dev, &chan->vm_pd);
 	nouveau_gpuobj_ref_del(dev, &chan->vm_gart_pt);
+	nouveau_gpuobj_ref_del(dev, &chan->vm_vram_pt);
 
 	if (chan->ramin_heap)
 		nouveau_mem_takedown(&chan->ramin_heap);
diff --git a/shared-core/nouveau_state.c b/shared-core/nouveau_state.c
index 12162167..5ed16d7a 100644
--- a/shared-core/nouveau_state.c
+++ b/shared-core/nouveau_state.c
@@ -384,6 +384,7 @@ static void nouveau_card_takedown(struct drm_device *dev)
 		nouveau_sgdma_takedown(dev);
 
 		nouveau_gpuobj_takedown(dev);
+		nouveau_gpuobj_del(dev, &dev_priv->vm_vram_pt);
 
 		nouveau_mem_close(dev);
 		engine->instmem.takedown(dev);
diff --git a/shared-core/nv50_instmem.c b/shared-core/nv50_instmem.c
index 9687ecbb..b7a51f09 100644
--- a/shared-core/nv50_instmem.c
+++ b/shared-core/nv50_instmem.c
@@ -243,7 +243,8 @@ nv50_instmem_populate(struct drm_device *dev, struct nouveau_gpuobj *gpuobj, uin
 		return -EINVAL;
 
 	gpuobj->im_backing = nouveau_mem_alloc(dev, NV50_INSTMEM_PAGE_SIZE,
-					       *sz, NOUVEAU_MEM_FB,
+					       *sz, NOUVEAU_MEM_FB |
+					       NOUVEAU_MEM_NOVM,
 					       (struct drm_file *)-2);
 	if (!gpuobj->im_backing) {
 		DRM_ERROR("Couldn't allocate vram to back PRAMIN pages\n");