From c806bba4665bb369168ee0b453fa28e2e0bf2a5d Mon Sep 17 00:00:00 2001
From: Ben Skeggs <skeggsb@gmail.com>
Date: Thu, 5 Jul 2007 00:12:33 +1000
Subject: nouveau/nv50: Initial channel/object support

Should be OK on G84 for a single channel; multiple channels *almost* work.

Untested on G80.
---
 shared-core/nouveau_drv.h      |  46 ++++++--
 shared-core/nouveau_fifo.c     |  99 +++++++++-------
 shared-core/nouveau_irq.c      |  15 ++-
 shared-core/nouveau_mem.c      | 143 +---------------------
 shared-core/nouveau_notifier.c |   6 +-
 shared-core/nouveau_object.c   |  70 ++++++-----
 shared-core/nouveau_reg.h      |  12 +-
 shared-core/nouveau_state.c    |  41 ++++++-
 shared-core/nv04_fifo.c        |  13 +-
 shared-core/nv04_graph.c       |  10 +-
 shared-core/nv04_instmem.c     | 165 ++++++++++++++++++++++++++
 shared-core/nv10_fifo.c        |  12 +-
 shared-core/nv10_graph.c       |   8 +-
 shared-core/nv20_graph.c       |   8 +-
 shared-core/nv30_graph.c       |   8 +-
 shared-core/nv40_fifo.c        |  12 +-
 shared-core/nv40_graph.c       |   8 +-
 shared-core/nv50_fifo.c        | 259 +++++++++++++++++++++++++++++++++++++++-
 shared-core/nv50_graph.c       | 231 ++++++++++++++++++++++++++++++++++--
 shared-core/nv50_instmem.c     | 262 +++++++++++++++++++++++++++++++++++++++++
 20 files changed, 1152 insertions(+), 276 deletions(-)
 create mode 100644 shared-core/nv04_instmem.c
 create mode 100644 shared-core/nv50_instmem.c

(limited to 'shared-core')

diff --git a/shared-core/nouveau_drv.h b/shared-core/nouveau_drv.h
index 73793b34..81972db5 100644
--- a/shared-core/nouveau_drv.h
+++ b/shared-core/nouveau_drv.h
@@ -72,6 +72,7 @@ typedef struct nouveau_gpuobj {
 	int im_channel;
 	struct mem_block *im_pramin;
 	struct mem_block *im_backing;
+	int im_bound;
 
 	uint32_t flags;
 	int refcount;
@@ -92,7 +93,6 @@ typedef struct nouveau_gpuobj_ref {
 
 struct nouveau_fifo
 {
-	int used;
 	/* owner of this fifo */
 	DRMFILE filp;
 	/* mapping of the fifo itself */
@@ -101,8 +101,8 @@ struct nouveau_fifo
 	drm_local_map_t *regs;
 
 	/* DMA push buffer */
-	struct mem_block     *cmdbuf_mem;
 	nouveau_gpuobj_ref_t *pushbuf;
+	struct mem_block     *pushbuf_mem;
 	uint32_t              pushbuf_base;
 
 	/* Notifier memory */
@@ -132,6 +132,19 @@ struct nouveau_config {
 };
 
 typedef struct nouveau_engine_func {
+	struct {
+		void	*priv;
+
+		int	(*init)(drm_device_t *dev);
+		void	(*takedown)(drm_device_t *dev);
+
+		int	(*populate)(drm_device_t *, nouveau_gpuobj_t *,
+				    uint32_t *size);
+		void	(*clear)(drm_device_t *, nouveau_gpuobj_t *);
+		int	(*bind)(drm_device_t *, nouveau_gpuobj_t *);
+		int	(*unbind)(drm_device_t *, nouveau_gpuobj_t *);
+	} instmem;
+
 	struct {
 		int	(*init)(drm_device_t *dev);
 		void	(*takedown)(drm_device_t *dev);
@@ -158,6 +171,8 @@ typedef struct nouveau_engine_func {
 	} graph;
 
 	struct {
+		void	*priv;
+
 		int	(*init)(drm_device_t *);
 		void	(*takedown)(drm_device_t *);
 
@@ -180,13 +195,13 @@ typedef struct drm_nouveau_private {
 	drm_local_map_t *ramin; /* NV40 onwards */
 
 	int fifo_alloc_count;
-	struct nouveau_fifo fifos[NV_MAX_FIFO_NUMBER];
+	struct nouveau_fifo *fifos[NV_MAX_FIFO_NUMBER];
 
 	struct nouveau_engine_func Engine;
 
 	/* RAMIN configuration, RAMFC, RAMHT and RAMRO offsets */
 	nouveau_gpuobj_t *ramht;
-	uint32_t ramin_size;
+	uint32_t ramin_rsvd_vram;
 	uint32_t ramht_offset;
 	uint32_t ramht_size;
 	uint32_t ramht_bits;
@@ -246,11 +261,6 @@ extern struct mem_block* nouveau_mem_alloc(struct drm_device *dev, int alignment
 extern void              nouveau_mem_free(struct drm_device* dev, struct mem_block*);
 extern int               nouveau_mem_init(struct drm_device *dev);
 extern void              nouveau_mem_close(struct drm_device *dev);
-extern int               nouveau_instmem_init(struct drm_device *dev);
-extern struct mem_block* nouveau_instmem_alloc(struct drm_device *dev,
-					       uint32_t size, uint32_t align);
-extern void              nouveau_instmem_free(struct drm_device *dev,
-					      struct mem_block *block);
 
 /* nouveau_notifier.c */
 extern int  nouveau_notifier_init_channel(drm_device_t *, int channel, DRMFILE);
@@ -386,6 +396,24 @@ extern void nv50_graph_destroy_context(drm_device_t *, int channel);
 extern int  nv50_graph_load_context(drm_device_t *, int channel);
 extern int  nv50_graph_save_context(drm_device_t *, int channel);
 
+/* nv04_instmem.c */
+extern int  nv04_instmem_init(drm_device_t *dev);
+extern void nv04_instmem_takedown(drm_device_t *dev);
+extern int  nv04_instmem_populate(drm_device_t*, nouveau_gpuobj_t*,
+				  uint32_t *size);
+extern void nv04_instmem_clear(drm_device_t*, nouveau_gpuobj_t*);
+extern int  nv04_instmem_bind(drm_device_t*, nouveau_gpuobj_t*);
+extern int  nv04_instmem_unbind(drm_device_t*, nouveau_gpuobj_t*);
+
+/* nv50_instmem.c */
+extern int  nv50_instmem_init(drm_device_t *dev);
+extern void nv50_instmem_takedown(drm_device_t *dev);
+extern int  nv50_instmem_populate(drm_device_t*, nouveau_gpuobj_t*,
+				  uint32_t *size);
+extern void nv50_instmem_clear(drm_device_t*, nouveau_gpuobj_t*);
+extern int  nv50_instmem_bind(drm_device_t*, nouveau_gpuobj_t*);
+extern int  nv50_instmem_unbind(drm_device_t*, nouveau_gpuobj_t*);
+
 /* nv04_mc.c */
 extern int  nv04_mc_init(drm_device_t *dev);
 extern void nv04_mc_takedown(drm_device_t *dev);
diff --git a/shared-core/nouveau_fifo.c b/shared-core/nouveau_fifo.c
index 9f916307..c140a634 100644
--- a/shared-core/nouveau_fifo.c
+++ b/shared-core/nouveau_fifo.c
@@ -39,6 +39,8 @@ int nouveau_fifo_number(drm_device_t* dev)
 		case NV_04:
 		case NV_05:
 			return 16;
+		case NV_50:
+			return 128;
 		default:
 			return 32;
 	}
@@ -186,7 +188,7 @@ static int
 nouveau_fifo_cmdbuf_alloc(struct drm_device *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	struct nouveau_config *config = &dev_priv->config;
 	struct mem_block *cb;
 	int cb_min_size = max(NV03_FIFO_SIZE,PAGE_SIZE);
@@ -242,8 +244,8 @@ nouveau_fifo_cmdbuf_alloc(struct drm_device *dev, int channel)
 		return ret;
 	}
 
-	dev_priv->fifos[channel].pushbuf_base = 0;
-	dev_priv->fifos[channel].cmdbuf_mem = cb;
+	dev_priv->fifos[channel]->pushbuf_base = 0;
+	dev_priv->fifos[channel]->pushbuf_mem = cb;
 	return 0;
 }
 
@@ -265,22 +267,27 @@ int nouveau_fifo_alloc(drm_device_t* dev, int *chan_ret, DRMFILE filp,
 	 * (woo, full userspace command submission !)
 	 * When there are no more contexts, you lost
 	 */
-	for(channel=0; channel<nouveau_fifo_number(dev); channel++)
-		if (dev_priv->fifos[channel].used==0)
+	for(channel=0; channel<nouveau_fifo_number(dev); channel++) {
+		if ((dev_priv->card_type == NV_50) && (channel == 0))
+			continue;
+		if (dev_priv->fifos[channel] == NULL)
 			break;
+	}
 	/* no more fifos. you lost. */
 	if (channel==nouveau_fifo_number(dev))
 		return DRM_ERR(EINVAL);
 	(*chan_ret) = channel;
-	chan = &dev_priv->fifos[channel];
-	memset(chan, sizeof(*chan), 0);
-
-	DRM_INFO("Allocating FIFO number %d\n", channel);
 
-	/* that fifo is used */
-	chan->used = 1;
+	dev_priv->fifos[channel] = drm_calloc(1, sizeof(struct nouveau_fifo),
+					      DRM_MEM_DRIVER);
+	if (!dev_priv->fifos[channel])
+		return DRM_ERR(ENOMEM);
+	dev_priv->fifo_alloc_count++;
+	chan = dev_priv->fifos[channel];
 	chan->filp = filp;
 
+	DRM_INFO("Allocating FIFO number %d\n", channel);
+
 	/* Setup channel's default objects */
 	ret = nouveau_gpuobj_channel_init(dev, channel, vram_handle, tt_handle);
 	if (ret) {
@@ -324,17 +331,19 @@ int nouveau_fifo_alloc(drm_device_t* dev, int *chan_ret, DRMFILE filp,
 		return ret;
 	}
 
-	/* enable the fifo dma operation */
-	NV_WRITE(NV04_PFIFO_MODE,NV_READ(NV04_PFIFO_MODE)|(1<<channel));
-
 	/* setup channel's default get/put values */
-	NV_WRITE(NV03_FIFO_REGS_DMAPUT(channel), chan->pushbuf_base);
-	NV_WRITE(NV03_FIFO_REGS_DMAGET(channel), chan->pushbuf_base);
+	if (dev_priv->card_type < NV_50) {
+		NV_WRITE(NV03_FIFO_REGS_DMAPUT(channel), chan->pushbuf_base);
+		NV_WRITE(NV03_FIFO_REGS_DMAGET(channel), chan->pushbuf_base);
+	} else {
+		NV_WRITE(NV50_FIFO_REGS_DMAPUT(channel), chan->pushbuf_base);
+		NV_WRITE(NV50_FIFO_REGS_DMAGET(channel), chan->pushbuf_base);
+	}
 
 	/* If this is the first channel, setup PFIFO ourselves.  For any
 	 * other case, the GPU will handle this when it switches contexts.
 	 */
-	if (dev_priv->fifo_alloc_count == 0) {
+	if (dev_priv->fifo_alloc_count == 1) {
 		ret = engine->fifo.load_context(dev, channel);
 		if (ret) {
 			nouveau_fifo_free(dev, channel);
@@ -352,7 +361,7 @@ int nouveau_fifo_alloc(drm_device_t* dev, int *chan_ret, DRMFILE filp,
 		 * set.  Proper fix would be to find which object+method is
 		 * responsible for modifying this state.
 		 */
-		if (dev_priv->chipset >= 0x10) {
+		if (dev_priv->chipset >= 0x10 && dev_priv->chipset < 0x50) {
 			uint32_t tmp;
 			tmp = NV_READ(NV10_PGRAPH_SURFACE) & 0x0007ff00;
 			NV_WRITE(NV10_PGRAPH_SURFACE, tmp);
@@ -361,15 +370,14 @@ int nouveau_fifo_alloc(drm_device_t* dev, int *chan_ret, DRMFILE filp,
 		}
 	}
 
-	NV_WRITE(NV04_PFIFO_CACHE1_DMA_PUSH, 0x00000001);
+	NV_WRITE(NV04_PFIFO_CACHE1_DMA_PUSH,
+		 NV_READ(NV04_PFIFO_CACHE1_DMA_PUSH) | 1);
 	NV_WRITE(NV03_PFIFO_CACHE1_PUSH0, 0x00000001);
 	NV_WRITE(NV04_PFIFO_CACHE1_PULL0, 0x00000001);
 	NV_WRITE(NV04_PFIFO_CACHE1_PULL1, 0x00000001);
 
 	/* reenable the fifo caches */
-	NV_WRITE(NV03_PFIFO_CACHES, 0x00000001);
-
-	dev_priv->fifo_alloc_count++;
+	NV_WRITE(NV03_PFIFO_CACHES, 1);
 
 	DRM_INFO("%s: initialised FIFO %d\n", __func__, channel);
 	return 0;
@@ -380,17 +388,20 @@ void nouveau_fifo_free(drm_device_t* dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	nouveau_engine_func_t *engine = &dev_priv->Engine;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+
+	if (!chan) {
+		DRM_ERROR("Freeing non-existant channel %d\n", channel);
+		return;
+	}
 
-	chan->used = 0;
 	DRM_INFO("%s: freeing fifo %d\n", __func__, channel);
 
 	/* disable the fifo caches */
 	NV_WRITE(NV03_PFIFO_CACHES, 0x00000000);
 
-	NV_WRITE(NV04_PFIFO_MODE, NV_READ(NV04_PFIFO_MODE)&~(1<<channel));
 	// FIXME XXX needs more code
-	
+
 	engine->fifo.destroy_context(dev, channel);
 
 	/* Cleanup PGRAPH state */
@@ -399,13 +410,11 @@ void nouveau_fifo_free(drm_device_t* dev, int channel)
 	/* reenable the fifo caches */
 	NV_WRITE(NV03_PFIFO_CACHES, 0x00000001);
 
-	/* Deallocate command buffer */
-	if (chan->pushbuf)
-		nouveau_gpuobj_ref_del(dev, &chan->pushbuf);
-
-	if (chan->cmdbuf_mem) {
-		nouveau_mem_free(dev, chan->cmdbuf_mem);
-		chan->cmdbuf_mem = NULL;
+	/* Deallocate push buffer */
+	nouveau_gpuobj_ref_del(dev, &chan->pushbuf);
+	if (chan->pushbuf_mem) {
+		nouveau_mem_free(dev, chan->pushbuf_mem);
+		chan->pushbuf_mem = NULL;
 	}
 
 	nouveau_notifier_takedown_channel(dev, channel);
@@ -413,7 +422,9 @@ void nouveau_fifo_free(drm_device_t* dev, int channel)
 	/* Destroy objects belonging to the channel */
 	nouveau_gpuobj_channel_takedown(dev, channel);
 
+	dev_priv->fifos[channel] = NULL;
 	dev_priv->fifo_alloc_count--;
+	drm_free(chan, sizeof(*chan), DRM_MEM_DRIVER);
 }
 
 /* cleanups all the fifos from filp */
@@ -424,7 +435,7 @@ void nouveau_fifo_cleanup(drm_device_t* dev, DRMFILE filp)
 
 	DRM_DEBUG("clearing FIFO enables from filp\n");
 	for(i=0;i<nouveau_fifo_number(dev);i++)
-		if (dev_priv->fifos[i].used && dev_priv->fifos[i].filp==filp)
+		if (dev_priv->fifos[i] && dev_priv->fifos[i]->filp==filp)
 			nouveau_fifo_free(dev,i);
 }
 
@@ -435,9 +446,9 @@ nouveau_fifo_owner(drm_device_t *dev, DRMFILE filp, int channel)
 
 	if (channel >= nouveau_fifo_number(dev))
 		return 0;
-	if (dev_priv->fifos[channel].used == 0)
+	if (dev_priv->fifos[channel] == NULL)
 		return 0;
-	return (dev_priv->fifos[channel].filp == filp);
+	return (dev_priv->fifos[channel]->filp == filp);
 }
 
 /***********************************
@@ -460,22 +471,28 @@ static int nouveau_ioctl_fifo_alloc(DRM_IOCTL_ARGS)
 				 init.tt_ctxdma_handle);
 	if (res)
 		return res;
-	chan = &dev_priv->fifos[init.channel];
+	chan = dev_priv->fifos[init.channel];
 
 	init.put_base = chan->pushbuf_base;
 
 	/* make the fifo available to user space */
 	/* first, the fifo control regs */
-	init.ctrl      = dev_priv->mmio->offset + NV03_FIFO_REGS(init.channel);
-	init.ctrl_size = NV03_FIFO_REGS_SIZE;
+	init.ctrl = dev_priv->mmio->offset;
+	if (dev_priv->card_type < NV_50) {
+		init.ctrl      += NV03_FIFO_REGS(init.channel);
+		init.ctrl_size  = NV03_FIFO_REGS_SIZE;
+	} else {
+		init.ctrl      += NV50_FIFO_REGS(init.channel);
+		init.ctrl_size  = NV50_FIFO_REGS_SIZE;
+	}
 	res = drm_addmap(dev, init.ctrl, init.ctrl_size, _DRM_REGISTERS,
 			 0, &chan->regs);
 	if (res != 0)
 		return res;
 
 	/* pass back FIFO map info to the caller */
-	init.cmdbuf      = chan->cmdbuf_mem->start;
-	init.cmdbuf_size = chan->cmdbuf_mem->size;
+	init.cmdbuf      = chan->pushbuf_mem->start;
+	init.cmdbuf_size = chan->pushbuf_mem->size;
 
 	/* and the notifier block */
 	init.notifier      = chan->notifier_block->start;
diff --git a/shared-core/nouveau_irq.c b/shared-core/nouveau_irq.c
index 72b12e0c..b4102dd8 100644
--- a/shared-core/nouveau_irq.c
+++ b/shared-core/nouveau_irq.c
@@ -251,22 +251,25 @@ nouveau_graph_dump_trap_info(drm_device_t *dev)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	uint32_t address;
-	uint32_t channel;
+	uint32_t channel, class;
 	uint32_t method, subc, data;
 
 	address = NV_READ(0x400704);
-	data    = NV_READ(0x400708);
 	channel = (address >> 20) & 0x1F;
 	subc    = (address >> 16) & 0x7;
 	method  = address & 0x1FFC;
+	data    = NV_READ(0x400708);
+	if (dev_priv->card_type < NV_50) {
+		class = NV_READ(0x400160 + subc*4) & 0xFFFF;
+	} else {
+		class = NV_READ(0x400814);
+	}
 
 	DRM_ERROR("NV: nSource: 0x%08x, nStatus: 0x%08x\n",
 			NV_READ(0x400108), NV_READ(0x400104));
 	DRM_ERROR("NV: Channel %d/%d (class 0x%04x) -"
 			"Method 0x%04x, Data 0x%08x\n",
-			channel, subc,
-			NV_READ(0x400160+subc*4) & 0xFFFF,
-			method, data
+			channel, subc, class, method, data
 		 );
 }
 
@@ -294,7 +297,7 @@ static void nouveau_pgraph_irq_handler(drm_device_t *dev)
 			instance = NV_READ(0x00400158);
 			notify   = NV_READ(0x00400150) >> 16;
 			DRM_DEBUG("instance:0x%08x\tnotify:0x%08x\n",
-					nsource, nstatus);
+				  instance, notify);
 		}
 
 		status &= ~NV_PGRAPH_INTR_NOTIFY;
diff --git a/shared-core/nouveau_mem.c b/shared-core/nouveau_mem.c
index 49041862..c75a9356 100644
--- a/shared-core/nouveau_mem.c
+++ b/shared-core/nouveau_mem.c
@@ -353,7 +353,7 @@ no_agp:
 	/* On at least NV40, RAMIN is actually at the end of vram.
 	 * We don't want to allocate this... */
 	if (dev_priv->card_type >= NV_40)
-		fb_size -= dev_priv->ramin_size;
+		fb_size -= dev_priv->ramin_rsvd_vram;
 	dev_priv->fb_available_size = fb_size;
 	DRM_DEBUG("Available VRAM: %dKiB\n", fb_size>>10);
 
@@ -463,147 +463,6 @@ void nouveau_mem_free(struct drm_device* dev, struct mem_block* block)
 	nouveau_mem_free_block(block);
 }
 
-static void
-nouveau_instmem_determine_amount(struct drm_device *dev)
-{
-	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	int i;
-
-	/* Figure out how much instance memory we need */
-	switch (dev_priv->card_type) {
-	case NV_40:
-		/* We'll want more instance memory than this on some NV4x cards.
-		 * There's a 16MB aperture to play with that maps onto the end
-		 * of vram.  For now, only reserve a small piece until we know
-		 * more about what each chipset requires.
-		 */
-		dev_priv->ramin_size = (1*1024* 1024);
-		break;
-	default:
-		/*XXX: what *are* the limits on <NV40 cards?, and does RAMIN
-		 *     exist in vram on those cards as well?
-		 */
-		dev_priv->ramin_size = (512*1024);
-		break;
-	}
-	DRM_DEBUG("RAMIN size: %dKiB\n", dev_priv->ramin_size>>10);
-
-	/* Clear all of it, except the BIOS image that's in the first 64KiB */
-	for (i=(64*1024); i<dev_priv->ramin_size; i+=4)
-		NV_WI32(i, 0x00000000);
-}
-
-static void
-nouveau_instmem_configure_fixed_tables(struct drm_device *dev)
-{
-	drm_nouveau_private_t *dev_priv = dev->dev_private;
-
-	/* FIFO hash table (RAMHT)
-	 *   use 4k hash table at RAMIN+0x10000
-	 *   TODO: extend the hash table
-	 */
-	dev_priv->ramht_offset = 0x10000;
-	dev_priv->ramht_bits   = 9;
-	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits);
-	DRM_DEBUG("RAMHT offset=0x%x, size=%d\n", dev_priv->ramht_offset,
-						  dev_priv->ramht_size);
-
-	/* FIFO runout table (RAMRO) - 512k at 0x11200 */
-	dev_priv->ramro_offset = 0x11200;
-	dev_priv->ramro_size   = 512;
-	DRM_DEBUG("RAMRO offset=0x%x, size=%d\n", dev_priv->ramro_offset,
-						  dev_priv->ramro_size);
-
-	/* FIFO context table (RAMFC)
-	 *   NV40  : Not sure exactly how to position RAMFC on some cards,
-	 *           0x30002 seems to position it at RAMIN+0x20000 on these
-	 *           cards.  RAMFC is 4kb (32 fifos, 128byte entries).
-	 *   Others: Position RAMFC at RAMIN+0x11400
-	 */
-	switch(dev_priv->card_type)
-	{
-		case NV_50:
-		case NV_40:
-		case NV_44:
-			dev_priv->ramfc_offset = 0x20000;
-			dev_priv->ramfc_size   = nouveau_fifo_number(dev) *
-				nouveau_fifo_ctx_size(dev);
-			break;
-		case NV_30:
-		case NV_20:
-		case NV_17:
-		case NV_10:
-		case NV_04:
-		case NV_03:
-		default:
-			dev_priv->ramfc_offset = 0x11400;
-			dev_priv->ramfc_size   = nouveau_fifo_number(dev) *
-				nouveau_fifo_ctx_size(dev);
-			break;
-	}
-	DRM_DEBUG("RAMFC offset=0x%x, size=%d\n", dev_priv->ramfc_offset,
-						  dev_priv->ramfc_size);
-}
-
-int nouveau_instmem_init(struct drm_device *dev)
-{
-	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t offset;
-	int ret = 0;
-
-	nouveau_instmem_determine_amount(dev);
-	nouveau_instmem_configure_fixed_tables(dev);
-
-	if ((ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramht_offset,
-						dev_priv->ramht_size,
-						NVOBJ_FLAG_ZERO_ALLOC |
-						NVOBJ_FLAG_ALLOW_NO_REFS,
-						&dev_priv->ramht, NULL)))
-		return ret;
-
-	/* Create a heap to manage RAMIN allocations, we don't allocate
-	 * the space that was reserved for RAMHT/FC/RO.
-	 */
-	offset = dev_priv->ramfc_offset + dev_priv->ramfc_size;
-	ret = nouveau_mem_init_heap(&dev_priv->ramin_heap,
-				    offset, dev_priv->ramin_size - offset);
-	if (ret) {
-		dev_priv->ramin_heap = NULL;
-		DRM_ERROR("Failed to init RAMIN heap\n");
-	}
-
-	return ret;
-}
-
-struct mem_block *nouveau_instmem_alloc(struct drm_device *dev,
-					uint32_t size, uint32_t align)
-{
-	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct mem_block *block;
-
-	if (!dev_priv->ramin_heap) {
-		DRM_ERROR("instmem alloc called without init\n");
-		return NULL;
-	}
-
-	block = nouveau_mem_alloc_block(dev_priv->ramin_heap, size, align,
-					(DRMFILE)-2);
-	if (block) {
-		block->flags = NOUVEAU_MEM_INSTANCE;
-		DRM_DEBUG("instance(size=%d, align=%d) alloc'd at 0x%08x\n",
-				size, (1<<align), (uint32_t)block->start);
-	}
-
-	return block;
-}
-
-void nouveau_instmem_free(struct drm_device *dev, struct mem_block *block)
-{
-	if (dev && block) {
-		nouveau_mem_free_block(block);
-	}
-}
-
 /*
  * Ioctls
  */
diff --git a/shared-core/nouveau_notifier.c b/shared-core/nouveau_notifier.c
index 4d5e26ab..9e792e57 100644
--- a/shared-core/nouveau_notifier.c
+++ b/shared-core/nouveau_notifier.c
@@ -33,7 +33,7 @@ int
 nouveau_notifier_init_channel(drm_device_t *dev, int channel, DRMFILE filp)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	int flags, ret;
 
 	/*TODO: PCI notifier blocks */
@@ -58,7 +58,7 @@ void
 nouveau_notifier_takedown_channel(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 
 	if (chan->notifier_block) {
 		nouveau_mem_free(dev, chan->notifier_block);
@@ -73,7 +73,7 @@ nouveau_notifier_alloc(drm_device_t *dev, int channel, uint32_t handle,
 		       int count, uint32_t *b_offset)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	nouveau_gpuobj_t *nobj = NULL;
 	struct mem_block *mem;
 	uint32_t offset;
diff --git a/shared-core/nouveau_object.c b/shared-core/nouveau_object.c
index 79875ca1..a394ae6e 100644
--- a/shared-core/nouveau_object.c
+++ b/shared-core/nouveau_object.c
@@ -76,7 +76,8 @@ nouveau_ramht_hash_handle(drm_device_t *dev, int channel, uint32_t handle)
 		hash ^= (handle & ((1 << dev_priv->ramht_bits) - 1));
 		handle >>= dev_priv->ramht_bits;
 	}
-	hash ^= channel << (dev_priv->ramht_bits - 4);
+	if (dev_priv->card_type < NV_50)
+		hash ^= channel << (dev_priv->ramht_bits - 4);
 	hash <<= 3;
 
 	DRM_DEBUG("ch%d handle=0x%08x hash=0x%08x\n", channel, handle, hash);
@@ -99,7 +100,7 @@ static int
 nouveau_ramht_insert(drm_device_t* dev, nouveau_gpuobj_ref_t *ref)
 {
 	drm_nouveau_private_t *dev_priv=dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[ref->channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[ref->channel];
 	nouveau_gpuobj_t *ramht = chan->ramht ? chan->ramht->gpuobj : NULL;
 	nouveau_gpuobj_t *gpuobj = ref->gpuobj;
 	uint32_t ctx, co, ho;
@@ -148,7 +149,7 @@ static void
 nouveau_ramht_remove(drm_device_t* dev, nouveau_gpuobj_ref_t *ref)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[ref->channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[ref->channel];
 	nouveau_gpuobj_t *ramht = chan->ramht ? chan->ramht->gpuobj : NULL;
 	uint32_t co, ho;
 
@@ -183,9 +184,11 @@ nouveau_gpuobj_new(drm_device_t *dev, int channel, int size, int align,
 		   uint32_t flags, nouveau_gpuobj_t **gpuobj_ret)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nouveau_engine_func_t *engine = &dev_priv->Engine;
 	struct nouveau_fifo *chan = NULL;
 	nouveau_gpuobj_t *gpuobj;
 	struct mem_block *pramin = NULL;
+	int ret;
 
 	DRM_DEBUG("ch%d size=%d align=%d flags=0x%08x\n",
 		  channel, size, align, flags);
@@ -196,7 +199,7 @@ nouveau_gpuobj_new(drm_device_t *dev, int channel, int size, int align,
 	if (channel >= 0) {
 		if (channel > nouveau_fifo_number(dev))
 			return DRM_ERR(EINVAL);
-		chan = &dev_priv->fifos[channel];
+		chan = dev_priv->fifos[channel];
 	}
 
 	gpuobj = drm_calloc(1, sizeof(*gpuobj), DRM_MEM_DRIVER);
@@ -230,6 +233,11 @@ nouveau_gpuobj_new(drm_device_t *dev, int channel, int size, int align,
 		return DRM_ERR(EINVAL);
 	}
 
+	if (!chan && (ret = engine->instmem.populate(dev, gpuobj, &size))) {
+		nouveau_gpuobj_del(dev, &gpuobj);
+		return ret;
+	}
+
 	/* Allocate a chunk of the PRAMIN aperture */
 	gpuobj->im_pramin = nouveau_mem_alloc_block(pramin, size,
 						    drm_order(align),
@@ -240,14 +248,9 @@ nouveau_gpuobj_new(drm_device_t *dev, int channel, int size, int align,
 	}
 	gpuobj->im_pramin->flags = NOUVEAU_MEM_INSTANCE;
 
-	/* On NV50 the PRAMIN aperture is paged.  When allocating from the
-	 * global instmem heap, alloc and bind VRAM pages into the PRAMIN
-	 * aperture.
-	 */
-	if (!chan && dev_priv->card_type >= NV_50) {
-		DRM_ERROR("back aperture with vram pages\n");
+	if (!chan && (ret = engine->instmem.bind(dev, gpuobj))) {
 		nouveau_gpuobj_del(dev, &gpuobj);
-		return DRM_ERR(EINVAL);
+		return ret;
 	}
 
 	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
@@ -285,6 +288,7 @@ void nouveau_gpuobj_takedown(drm_device_t *dev)
 int nouveau_gpuobj_del(drm_device_t *dev, nouveau_gpuobj_t **pgpuobj)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nouveau_engine_func_t *engine = &dev_priv->Engine;
 	nouveau_gpuobj_t *gpuobj;
 
 	DRM_DEBUG("gpuobj %p\n", pgpuobj ? *pgpuobj : NULL);
@@ -298,6 +302,8 @@ int nouveau_gpuobj_del(drm_device_t *dev, nouveau_gpuobj_t **pgpuobj)
 		return DRM_ERR(EINVAL);
 	}
 
+	engine->instmem.clear(dev, gpuobj);
+
 	if (gpuobj->im_pramin) {
 		if (gpuobj->flags & NVOBJ_FLAG_FAKE)
 			drm_free(gpuobj->im_pramin, sizeof(*gpuobj->im_pramin),
@@ -306,9 +312,6 @@ int nouveau_gpuobj_del(drm_device_t *dev, nouveau_gpuobj_t **pgpuobj)
 			nouveau_mem_free_block(gpuobj->im_pramin);
 	}
 
-	if (gpuobj->im_backing)
-		nouveau_mem_free(dev, gpuobj->im_backing);
-
 	if (gpuobj->next)
 		gpuobj->next->prev = gpuobj->prev;
 	if (gpuobj->prev)
@@ -342,7 +345,7 @@ nouveau_gpuobj_instance_get(drm_device_t *dev, int channel,
 
 	/* NV50 channel-local instance */
 	if (channel > 0) {
-		cpramin = dev_priv->fifos[channel].ramin->gpuobj;
+		cpramin = dev_priv->fifos[channel]->ramin->gpuobj;
 		*inst = gpuobj->im_pramin->start - cpramin->im_pramin->start;
 		return 0;
 	}
@@ -358,7 +361,7 @@ nouveau_gpuobj_instance_get(drm_device_t *dev, int channel,
 		return 0;
 	} else {
 		/* ...from local heap */
-		cpramin = dev_priv->fifos[gpuobj->im_channel].ramin->gpuobj;
+		cpramin = dev_priv->fifos[gpuobj->im_channel]->ramin->gpuobj;
 		*inst = (cpramin->im_backing->start - dev_priv->fb_phys) +
 			(gpuobj->im_pramin->start - cpramin->im_pramin->start);
 		return 0;
@@ -385,7 +388,7 @@ nouveau_gpuobj_ref_add(drm_device_t *dev, int channel, uint32_t handle,
 	if (channel >= 0) {
 		if (channel > nouveau_fifo_number(dev))
 			return DRM_ERR(EINVAL);
-		chan = &dev_priv->fifos[channel];
+		chan = dev_priv->fifos[channel];
 	} else
 	if (!ref_ret)
 		return DRM_ERR(EINVAL);
@@ -591,9 +594,10 @@ nouveau_gpuobj_dma_new(drm_device_t *dev, int channel, int class,
 		INSTANCE_WR(*gpuobj, 2, frame | pte_flags);
 		INSTANCE_WR(*gpuobj, 3, frame | pte_flags);
 	} else {
-		nouveau_gpuobj_del(dev, gpuobj);
-		DRM_ERROR("stub\n");
-		return DRM_ERR(EINVAL);
+		INSTANCE_WR(*gpuobj, 0, 0x00190000 | class);
+		INSTANCE_WR(*gpuobj, 1, offset + size - 1);
+		INSTANCE_WR(*gpuobj, 2, offset);
+		INSTANCE_WR(*gpuobj, 5, 0x00010000);
 	}
 
 	(*gpuobj)->engine = NVOBJ_ENGINE_SW;
@@ -672,11 +676,9 @@ nouveau_gpuobj_gr_new(drm_device_t *dev, int channel, int class,
 	}
 
 	if (dev_priv->card_type >= NV_50) {
-		nouveau_gpuobj_del(dev, gpuobj);
-		DRM_ERROR("stub!\n");
-		return DRM_ERR(EINVAL);
-	}
-
+		INSTANCE_WR(*gpuobj, 0, class);
+		INSTANCE_WR(*gpuobj, 5, 0x00010000);
+	} else {
 	switch (class) {
 	case NV_CLASS_NULL:
 		INSTANCE_WR(*gpuobj, 0, 0x00001030);
@@ -696,6 +698,7 @@ nouveau_gpuobj_gr_new(drm_device_t *dev, int channel, int class,
 #endif
 		}
 	}
+	}
 
 	(*gpuobj)->engine = NVOBJ_ENGINE_GR;
 	(*gpuobj)->class  = class;
@@ -706,7 +709,7 @@ static int
 nouveau_gpuobj_channel_init_pramin(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	nouveau_gpuobj_t *pramin = NULL;
 	int size, base, ret;
 
@@ -719,7 +722,16 @@ nouveau_gpuobj_channel_init_pramin(drm_device_t *dev, int channel)
 	/* PGRAPH context */
 
 	if (dev_priv->card_type == NV_50) {
-		/* RAMHT, RAMFC, PD, funny header thingo */
+		/* Various fixed table thingos */
+		size += 0x1400; /* mostly unknown stuff */
+		size += 0x4000; /* vm pd */
+		base  = 0x6000;
+		/* RAMHT, not sure about setting size yet, 32KiB to be safe */
+		size += 0x8000;
+		/* RAMFC */
+		size += 0x1000;
+		/* PGRAPH context */
+		size += 0x60000;
 	}
 
 	DRM_DEBUG("ch%d PRAMIN size: 0x%08x bytes, base alloc=0x%08x\n",
@@ -748,7 +760,7 @@ nouveau_gpuobj_channel_init(drm_device_t *dev, int channel,
 			    uint32_t vram_h, uint32_t tt_h)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	nouveau_gpuobj_t *vram = NULL, *tt = NULL;
 	int ret;
 
@@ -817,7 +829,7 @@ void
 nouveau_gpuobj_channel_takedown(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	nouveau_gpuobj_ref_t *ref;
 
 	DRM_DEBUG("ch%d\n", channel);
diff --git a/shared-core/nouveau_reg.h b/shared-core/nouveau_reg.h
index 4c013c53..c2ebc714 100644
--- a/shared-core/nouveau_reg.h
+++ b/shared-core/nouveau_reg.h
@@ -47,11 +47,15 @@
 #define NV_CLASS_DMA_IN_MEMORY                             0x0000003D
 
 #define NV03_FIFO_SIZE                                     0x8000UL
-#define NV_MAX_FIFO_NUMBER                                 32
+#define NV_MAX_FIFO_NUMBER                                 128
 #define NV03_FIFO_REGS_SIZE                                0x10000
 #define NV03_FIFO_REGS(i)                                  (0x00800000+i*NV03_FIFO_REGS_SIZE)
 #    define NV03_FIFO_REGS_DMAPUT(i)                       (NV03_FIFO_REGS(i)+0x40)
 #    define NV03_FIFO_REGS_DMAGET(i)                       (NV03_FIFO_REGS(i)+0x44)
+#define NV50_FIFO_REGS_SIZE                                0x2000
+#define NV50_FIFO_REGS(i)                                  (0x00c00000+i*NV50_FIFO_REGS_SIZE)
+#    define NV50_FIFO_REGS_DMAPUT(i)                       (NV50_FIFO_REGS(i)+0x40)
+#    define NV50_FIFO_REGS_DMAGET(i)                       (NV50_FIFO_REGS(i)+0x44)
 
 #define NV03_PMC_BOOT_0                                    0x00000000
 #define NV03_PMC_INTR_0                                    0x00000100
@@ -332,6 +336,12 @@
 #define NV04_PFIFO_MODE                                    0x00002504
 #define NV04_PFIFO_DMA                                     0x00002508
 #define NV04_PFIFO_SIZE                                    0x0000250c
+#define NV50_PFIFO_CTX_TABLE(c)                        (0x2600+(c)*4)
+#define NV50_PFIFO_CTX_TABLE__SIZE                                128
+#define NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED                  (1<<31)
+#define NV50_PFIFO_CTX_TABLE_UNK30_BAD                        (1<<30)
+#define NV50_PFIFO_CTX_TABLE_INSTANCE_MASK_G80             0x0FFFFFFF
+#define NV50_PFIFO_CTX_TABLE_INSTANCE_MASK_G84             0x00FFFFFF
 #define NV03_PFIFO_CACHE0_PUSH0                            0x00003000
 #define NV03_PFIFO_CACHE0_PULL0                            0x00003040
 #define NV04_PFIFO_CACHE0_PULL0                            0x00003050
diff --git a/shared-core/nouveau_state.c b/shared-core/nouveau_state.c
index 13bc930a..bcb974bf 100644
--- a/shared-core/nouveau_state.c
+++ b/shared-core/nouveau_state.c
@@ -95,6 +95,12 @@ static int nouveau_init_engine_ptrs(drm_device_t *dev)
 
 	switch (dev_priv->chipset & 0xf0) {
 	case 0x00:
+		engine->instmem.init	= nv04_instmem_init;
+		engine->instmem.takedown= nv04_instmem_takedown;
+		engine->instmem.populate	= nv04_instmem_populate;
+		engine->instmem.clear		= nv04_instmem_clear;
+		engine->instmem.bind		= nv04_instmem_bind;
+		engine->instmem.unbind		= nv04_instmem_unbind;
 		engine->mc.init		= nv04_mc_init;
 		engine->mc.takedown	= nv04_mc_takedown;
 		engine->timer.init	= nv04_timer_init;
@@ -115,6 +121,12 @@ static int nouveau_init_engine_ptrs(drm_device_t *dev)
 		engine->fifo.save_context	= nv04_fifo_save_context;
 		break;
 	case 0x10:
+		engine->instmem.init	= nv04_instmem_init;
+		engine->instmem.takedown= nv04_instmem_takedown;
+		engine->instmem.populate	= nv04_instmem_populate;
+		engine->instmem.clear		= nv04_instmem_clear;
+		engine->instmem.bind		= nv04_instmem_bind;
+		engine->instmem.unbind		= nv04_instmem_unbind;
 		engine->mc.init		= nv04_mc_init;
 		engine->mc.takedown	= nv04_mc_takedown;
 		engine->timer.init	= nv04_timer_init;
@@ -135,6 +147,12 @@ static int nouveau_init_engine_ptrs(drm_device_t *dev)
 		engine->fifo.save_context	= nv10_fifo_save_context;
 		break;
 	case 0x20:
+		engine->instmem.init	= nv04_instmem_init;
+		engine->instmem.takedown= nv04_instmem_takedown;
+		engine->instmem.populate	= nv04_instmem_populate;
+		engine->instmem.clear		= nv04_instmem_clear;
+		engine->instmem.bind		= nv04_instmem_bind;
+		engine->instmem.unbind		= nv04_instmem_unbind;
 		engine->mc.init		= nv04_mc_init;
 		engine->mc.takedown	= nv04_mc_takedown;
 		engine->timer.init	= nv04_timer_init;
@@ -155,6 +173,12 @@ static int nouveau_init_engine_ptrs(drm_device_t *dev)
 		engine->fifo.save_context	= nv10_fifo_save_context;
 		break;
 	case 0x30:
+		engine->instmem.init	= nv04_instmem_init;
+		engine->instmem.takedown= nv04_instmem_takedown;
+		engine->instmem.populate	= nv04_instmem_populate;
+		engine->instmem.clear		= nv04_instmem_clear;
+		engine->instmem.bind		= nv04_instmem_bind;
+		engine->instmem.unbind		= nv04_instmem_unbind;
 		engine->mc.init		= nv04_mc_init;
 		engine->mc.takedown	= nv04_mc_takedown;
 		engine->timer.init	= nv04_timer_init;
@@ -175,6 +199,12 @@ static int nouveau_init_engine_ptrs(drm_device_t *dev)
 		engine->fifo.save_context	= nv10_fifo_save_context;
 		break;
 	case 0x40:
+		engine->instmem.init	= nv04_instmem_init;
+		engine->instmem.takedown= nv04_instmem_takedown;
+		engine->instmem.populate	= nv04_instmem_populate;
+		engine->instmem.clear		= nv04_instmem_clear;
+		engine->instmem.bind		= nv04_instmem_bind;
+		engine->instmem.unbind		= nv04_instmem_unbind;
 		engine->mc.init		= nv40_mc_init;
 		engine->mc.takedown	= nv40_mc_takedown;
 		engine->timer.init	= nv04_timer_init;
@@ -196,6 +226,12 @@ static int nouveau_init_engine_ptrs(drm_device_t *dev)
 		break;
 	case 0x50:
 	case 0x80: /* gotta love NVIDIA's consistency.. */
+		engine->instmem.init	= nv50_instmem_init;
+		engine->instmem.takedown= nv50_instmem_takedown;
+		engine->instmem.populate	= nv50_instmem_populate;
+		engine->instmem.clear		= nv50_instmem_clear;
+		engine->instmem.bind		= nv50_instmem_bind;
+		engine->instmem.unbind		= nv50_instmem_unbind;
 		engine->mc.init		= nv50_mc_init;
 		engine->mc.takedown	= nv50_mc_takedown;
 		engine->timer.init	= nouveau_stub_init;
@@ -249,7 +285,7 @@ static int nouveau_card_init(drm_device_t *dev)
 	 * know exactly how much VRAM we're able to use for "normal"
 	 * purposes.
 	 */
-	ret = nouveau_instmem_init(dev);
+	ret = engine->instmem.init(dev);
 	if (ret) return ret;
 
 	/* Setup the memory manager */
@@ -295,6 +331,7 @@ static void nouveau_card_takedown(drm_device_t *dev)
 	engine->mc.takedown(dev);
 	nouveau_gpuobj_takedown(dev);
 	nouveau_mem_close(dev);
+	engine->instmem.takedown(dev);
 }
 
 /* here a client dies, release the stuff that was allocated for its filp */
@@ -456,6 +493,8 @@ void nouveau_wait_for_idle(struct drm_device *dev)
 		case NV_03:
 			while(NV_READ(NV03_PGRAPH_STATUS));
 			break;
+		case NV_50:
+			break;
 		default:
 			while(NV_READ(NV04_PGRAPH_STATUS));
 			break;
diff --git a/shared-core/nv04_fifo.c b/shared-core/nv04_fifo.c
index b84f74c1..e2e934d7 100644
--- a/shared-core/nv04_fifo.c
+++ b/shared-core/nv04_fifo.c
@@ -39,7 +39,7 @@ int
 nv04_fifo_create_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	int ret;
 
 	if ((ret = nouveau_gpuobj_new_fake(dev, NV04_RAMFC(channel),
@@ -60,6 +60,9 @@ nv04_fifo_create_context(drm_device_t *dev, int channel)
 			     NV_PFIFO_CACHE1_BIG_ENDIAN |
 #endif
 			     0));
+
+	/* enable the fifo dma operation */
+	NV_WRITE(NV04_PFIFO_MODE,NV_READ(NV04_PFIFO_MODE)|(1<<channel));
 	return 0;
 }
 
@@ -67,7 +70,9 @@ void
 nv04_fifo_destroy_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+
+	NV_WRITE(NV04_PFIFO_MODE, NV_READ(NV04_PFIFO_MODE)&~(1<<channel));
 
 	if (chan->ramfc)
 		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
@@ -77,7 +82,7 @@ int
 nv04_fifo_load_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t tmp;
 
 	NV_WRITE(NV03_PFIFO_CACHE1_PUSH1, (1<<8) | channel);
@@ -105,7 +110,7 @@ int
 nv04_fifo_save_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t tmp;
 
 	RAMFC_WR(DMA_PUT, NV04_PFIFO_CACHE1_DMA_PUT);
diff --git a/shared-core/nv04_graph.c b/shared-core/nv04_graph.c
index 1aaae33c..df23d279 100644
--- a/shared-core/nv04_graph.c
+++ b/shared-core/nv04_graph.c
@@ -309,7 +309,7 @@ void nouveau_nv04_context_switch(drm_device_t *dev)
 	for (i = 0; i<sizeof(nv04_graph_ctx_regs)/sizeof(nv04_graph_ctx_regs[0]); i++)
 		for (j = 0; j<nv04_graph_ctx_regs[i].number; j++)
 		{
-			dev_priv->fifos[channel_old].pgraph_ctx[index] = NV_READ(nv04_graph_ctx_regs[i].reg+j*4);
+			dev_priv->fifos[channel_old]->pgraph_ctx[index] = NV_READ(nv04_graph_ctx_regs[i].reg+j*4);
 			index++;
 		}
 
@@ -321,7 +321,7 @@ void nouveau_nv04_context_switch(drm_device_t *dev)
 	for (i = 0; i<sizeof(nv04_graph_ctx_regs)/sizeof(nv04_graph_ctx_regs[0]); i++)
 		for (j = 0; j<nv04_graph_ctx_regs[i].number; j++)
 		{
-			NV_WRITE(nv04_graph_ctx_regs[i].reg+j*4, dev_priv->fifos[channel].pgraph_ctx[index]);
+			NV_WRITE(nv04_graph_ctx_regs[i].reg+j*4, dev_priv->fifos[channel]->pgraph_ctx[index]);
 			index++;
 		}
 
@@ -340,10 +340,10 @@ int nv04_graph_create_context(drm_device_t *dev, int channel) {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	DRM_DEBUG("nv04_graph_context_create %d\n", channel);
 
-	memset(dev_priv->fifos[channel].pgraph_ctx, 0, sizeof(dev_priv->fifos[channel].pgraph_ctx));
+	memset(dev_priv->fifos[channel]->pgraph_ctx, 0, sizeof(dev_priv->fifos[channel]->pgraph_ctx));
 
 	//dev_priv->fifos[channel].pgraph_ctx_user = channel << 24;
-	dev_priv->fifos[channel].pgraph_ctx[0] = 0x0001ffff;
+	dev_priv->fifos[channel]->pgraph_ctx[0] = 0x0001ffff;
 	/* is it really needed ??? */
 	//dev_priv->fifos[channel].pgraph_ctx[1] = NV_READ(NV_PGRAPH_DEBUG_4);
 	//dev_priv->fifos[channel].pgraph_ctx[2] = NV_READ(0x004006b0);
@@ -379,7 +379,7 @@ int nv04_graph_init(drm_device_t *dev) {
 	// check the context is big enough
 	for ( i = 0 ; i<sizeof(nv04_graph_ctx_regs)/sizeof(nv04_graph_ctx_regs[0]); i++)
 		sum+=nv04_graph_ctx_regs[i].number;
-	if ( sum*4>sizeof(dev_priv->fifos[0].pgraph_ctx) )
+	if ( sum*4>sizeof(dev_priv->fifos[0]->pgraph_ctx) )
 		DRM_ERROR("pgraph_ctx too small\n");
 
 	NV_WRITE(NV03_PGRAPH_INTR_EN, 0x00000000);
diff --git a/shared-core/nv04_instmem.c b/shared-core/nv04_instmem.c
new file mode 100644
index 00000000..ac7d4347
--- /dev/null
+++ b/shared-core/nv04_instmem.c
@@ -0,0 +1,165 @@
+#include "drmP.h"
+#include "drm.h"
+#include "nouveau_drv.h"
+
+static void
+nv04_instmem_determine_amount(struct drm_device *dev)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	int i;
+
+	/* Figure out how much instance memory we need */
+	switch (dev_priv->card_type) {
+	case NV_40:
+		/* We'll want more instance memory than this on some NV4x cards.
+		 * There's a 16MB aperture to play with that maps onto the end
+		 * of vram.  For now, only reserve a small piece until we know
+		 * more about what each chipset requires.
+		 */
+		dev_priv->ramin_rsvd_vram = (1*1024* 1024);
+		break;
+	default:
+		/*XXX: what *are* the limits on <NV40 cards, and does RAMIN
+		 *     exist in vram on those cards as well?
+		 */
+		dev_priv->ramin_rsvd_vram = (512*1024);
+		break;
+	}
+	DRM_DEBUG("RAMIN size: %dKiB\n", dev_priv->ramin_rsvd_vram>>10);
+
+	/* Clear all of it, except the BIOS image that's in the first 64KiB */
+	for (i=(64*1024); i<dev_priv->ramin_rsvd_vram; i+=4)
+		NV_WI32(i, 0x00000000);
+}
+
+static void
+nv04_instmem_configure_fixed_tables(struct drm_device *dev)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+
+	/* FIFO hash table (RAMHT)
+	 *   use 4k hash table at RAMIN+0x10000
+	 *   TODO: extend the hash table
+	 */
+	dev_priv->ramht_offset = 0x10000;
+	dev_priv->ramht_bits   = 9;
+	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits);
+	DRM_DEBUG("RAMHT offset=0x%x, size=%d\n", dev_priv->ramht_offset,
+						  dev_priv->ramht_size);
+
+	/* FIFO runout table (RAMRO) - 512 bytes at 0x11200 */
+	dev_priv->ramro_offset = 0x11200;
+	dev_priv->ramro_size   = 512;
+	DRM_DEBUG("RAMRO offset=0x%x, size=%d\n", dev_priv->ramro_offset,
+						  dev_priv->ramro_size);
+
+	/* FIFO context table (RAMFC)
+	 *   NV40  : Not sure exactly how to position RAMFC on some cards,
+	 *           0x30002 seems to position it at RAMIN+0x20000 on these
+	 *           cards.  RAMFC is 4kb (32 fifos, 128byte entries).
+	 *   Others: Position RAMFC at RAMIN+0x11400
+	 */
+	switch(dev_priv->card_type)
+	{
+		case NV_40:
+		case NV_44:
+			dev_priv->ramfc_offset = 0x20000;
+			dev_priv->ramfc_size   = nouveau_fifo_number(dev) *
+				nouveau_fifo_ctx_size(dev);
+			break;
+		case NV_30:
+		case NV_20:
+		case NV_17:
+		case NV_10:
+		case NV_04:
+		case NV_03:
+		default:
+			dev_priv->ramfc_offset = 0x11400;
+			dev_priv->ramfc_size   = nouveau_fifo_number(dev) *
+				nouveau_fifo_ctx_size(dev);
+			break;
+	}
+	DRM_DEBUG("RAMFC offset=0x%x, size=%d\n", dev_priv->ramfc_offset,
+						  dev_priv->ramfc_size);
+}
+
+int nv04_instmem_init(struct drm_device *dev)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	uint32_t offset;
+	int ret = 0;
+
+	nv04_instmem_determine_amount(dev);
+	nv04_instmem_configure_fixed_tables(dev);
+
+	if ((ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramht_offset,
+						dev_priv->ramht_size,
+						NVOBJ_FLAG_ZERO_ALLOC |
+						NVOBJ_FLAG_ALLOW_NO_REFS,
+						&dev_priv->ramht, NULL)))
+		return ret;
+
+	/* Create a heap to manage RAMIN allocations.  It starts after the
+	 * space that was reserved for RAMHT/RAMRO/RAMFC.
+	 */
+	offset = dev_priv->ramfc_offset + dev_priv->ramfc_size;
+	ret = nouveau_mem_init_heap(&dev_priv->ramin_heap,
+				    offset, dev_priv->ramin_rsvd_vram - offset);
+	if (ret) {
+		dev_priv->ramin_heap = NULL;
+		DRM_ERROR("Failed to init RAMIN heap\n");
+	}
+
+	return ret;
+}
+
+void
+nv04_instmem_takedown(drm_device_t *dev)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+
+	nouveau_gpuobj_del(dev, &dev_priv->ramht);
+}
+
+int
+nv04_instmem_populate(drm_device_t *dev, nouveau_gpuobj_t *gpuobj, uint32_t *sz)
+{
+	if (gpuobj->im_backing)
+		return DRM_ERR(EINVAL);
+
+	return 0;
+}
+
+void
+nv04_instmem_clear(drm_device_t *dev, nouveau_gpuobj_t *gpuobj)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+
+	if (gpuobj && gpuobj->im_backing) {
+		if (gpuobj->im_bound)
+			dev_priv->Engine.instmem.unbind(dev, gpuobj);
+		nouveau_mem_free(dev, gpuobj->im_backing);
+		gpuobj->im_backing = NULL;
+	}	
+}
+
+int
+nv04_instmem_bind(drm_device_t *dev, nouveau_gpuobj_t *gpuobj)
+{
+	if (!gpuobj->im_pramin || gpuobj->im_bound)
+		return DRM_ERR(EINVAL);
+
+	gpuobj->im_bound = 1;
+	return 0;
+}
+
+int
+nv04_instmem_unbind(drm_device_t *dev, nouveau_gpuobj_t *gpuobj)
+{
+	if (gpuobj->im_bound == 0)
+		return DRM_ERR(EINVAL);
+
+	gpuobj->im_bound = 0;
+	return 0;
+}
+
diff --git a/shared-core/nv10_fifo.c b/shared-core/nv10_fifo.c
index 07ec4635..2d8d5a0d 100644
--- a/shared-core/nv10_fifo.c
+++ b/shared-core/nv10_fifo.c
@@ -40,7 +40,7 @@ int
 nv10_fifo_create_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	int ret;
 
 	if ((ret = nouveau_gpuobj_new_fake(dev, NV10_RAMFC(channel),
@@ -64,6 +64,8 @@ nv10_fifo_create_context(drm_device_t *dev, int channel)
 #endif
 				 0);
 
+	/* enable the fifo dma operation */
+	NV_WRITE(NV04_PFIFO_MODE,NV_READ(NV04_PFIFO_MODE)|(1<<channel));
 	return 0;
 }
 
@@ -71,7 +73,9 @@ void
 nv10_fifo_destroy_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+
+	NV_WRITE(NV04_PFIFO_MODE, NV_READ(NV04_PFIFO_MODE)&~(1<<channel));
 
 	if (chan->ramfc)
 		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
@@ -81,7 +85,7 @@ int
 nv10_fifo_load_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t tmp;
 
 	NV_WRITE(NV03_PFIFO_CACHE1_PUSH1            , 0x00000100 | channel);
@@ -123,7 +127,7 @@ int
 nv10_fifo_save_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t tmp;
 
 	RAMFC_WR(DMA_PUT          , NV_READ(NV04_PFIFO_CACHE1_DMA_PUT));
diff --git a/shared-core/nv10_graph.c b/shared-core/nv10_graph.c
index d1fe0a54..c544afac 100644
--- a/shared-core/nv10_graph.c
+++ b/shared-core/nv10_graph.c
@@ -547,7 +547,7 @@ static int nv10_graph_ctx_regs_find_offset(drm_device_t *dev, int reg)
 static void restore_ctx_regs(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *fifo = &dev_priv->fifos[channel];
+	struct nouveau_fifo *fifo = dev_priv->fifos[channel];
 	int i, j;
 	for (i = 0; i < sizeof(nv10_graph_ctx_regs)/sizeof(nv10_graph_ctx_regs[0]); i++)
 		NV_WRITE(nv10_graph_ctx_regs[i], fifo->pgraph_ctx[i]);
@@ -577,10 +577,10 @@ void nouveau_nv10_context_switch(drm_device_t *dev)
 
 	// save PGRAPH context
 	for (i = 0; i < sizeof(nv10_graph_ctx_regs)/sizeof(nv10_graph_ctx_regs[0]); i++)
-		dev_priv->fifos[channel_old].pgraph_ctx[i] = NV_READ(nv10_graph_ctx_regs[i]);
+		dev_priv->fifos[channel_old]->pgraph_ctx[i] = NV_READ(nv10_graph_ctx_regs[i]);
 	if (dev_priv->chipset>=0x17) {
 		for (j = 0; j < sizeof(nv17_graph_ctx_regs)/sizeof(nv17_graph_ctx_regs[0]); i++,j++)
-			dev_priv->fifos[channel_old].pgraph_ctx[i] = NV_READ(nv17_graph_ctx_regs[j]);
+			dev_priv->fifos[channel_old]->pgraph_ctx[i] = NV_READ(nv17_graph_ctx_regs[j]);
 	}
 	
 	nouveau_wait_for_idle(dev);
@@ -613,7 +613,7 @@ void nouveau_nv10_context_switch(drm_device_t *dev)
 	} while (0)
 int nv10_graph_create_context(drm_device_t *dev, int channel) {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *fifo = &dev_priv->fifos[channel];
+	struct nouveau_fifo *fifo = dev_priv->fifos[channel];
 	uint32_t tmp, vramsz;
 
 	DRM_DEBUG("nv10_graph_context_create %d\n", channel);
diff --git a/shared-core/nv20_graph.c b/shared-core/nv20_graph.c
index 13271051..06d7e440 100644
--- a/shared-core/nv20_graph.c
+++ b/shared-core/nv20_graph.c
@@ -32,7 +32,7 @@
 int nv20_graph_create_context(drm_device_t *dev, int channel) {
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	unsigned int ctx_size = NV20_GRCTX_SIZE;
 	int ret;
 
@@ -51,7 +51,7 @@ int nv20_graph_create_context(drm_device_t *dev, int channel) {
 
 void nv20_graph_destroy_context(drm_device_t *dev, int channel) {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 
 	if (chan->ramin_grctx)
 		nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
@@ -76,7 +76,7 @@ static void nv20_graph_rdi(drm_device_t *dev) {
 int nv20_graph_save_context(drm_device_t *dev, int channel) {
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t instance;
 
 	instance = INSTANCE_RD(dev_priv->ctx_table->gpuobj, channel);
@@ -97,7 +97,7 @@ int nv20_graph_save_context(drm_device_t *dev, int channel) {
 int nv20_graph_load_context(drm_device_t *dev, int channel) {
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t instance;
 
 	instance = INSTANCE_RD(dev_priv->ctx_table->gpuobj, channel);
diff --git a/shared-core/nv30_graph.c b/shared-core/nv30_graph.c
index 65f4f868..a83ad714 100644
--- a/shared-core/nv30_graph.c
+++ b/shared-core/nv30_graph.c
@@ -104,7 +104,7 @@ int nv30_graph_create_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	void (*ctx_init)(drm_device_t *, nouveau_gpuobj_t *);
 	unsigned int ctx_size;
 	int ret;
@@ -135,7 +135,7 @@ void nv30_graph_destroy_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 
 	if (chan->ramin_grctx)
 		nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
@@ -164,7 +164,7 @@ nouveau_graph_wait_idle(drm_device_t *dev)
 int nv30_graph_load_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t inst;
 
 	if (!chan->ramin_grctx)
@@ -181,7 +181,7 @@ int nv30_graph_load_context(drm_device_t *dev, int channel)
 int nv30_graph_save_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t inst;
 
 	if (!chan->ramin_grctx)
diff --git a/shared-core/nv40_fifo.c b/shared-core/nv40_fifo.c
index eed3e45b..818a9024 100644
--- a/shared-core/nv40_fifo.c
+++ b/shared-core/nv40_fifo.c
@@ -40,7 +40,7 @@ int
 nv40_fifo_create_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	int ret;
 
 	if ((ret = nouveau_gpuobj_new_fake(dev, NV40_RAMFC(channel),
@@ -67,6 +67,8 @@ nv40_fifo_create_context(drm_device_t *dev, int channel)
 	RAMFC_WR(GRCTX_INSTANCE, chan->ramin_grctx->instance >> 4);
 	RAMFC_WR(DMA_TIMESLICE , 0x0001FFFF);
 
+	/* enable the fifo dma operation */
+	NV_WRITE(NV04_PFIFO_MODE,NV_READ(NV04_PFIFO_MODE)|(1<<channel));
 	return 0;
 }
 
@@ -74,7 +76,9 @@ void
 nv40_fifo_destroy_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+
+	NV_WRITE(NV04_PFIFO_MODE, NV_READ(NV04_PFIFO_MODE)&~(1<<channel));
 
 	if (chan->ramfc)
 		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
@@ -84,7 +88,7 @@ int
 nv40_fifo_load_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t tmp, tmp2;
 
 	NV_WRITE(NV04_PFIFO_CACHE1_DMA_GET          , RAMFC_RD(DMA_GET));
@@ -143,7 +147,7 @@ int
 nv40_fifo_save_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t tmp;
 
 	RAMFC_WR(DMA_PUT          , NV_READ(NV04_PFIFO_CACHE1_DMA_PUT));
diff --git a/shared-core/nv40_graph.c b/shared-core/nv40_graph.c
index 3f33cee6..94d76505 100644
--- a/shared-core/nv40_graph.c
+++ b/shared-core/nv40_graph.c
@@ -1228,7 +1228,7 @@ nv40_graph_create_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	void (*ctx_init)(drm_device_t *, nouveau_gpuobj_t *);
 	unsigned int ctx_size;
 	int ret;
@@ -1287,7 +1287,7 @@ void
 nv40_graph_destroy_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 
 	if (chan->ramin_grctx)
 		nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
@@ -1330,7 +1330,7 @@ int
 nv40_graph_save_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t inst;
 
 	if (!chan->ramin_grctx)
@@ -1347,7 +1347,7 @@ int
 nv40_graph_load_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
 	uint32_t inst;
 	int ret;
 
diff --git a/shared-core/nv50_fifo.c b/shared-core/nv50_fifo.c
index e5d37949..d4c3ca87 100644
--- a/shared-core/nv50_fifo.c
+++ b/shared-core/nv50_fifo.c
@@ -28,55 +28,306 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 
+typedef struct {
+	nouveau_gpuobj_ref_t *thingo;
+	nouveau_gpuobj_ref_t *dummyctx;
+} nv50_fifo_priv;
+
+#define IS_G80 ((dev_priv->chipset & 0xf0) == 0x50)
+
+static void
+nv50_fifo_init_thingo(drm_device_t *dev)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nv50_fifo_priv *priv = dev_priv->Engine.fifo.priv;
+	nouveau_gpuobj_ref_t *thingo = priv->thingo;
+	int i, fi=2;	/* fi: next word to fill; words 0 and 1 are fixed */
+	/* Rewrite the thingo table from dev_priv->fifos[] and resubmit it */
+	DRM_DEBUG("\n");
+	/* words 0 and 1 always hold 0x7e; one word per non-NULL fifo follows */
+	INSTANCE_WR(thingo->gpuobj, 0, 0x7e);
+	INSTANCE_WR(thingo->gpuobj, 1, 0x7e);
+	for (i = 0; i <NV_MAX_FIFO_NUMBER; i++) {
+		if (dev_priv->fifos[i]) {
+			INSTANCE_WR(thingo->gpuobj, fi, i);
+			fi++;
+		}
+	}
+	/* 0x32f4 <- table instance >> 12, 0x32ec <- number of words written */
+	NV_WRITE(0x32f4, thingo->instance >> 12);
+	NV_WRITE(0x32ec, fi);
+	NV_WRITE(0x2500, 0x101);
+}
+
+static int
+nv50_fifo_channel_enable(drm_device_t *dev, int channel)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+
+	DRM_DEBUG("ch%d\n", channel);
+
+	if (IS_G80) {
+		if (!chan->ramin)
+			return DRM_ERR(EINVAL);
+
+		NV_WRITE(NV50_PFIFO_CTX_TABLE(channel),
+			 (chan->ramin->instance >> 12) |
+			 NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED);
+	} else {
+		if (!chan->ramfc)
+			return DRM_ERR(EINVAL);
+
+		NV_WRITE(NV50_PFIFO_CTX_TABLE(channel),
+			 (chan->ramfc->instance >> 8) |
+			 NV50_PFIFO_CTX_TABLE_CHANNEL_ENABLED);
+	}
+
+	nv50_fifo_init_thingo(dev);
+	return 0;
+}
+
+static void
+nv50_fifo_channel_disable(drm_device_t *dev, int channel, int nt)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+
+	DRM_DEBUG("ch%d, nt=%d\n", channel, nt);
+
+	if (IS_G80) {
+		NV_WRITE(NV50_PFIFO_CTX_TABLE(channel),
+			 NV50_PFIFO_CTX_TABLE_INSTANCE_MASK_G80);
+	} else {
+		NV_WRITE(NV50_PFIFO_CTX_TABLE(channel),
+			 NV50_PFIFO_CTX_TABLE_INSTANCE_MASK_G84);
+	}
+
+	if (!nt) nv50_fifo_init_thingo(dev);
+}
+
 static void
 nv50_fifo_init_reset(drm_device_t *dev)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	uint32_t pmc_e;
 
+	DRM_DEBUG("\n");
+
 	pmc_e = NV_READ(NV03_PMC_ENABLE);
 	NV_WRITE(NV03_PMC_ENABLE, pmc_e & ~NV_PMC_ENABLE_PFIFO);
 	pmc_e = NV_READ(NV03_PMC_ENABLE);
 	NV_WRITE(NV03_PMC_ENABLE, pmc_e |  NV_PMC_ENABLE_PFIFO);
 }
 
+static void
+nv50_fifo_init_context_table(drm_device_t *dev)
+{
+	int i;
+
+	DRM_DEBUG("\n");
+
+	for (i = 0; i < NV50_PFIFO_CTX_TABLE__SIZE; i++)
+		nv50_fifo_channel_disable(dev, i, 1);
+	nv50_fifo_init_thingo(dev);
+}
+
+static void
+nv50_fifo_init_regs__nv(drm_device_t *dev)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+
+	DRM_DEBUG("\n");
+
+	NV_WRITE(0x250c, 0x6f3cfc34);
+}
+
+static int
+nv50_fifo_init_regs(drm_device_t *dev)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nv50_fifo_priv *priv = dev_priv->Engine.fifo.priv;
+	int ret;
+
+	DRM_DEBUG("\n");
+
+	if ((ret = nouveau_gpuobj_new_ref(dev, -1, -1, 0, 0x1000,
+					  0x1000,
+					  NVOBJ_FLAG_ZERO_ALLOC |
+					  NVOBJ_FLAG_ZERO_FREE,
+					  &priv->dummyctx)))
+		return ret;
+
+	NV_WRITE(0x2500, 0);
+	NV_WRITE(0x3250, 0);
+	NV_WRITE(0x3220, 0);
+	NV_WRITE(0x3204, 0);
+	NV_WRITE(0x3210, 0);
+	NV_WRITE(0x3270, 0);
+
+	if (IS_G80) {
+		NV_WRITE(0x2600, (priv->dummyctx->instance>>8) | (1<<31));
+		NV_WRITE(0x27fc, (priv->dummyctx->instance>>8) | (1<<31));
+	} else {
+		NV_WRITE(0x2600, (priv->dummyctx->instance>>12) | (1<<31));
+		NV_WRITE(0x27fc, (priv->dummyctx->instance>>12) | (1<<31));
+	}
+
+	return 0;
+}
+
 int
 nv50_fifo_init(drm_device_t *dev)
 {
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nv50_fifo_priv *priv;
+	int ret;
+
+	DRM_DEBUG("\n");
+
+	priv = drm_calloc(1, sizeof(*priv), DRM_MEM_DRIVER);
+	if (!priv)
+		return DRM_ERR(ENOMEM);
+	dev_priv->Engine.fifo.priv = priv;
+
 	nv50_fifo_init_reset(dev);
 
-	DRM_ERROR("stub!\n");
+	if ((ret = nouveau_gpuobj_new_ref(dev, -1, -1, 0, (128+2)*4, 0x1000,
+				   NVOBJ_FLAG_ZERO_ALLOC,
+				   &priv->thingo))) {
+		DRM_ERROR("error creating thingo: %d\n", ret);
+		return ret;
+	}
+	nv50_fifo_init_context_table(dev);
+
+	nv50_fifo_init_regs__nv(dev);
+	if ((ret = nv50_fifo_init_regs(dev)))
+		return ret;
+
 	return 0;
 }
 
 void
 nv50_fifo_takedown(drm_device_t *dev)
 {
-	DRM_ERROR("stub!\n");
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nv50_fifo_priv *priv = dev_priv->Engine.fifo.priv;
+
+	DRM_DEBUG("\n");
+
+	if (!priv)
+		return;
+
+	nouveau_gpuobj_ref_del(dev, &priv->thingo);
+	nouveau_gpuobj_ref_del(dev, &priv->dummyctx);
+
+	dev_priv->Engine.fifo.priv = NULL;
+	drm_free(priv, sizeof(*priv), DRM_MEM_DRIVER);
 }
 
 int
 nv50_fifo_create_context(drm_device_t *dev, int channel)
 {
-	DRM_ERROR("stub!\n");
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+	nouveau_gpuobj_t *ramfc = NULL;
+	int ret;
+
+	DRM_DEBUG("ch%d\n", channel);
+
+	if (IS_G80) {
+		uint32_t ramfc_offset;
+		ramfc_offset = chan->ramin->gpuobj->im_pramin->start + 0x1000;
+		if ((ret = nouveau_gpuobj_new_fake(dev, ramfc_offset, 0x100,
+						   NVOBJ_FLAG_ZERO_ALLOC |
+						   NVOBJ_FLAG_ZERO_FREE,
+						   &ramfc, &chan->ramfc)))
+				return ret;
+	} else {
+		if ((ret = nouveau_gpuobj_new_ref(dev, channel, -1, 0, 0x100,
+						  256,
+						  NVOBJ_FLAG_ZERO_ALLOC |
+						  NVOBJ_FLAG_ZERO_FREE,
+						  &chan->ramfc)))
+			return ret;
+		ramfc = chan->ramfc->gpuobj;
+	}
+
+	INSTANCE_WR(ramfc, 0x48/4, chan->pushbuf->instance >> 4);
+	INSTANCE_WR(ramfc, 0x80/4, (0xc << 24) | (chan->ramht->instance >> 4));
+	INSTANCE_WR(ramfc, 0x3c/4, 0x000f0078); /* fetch? */
+	INSTANCE_WR(ramfc, 0x44/4, 0x2101ffff);
+	INSTANCE_WR(ramfc, 0x60/4, 0x7fffffff);
+	INSTANCE_WR(ramfc, 0x10/4, 0x00000000);
+	INSTANCE_WR(ramfc, 0x08/4, 0x00000000);
+	INSTANCE_WR(ramfc, 0x40/4, 0x00000000);
+	INSTANCE_WR(ramfc, 0x50/4, 0x2039b2e0);
+	INSTANCE_WR(ramfc, 0x54/4, 0x000f0000);
+	INSTANCE_WR(ramfc, 0x7c/4, 0x30000001);
+	INSTANCE_WR(ramfc, 0x78/4, 0x00000000);
+	INSTANCE_WR(ramfc, 0x4c/4, 0x00007fff);
+
+	if (!IS_G80) {
+		INSTANCE_WR(chan->ramin->gpuobj, 0, channel);
+		INSTANCE_WR(chan->ramin->gpuobj, 1, chan->ramfc->instance);
+
+		INSTANCE_WR(ramfc, 0x88/4, 0x3d520); /* some vram addy >> 10 */
+		INSTANCE_WR(ramfc, 0x98/4, chan->ramin->instance >> 12);
+	}
+
+	if ((ret = nv50_fifo_channel_enable(dev, channel))) {
+		DRM_ERROR("error enabling ch%d: %d\n", channel, ret);
+		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
+		return ret;
+	}
+
 	return 0;
 }
 
 void
 nv50_fifo_destroy_context(drm_device_t *dev, int channel)
 {
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+
+	DRM_DEBUG("ch%d\n", channel);
+
+	nv50_fifo_channel_disable(dev, channel, 0);
+	nouveau_gpuobj_ref_del(dev, &chan->ramfc);
 }
 
 int
 nv50_fifo_load_context(drm_device_t *dev, int channel)
 {
-	DRM_ERROR("stub!\n");
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+	nouveau_gpuobj_t *ramfc = chan->ramfc->gpuobj;
+
+	DRM_DEBUG("ch%d\n", channel);
+
+	/*XXX: incomplete, only touches the regs that NV does */
+
+	NV_WRITE(0x3244, 0);
+	NV_WRITE(0x3240, 0);
+
+	NV_WRITE(0x3224, INSTANCE_RD(ramfc, 0x3c/4));
+	NV_WRITE(NV04_PFIFO_CACHE1_DMA_INSTANCE, INSTANCE_RD(ramfc, 0x48/4));
+	NV_WRITE(0x3234, INSTANCE_RD(ramfc, 0x4c/4));
+	NV_WRITE(0x3254, 1);
+	NV_WRITE(NV03_PFIFO_RAMHT, INSTANCE_RD(ramfc, 0x80/4));
+
+	if (!IS_G80) {
+		NV_WRITE(0x340c, INSTANCE_RD(ramfc, 0x88/4));
+		NV_WRITE(0x3410, INSTANCE_RD(ramfc, 0x98/4));
+	}
+
+	NV_WRITE(NV03_PFIFO_CACHE1_PUSH1, channel | (1<<16));
 	return 0;
 }
 
 int
 nv50_fifo_save_context(drm_device_t *dev, int channel)
 {
+	DRM_DEBUG("ch%d\n", channel);
 	DRM_ERROR("stub!\n");
 	return 0;
 }
diff --git a/shared-core/nv50_graph.c b/shared-core/nv50_graph.c
index 8c3e2b9b..271ed733 100644
--- a/shared-core/nv50_graph.c
+++ b/shared-core/nv50_graph.c
@@ -28,57 +28,274 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 
+#define IS_G80 ((dev_priv->chipset & 0xf0) == 0x50) /* true for chipset 0x5x; needs a local dev_priv in scope */
+
 static void
 nv50_graph_init_reset(drm_device_t *dev)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	uint32_t pmc_e;
 
+	DRM_DEBUG("\n"); /* trace entry; below, the PGRAPH bit in PMC_ENABLE is cleared and then set again */
+
 	pmc_e = NV_READ(NV03_PMC_ENABLE);
 	NV_WRITE(NV03_PMC_ENABLE, pmc_e & ~NV_PMC_ENABLE_PGRAPH);
 	pmc_e = NV_READ(NV03_PMC_ENABLE);
 	NV_WRITE(NV03_PMC_ENABLE, pmc_e |  NV_PMC_ENABLE_PGRAPH);
 }
 
+static void
+nv50_graph_init_regs__nv(drm_device_t *dev)
+{	/* Fixed register/value writes with no symbolic names; "__nv" presumably marks values copied from NVIDIA's driver - TODO confirm */
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+
+	DRM_DEBUG("\n");
+
+	NV_WRITE(0x400804, 0xc0000000); /* these six registers all receive 0xc0000000 */
+	NV_WRITE(0x406800, 0xc0000000);
+	NV_WRITE(0x400c04, 0xc0000000);
+	NV_WRITE(0x401804, 0xc0000000);
+	NV_WRITE(0x405018, 0xc0000000);
+	NV_WRITE(0x402000, 0xc0000000);
+
+	NV_WRITE(0x400108, 0xffffffff); /* presumably acks pending status bits - TODO confirm */
+	NV_WRITE(0x400100, 0xffffffff);
+
+	NV_WRITE(0x400824, 0x00004000); /* same register nv50_graph_transfer_context later ORs XFER bits into */
+	NV_WRITE(0x400500, 0x00010001);
+}
+
+static void
+nv50_graph_init_regs(drm_device_t *dev)
+{	/* Programs only DEBUG_3; it is a plain write, so every bit other than bit 2 is written as 0. */
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+
+	DRM_DEBUG("\n");
+
+	NV_WRITE(NV04_PGRAPH_DEBUG_3, (1<<2) /* HW_CONTEXT_SWITCH_ENABLED */);
+}
+
+static uint32_t nv84_ctx_voodoo[] = {
+	0x0070008e, 0x0070009c, 0x00200020, 0x00600008, 0x0050004c, 0x00400e89,
+	0x00200000, 0x00600007, 0x00300000, 0x00c000ff, 0x00200000, 0x008000ff,
+	0x00700009, 0x0041634d, 0x00402944, 0x00402905, 0x0040290d, 0x00413e06,
+	0x00600005, 0x004015c5, 0x00600011, 0x0040270b, 0x004021c5, 0x00700000,
+	0x00700081, 0x00600004, 0x0050004a, 0x00216f40, 0x00600007, 0x00c02801,
+	0x0020002e, 0x00800001, 0x005000cb, 0x0090ffff, 0x0091ffff, 0x00200020,
+	0x00600008, 0x0050004c, 0x00600009, 0x00413e45, 0x0041594d, 0x0070009d,
+	0x00402dcf, 0x0070009f, 0x0050009f, 0x00402ac0, 0x00200200, 0x00600008,
+	0x00402a4f, 0x00402ac0, 0x004030cc, 0x00700081, 0x00200000, 0x00600006,
+	0x00700000, 0x00111bfc, 0x00700083, 0x00300000, 0x00216f40, 0x00600007,
+	0x00c00b01, 0x0020001e, 0x00800001, 0x005000cb, 0x00c000ff, 0x00700080,
+	0x00700083, 0x00200047, 0x00600006, 0x0011020a, 0x00200480, 0x00600007,
+	0x00300000, 0x00c000ff, 0x00c800ff, 0x00414907, 0x00202916, 0x008000ff,
+	0x0040508c, 0x005000cb, 0x00a0023f, 0x00200040, 0x00600006, 0x0070000f,
+	0x00170202, 0x0011020a, 0x00200032, 0x0010020d, 0x001c0242, 0x00120302,
+	0x00140402, 0x00180500, 0x00130509, 0x00150550, 0x00110605, 0x0020000f,
+	0x00100607, 0x00110700, 0x00110900, 0x00120902, 0x00110a00, 0x00160b02,
+	0x00120b28, 0x00140b2b, 0x00110c01, 0x00111400, 0x00111405, 0x00111407,
+	0x00111409, 0x0011140b, 0x002000cb, 0x00101500, 0x0040790f, 0x0040794b,
+	0x00214d40, 0x00600007, 0x0020043e, 0x008800ff, 0x0070008f, 0x0040798c,
+	0x005000cb, 0x00000000, 0x0020002b, 0x00101a05, 0x00131c00, 0x00121c04,
+	0x00141c20, 0x00111c25, 0x00131c40, 0x00121c44, 0x00141c60, 0x00111c65,
+	0x00131c80, 0x00121c84, 0x00141ca0, 0x00111ca5, 0x00131cc0, 0x00121cc4,
+	0x00141ce0, 0x00111ce5, 0x00131f00, 0x00191f40, 0x0040a1e0, 0x002001ed,
+	0x00600006, 0x00200044, 0x00102080, 0x001120c6, 0x001520c9, 0x001920d0,
+	0x00122100, 0x00122103, 0x00162200, 0x00122207, 0x00112280, 0x00112300,
+	0x00112302, 0x00122380, 0x0011238b, 0x00112394, 0x0011239c, 0x0040bee1,
+	0x00200254, 0x00600006, 0x00200044, 0x00102480, 0x0040af0f, 0x0040af4b,
+	0x00214d40, 0x00600007, 0x0020043e, 0x008800ff, 0x0070008f, 0x0040af8c,
+	0x005000cb, 0x00000000, 0x001124c6, 0x001524c9, 0x001924d0, 0x00122500,
+	0x00122503, 0x00162600, 0x00122607, 0x00112680, 0x00112700, 0x00112702,
+	0x00122780, 0x0011278b, 0x00112794, 0x0011279c, 0x0040d1e2, 0x002002bb,
+	0x00600006, 0x00200044, 0x00102880, 0x001128c6, 0x001528c9, 0x001928d0,
+	0x00122900, 0x00122903, 0x00162a00, 0x00122a07, 0x00112a80, 0x00112b00,
+	0x00112b02, 0x00122b80, 0x00112b8b, 0x00112b94, 0x00112b9c, 0x0040eee3,
+	0x00200322, 0x00600006, 0x00200044, 0x00102c80, 0x0040df0f, 0x0040df4b,
+	0x00214d40, 0x00600007, 0x0020043e, 0x008800ff, 0x0070008f, 0x0040df8c,
+	0x005000cb, 0x00000000, 0x00112cc6, 0x00152cc9, 0x00192cd0, 0x00122d00,
+	0x00122d03, 0x00162e00, 0x00122e07, 0x00112e80, 0x00112f00, 0x00112f02,
+	0x00122f80, 0x00112f8b, 0x00112f94, 0x00112f9c, 0x004101e4, 0x00200389,
+	0x00600006, 0x00200044, 0x00103080, 0x001130c6, 0x001530c9, 0x001930d0,
+	0x00123100, 0x00123103, 0x00163200, 0x00123207, 0x00113280, 0x00113300,
+	0x00113302, 0x00123380, 0x0011338b, 0x00113394, 0x0011339c, 0x00411ee5,
+	0x002003f0, 0x00600006, 0x00200044, 0x00103480, 0x00410f0f, 0x00410f4b,
+	0x00214d40, 0x00600007, 0x0020043e, 0x008800ff, 0x0070008f, 0x00410f8c,
+	0x005000cb, 0x00000000, 0x001134c6, 0x001534c9, 0x001934d0, 0x00123500,
+	0x00123503, 0x00163600, 0x00123607, 0x00113680, 0x00113700, 0x00113702,
+	0x00123780, 0x0011378b, 0x00113794, 0x0011379c, 0x00000000, 0x0041250f,
+	0x005000cb, 0x00214d40, 0x00600007, 0x0020043e, 0x008800ff, 0x005000cb,
+	0x00412887, 0x0060000a, 0x00000000, 0x00413700, 0x007000a0, 0x00700080,
+	0x00200480, 0x00600007, 0x00200004, 0x00c000ff, 0x008000ff, 0x005000cb,
+	0x00700000, 0x00200000, 0x00600006, 0x00111bfe, 0x0041594d, 0x00700000,
+	0x00200000, 0x00600006, 0x00111bfe, 0x00700080, 0x0070001d, 0x0040114d,
+	0x00700081, 0x00600004, 0x0050004a, 0x00414388, 0x0060000b, 0x00200000,
+	0x00600006, 0x00700000, 0x0041590b, 0x00111bfd, 0x0040424d, 0x00202916,
+	0x008000fd, 0x005000cb, 0x00c00002, 0x00200480, 0x00600007, 0x00200160,
+	0x00800002, 0x005000cb, 0x00c01802, 0x002027b6, 0x00800002, 0x005000cb,
+	0x00404e4d, 0x0060000b, 0x0041574d, 0x00700001, 0x005000cf, 0x00700003,
+	0x00415e06, 0x00415f05, 0x0060000d, 0x00700005, 0x0070000d, 0x00700006,
+	0x0070000b, 0x0070000e, 0x0070001c, 0x0060000c, ~0
+};
+
+static void
+nv50_graph_init_ctxctl(drm_device_t *dev)
+{	/* Upload the per-chipset ctxctl ucode table (if one exists), then clear the current-context state. */
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	uint32_t *voodoo = NULL; /* must start NULL: the default case below leaves it unset */
+
+	DRM_DEBUG("\n");
+
+	switch (dev_priv->chipset) {
+	case 0x84:
+		voodoo = nv84_ctx_voodoo;
+		break;
+	default:
+		DRM_ERROR("no voodoo for chipset NV%02x\n", dev_priv->chipset);
+		break; /* voodoo stays NULL, so the upload below is skipped */
+	}
+
+	if (voodoo) {
+		NV_WRITE(NV40_PGRAPH_CTXCTL_UCODE_INDEX, 0); /* start uploading at index 0 */
+		while (*voodoo != ~0) { /* the table is terminated by a ~0 sentinel word */
+			NV_WRITE(NV40_PGRAPH_CTXCTL_UCODE_DATA, *voodoo);
+			voodoo++;
+		}
+	}
+
+	NV_WRITE(0x400320, 4);
+	NV_WRITE(NV40_PGRAPH_CTXCTL_CUR, 0); /* no current context */
+	NV_WRITE(NV20_PGRAPH_CHANNEL_CTX_POINTER, 0);
+}
+
 int 
 nv50_graph_init(drm_device_t *dev)
 {
+	DRM_DEBUG("\n");
+
 	nv50_graph_init_reset(dev);
+	nv50_graph_init_regs__nv(dev); /* raw register setup */
+	nv50_graph_init_regs(dev); /* DEBUG_3 */
+	nv50_graph_init_ctxctl(dev); /* ctxctl table upload; none of these steps reports failure */
 
-	DRM_ERROR("stub!\n");
 	return 0;
 }
 
 void
 nv50_graph_takedown(drm_device_t *dev)
 {
-	DRM_ERROR("stub!\n");
+	DRM_DEBUG("\n"); /* trace only; no hardware state is torn down here */
 }
 
 int
 nv50_graph_create_context(drm_device_t *dev, int channel)
 {
-	DRM_ERROR("stub!\n");
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+	nouveau_gpuobj_t *ramin = chan->ramin->gpuobj;
+	int grctx_size = 0x60000, hdr; /* 384 KiB context object; hdr is a byte offset into ramin */
+	int ret;
+
+	DRM_DEBUG("ch%d\n", channel);
+
+	if ((ret = nouveau_gpuobj_new_ref(dev, channel, -1, 0,
+					  grctx_size, 0x1000,
+					  NVOBJ_FLAG_ZERO_ALLOC |
+					  NVOBJ_FLAG_ZERO_FREE,
+					  &chan->ramin_grctx)))
+		return ret; /* allocation failed: nothing has been written to ramin yet */
+
+	hdr = IS_G80 ? 0x200 : 0x20; /* the six-word descriptor sits at a chipset-dependent offset */
+	INSTANCE_WR(ramin, (hdr + 0x00)/4, 0x00190002);
+	INSTANCE_WR(ramin, (hdr + 0x04)/4, chan->ramin_grctx->instance +
+					   grctx_size - 1); /* instance + size - 1 (last byte) */
+	INSTANCE_WR(ramin, (hdr + 0x08)/4, chan->ramin_grctx->instance); /* instance (first byte) */
+	INSTANCE_WR(ramin, (hdr + 0x0c)/4, 0);
+	INSTANCE_WR(ramin, (hdr + 0x10)/4, 0);
+	INSTANCE_WR(ramin, (hdr + 0x14)/4, 0x00010000);
+
 	return 0;
 }
 
 void
 nv50_graph_destroy_context(drm_device_t *dev, int channel)
 {
-	DRM_ERROR("stub!\n");
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+	int i, hdr;
+
+	DRM_DEBUG("ch%d\n", channel);
+
+	hdr = IS_G80 ? 0x200 : 0x20; /* same offset nv50_graph_create_context used */
+	for (i=hdr; i<hdr+24; i+=4) /* zero the six descriptor words (24 bytes) */
+		INSTANCE_WR(chan->ramin->gpuobj, i/4, 0);
+
+	nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx); /* release the context object reference */
+}
+
+static int
+nv50_graph_transfer_context(drm_device_t *dev, uint32_t inst, int save)
+{	/* Trigger a context save (save != 0) or load (save == 0) for 'inst' and poll for completion; 0 or EBUSY. */
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	uint32_t old_cp, tv = 20000; /* tv: maximum number of polls before giving up */
+	int i;
+
+	DRM_DEBUG("inst=0x%08x, save=%d\n", inst, save);
+
+	old_cp = NV_READ(NV20_PGRAPH_CHANNEL_CTX_POINTER); /* remembered so it can be put back */
+	NV_WRITE(NV20_PGRAPH_CHANNEL_CTX_POINTER, inst | (1<<31));
+	NV_WRITE(0x400824, NV_READ(0x400824) |
+		 (save ? NV40_PGRAPH_CTXCTL_0310_XFER_SAVE :
+		  	 NV40_PGRAPH_CTXCTL_0310_XFER_LOAD));
+	NV_WRITE(NV40_PGRAPH_CTXCTL_0304, NV40_PGRAPH_CTXCTL_0304_XFER_CTX);
+
+	for (i = 0; i < tv; i++) { /* busy-poll with no delay between reads */
+		if (NV_READ(NV40_PGRAPH_CTXCTL_030C) == 0)
+			break;
+	}
+	NV_WRITE(NV20_PGRAPH_CHANNEL_CTX_POINTER, old_cp); /* restored on success and on timeout alike */
+
+	if (i == tv) { /* 0x30C never read back as 0 */
+		DRM_ERROR("failed: inst=0x%08x save=%d\n", inst, save);
+		DRM_ERROR("0x40030C = 0x%08x\n",
+			  NV_READ(NV40_PGRAPH_CTXCTL_030C));
+		return DRM_ERR(EBUSY);
+	}
+
+	return 0;
 }
 
 int
 nv50_graph_load_context(drm_device_t *dev, int channel)
 {
-	DRM_ERROR("stub!\n");
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+	uint32_t inst = ((chan->ramin->instance >> 12) | (1<<31)); /* instance in 4 KiB units, bit 31 set */
+	int ret; /* only used by the disabled block below */
+
+	DRM_DEBUG("ch%d\n", channel);
+
+#if 0 /* explicit transfer is disabled; only the pointer registers are written */
+	if ((ret = nv50_graph_transfer_context(dev, inst, 0)))
+		return ret;
+#endif
+
+	NV_WRITE(NV20_PGRAPH_CHANNEL_CTX_POINTER, inst);
+	NV_WRITE(0x400320, 4);
+	NV_WRITE(NV40_PGRAPH_CTXCTL_CUR, inst); /* record this channel as the current context */
+
 	return 0;
 }
 
 int
 nv50_graph_save_context(drm_device_t *dev, int channel)
 {
-	DRM_ERROR("stub!\n");
-	return 0;
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = dev_priv->fifos[channel];
+	uint32_t inst = ((chan->ramin->instance >> 12) | (1<<31)); /* same encoding as nv50_graph_load_context */
+
+	DRM_DEBUG("ch%d\n", channel);
+
+	return nv50_graph_transfer_context(dev, inst, 1); /* 1 = save; returns 0 or EBUSY on timeout */
 }
 
diff --git a/shared-core/nv50_instmem.c b/shared-core/nv50_instmem.c
new file mode 100644
index 00000000..4aca9e7d
--- /dev/null
+++ b/shared-core/nv50_instmem.c
@@ -0,0 +1,262 @@
+/*
+ * Copyright (C) 2007 Ben Skeggs.
+ *
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining
+ * a copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sublicense, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial
+ * portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ * IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE
+ * LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
+ * OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
+ * WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ */
+
+#include "drmP.h"
+#include "drm.h"
+#include "nouveau_drv.h"
+
+typedef struct {
+	uint32_t save1700[5]; /* regs 0x1700..0x1710 as read by init, written back by takedown */
+} nv50_instmem_priv;
+
+#define NV50_INSTMEM_PAGE_SHIFT 12
+#define NV50_INSTMEM_PAGE_SIZE  (1 << NV50_INSTMEM_PAGE_SHIFT) /* 4 KiB */
+#define NV50_INSTMEM_RSVD_SIZE	(64 * 1024) /* not referenced elsewhere in this file */
+#define NV50_INSTMEM_PT_SIZE(a)	(((a) >> 12) << 3) /* 8 bytes of PTE per 4 KiB of aperture */
+
+int
+nv50_instmem_init(drm_device_t *dev)
+{	/* Save regs 0x1700-0x1710, build the PRAMIN page table in reserved VRAM, then create the PRAMIN heap. */
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nv50_instmem_priv *priv;
+	uint32_t rv, pt, pts, cb, cb0, cb1, unk, as;
+	uint32_t i, v;
+	int ret;
+
+	priv = drm_calloc(1, sizeof(*priv), DRM_MEM_DRIVER);
+	if (!priv)
+		return DRM_ERR(ENOMEM);
+	dev_priv->Engine.instmem.priv = priv; /* published right away so takedown can free it */
+
+	/* Save current state of regs 0x1700..0x1710 (written back by takedown) */
+	for (i = 0x1700; i <= 0x1710; i+=4)
+		priv->save1700[(i-0x1700)/4] = NV_READ(i);
+
+	as  = dev_priv->ramin->size; /* PRAMIN aperture size */
+	rv  = nouveau_mem_fb_amount(dev) - (1*1024*1024); /* reserved VRAM base: 1 MiB below the fb amount */
+	pt  = rv + 0xd0000; /* page table base */
+	pts = NV50_INSTMEM_PT_SIZE(as); /* page table size in bytes */
+	cb  = rv + 0xc8000; /* "config base" */
+	if ((dev_priv->chipset & 0xf0) != 0x50) { /* offsets differ between 0x5x and other chipsets */
+		unk = cb + 0x4200;
+		cb0 = cb + 0x4240;
+		cb1 = cb + 0x278;
+	} else {
+		unk = cb + 0x5400;
+		cb0 = cb + 0x5440;
+		cb1 = cb + 0x1438;
+	}
+
+	DRM_DEBUG("PRAMIN config:\n");
+	DRM_DEBUG(" Rsvd VRAM base: 0x%08x\n", rv);
+	DRM_DEBUG("  Aperture size: %i MiB\n", as >> 20);
+	DRM_DEBUG("        PT base: 0x%08x\n", pt);
+	DRM_DEBUG("        PT size: %d KiB\n", pts >> 10);
+	DRM_DEBUG("     BIOS image: 0x%08x\n", (NV_READ(0x619f04)&~0xff)<<8);
+	DRM_DEBUG("    Config base: 0x%08x\n", cb);
+	DRM_DEBUG(" ctxdma Config0: 0x%08x\n", cb0);
+	DRM_DEBUG("        Config1: 0x%08x\n", cb1);
+
+	/* Map first MiB of reserved vram into BAR0 PRAMIN aperture */
+	NV_WRITE(0x1700, (rv>>16));
+	/* Poke some regs.. */
+	NV_WRITE(0x1704, (cb>>12));
+	NV_WRITE(0x1710, (((unk-cb)>>4))|(1<<31));
+	NV_WRITE(0x1704, (cb>>12)|(1<<30));
+
+	/* CB0, some DMA object, NFI what it points at... Needed however,
+	 * or the PRAMIN aperture doesn't operate as expected.
+	 */
+	NV_WRITE(NV_RAMIN + (cb0 - rv) + 0x00, 0x7fc00000);
+	NV_WRITE(NV_RAMIN + (cb0 - rv) + 0x04, 0xe1ffffff);
+	NV_WRITE(NV_RAMIN + (cb0 - rv) + 0x08, 0xe0000000);
+	NV_WRITE(NV_RAMIN + (cb0 - rv) + 0x0c, 0x01000001);
+	NV_WRITE(NV_RAMIN + (cb0 - rv) + 0x10, 0x00000000);
+	NV_WRITE(NV_RAMIN + (cb0 - rv) + 0x14, 0x00000000);
+
+	/* CB1, points at PRAMIN PT */
+	NV_WRITE(NV_RAMIN + (cb1 - rv) + 0, pt | 0x63);
+	NV_WRITE(NV_RAMIN + (cb1 - rv) + 4, 0x00000000);
+
+	/* Reset every PRAMIN PTE to 0x00000009 / 0 (the entries are not literally zeroed) */
+	v  = NV_RAMIN + (pt - rv);
+	for (i = v; i < v + pts; i += 8) {
+		NV_WRITE(i + 0x00, 0x00000009);
+		NV_WRITE(i + 0x04, 0x00000000);
+	}
+
+	/* Map page table into PRAMIN aperture */
+	for (i = pt; i < pt + pts; i += 0x1000) { /* one PTE per 4 KiB page of the table itself */
+		uint32_t pte = NV_RAMIN + (pt-rv) + (((i-pt) >> 12) << 3);
+		DRM_DEBUG("PRAMIN PTE = 0x%08x @ 0x%08x\n", i, pte);
+		NV_WRITE(pte + 0x00,      i | 1);
+		NV_WRITE(pte + 0x04, 0x00000000);
+	}
+
+	/* Points at CB0 */
+	NV_WRITE(0x170c, (((cb0 - cb)>>4)|(1<<31)));
+
+	/* Confirm it all worked, should be able to read back the page table's
+	 * PTEs from the PRAMIN BAR
+	 */
+	NV_WRITE(0x1700, pt >> 16);
+	if (NV_READ(0x700000) != NV_RI32(0)) {
+		DRM_ERROR("Failed to init PRAMIN page table\n");
+		return DRM_ERR(EINVAL); /* NOTE(review): priv and the 0x17xx writes are only undone if takedown runs */
+	}
+
+	/* Create a heap to manage PRAMIN aperture allocations */
+	ret = nouveau_mem_init_heap(&dev_priv->ramin_heap, pts, as-pts); /* skips the first pts bytes, which map the page table */
+	if (ret) {
+		DRM_ERROR("Failed to init PRAMIN heap\n");
+		return DRM_ERR(ENOMEM); /* the original value of ret is discarded */
+	}
+	DRM_DEBUG("NV50: PRAMIN setup ok\n");
+
+	/* Don't alloc the last MiB of VRAM, probably too much, but be safe
+	 * at least for now.
+	 */
+	dev_priv->ramin_rsvd_vram = 1*1024*1024;
+
+	/*XXX: probably incorrect, but needed to make hash func "work" */
+	dev_priv->ramht_offset = 0x10000;
+	dev_priv->ramht_bits   = 9;
+	dev_priv->ramht_size   = (1 << dev_priv->ramht_bits); /* 512 */
+	return 0;
+}
+
+void
+nv50_instmem_takedown(drm_device_t *dev)
+{	/* Undo nv50_instmem_init: write back the saved 0x1700-0x1710 values and free the private state. */
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nv50_instmem_priv *priv = dev_priv->Engine.instmem.priv;
+	int i;
+
+	if (!priv) /* init never ran, or takedown already did */
+		return;
+
+	/* Restore state from before init */
+	for (i = 0x1700; i <= 0x1710; i+=4)
+		NV_WRITE(i, priv->save1700[(i-0x1700)/4]);
+
+	dev_priv->Engine.instmem.priv = NULL; /* makes a second call a no-op */
+	drm_free(priv, sizeof(*priv), DRM_MEM_DRIVER);
+}
+
+int
+nv50_instmem_populate(drm_device_t *dev, nouveau_gpuobj_t *gpuobj, uint32_t *sz)
+{	/* Allocate backing memory for gpuobj; *sz is rounded up in place. Returns 0, EINVAL or ENOMEM. */
+	if (gpuobj->im_backing) /* already populated */
+		return DRM_ERR(EINVAL);
+
+	*sz = (*sz + (NV50_INSTMEM_PAGE_SIZE-1)) & ~(NV50_INSTMEM_PAGE_SIZE-1); /* round up to whole 4 KiB pages */
+	if (*sz == 0) /* zero-size request, or the round-up wrapped in 32 bits */
+		return DRM_ERR(EINVAL);
+
+	gpuobj->im_backing = nouveau_mem_alloc(dev, NV50_INSTMEM_PAGE_SIZE,
+					       *sz, NOUVEAU_MEM_FB,
+					       (DRMFILE)-2); /* NOTE(review): meaning of the -2 owner value is not visible here */
+	if (!gpuobj->im_backing) {
+		DRM_ERROR("Couldn't allocate vram to back PRAMIN pages\n");
+		return DRM_ERR(ENOMEM);
+	}
+
+	return 0;
+}
+
+void
+nv50_instmem_clear(drm_device_t *dev, nouveau_gpuobj_t *gpuobj)
+{	/* Release gpuobj's backing memory, unbinding it first if it is still bound. */
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+
+	if (gpuobj && gpuobj->im_backing) {
+		if (gpuobj->im_bound)
+			dev_priv->Engine.instmem.unbind(dev, gpuobj); /* via the engine hook; result ignored */
+		nouveau_mem_free(dev, gpuobj->im_backing);
+		gpuobj->im_backing = NULL; /* lets populate() be called again */
+	}	/* no-op when gpuobj is NULL or has no backing */
+}
+
+int
+nv50_instmem_bind(drm_device_t *dev, nouveau_gpuobj_t *gpuobj)
+{	/* Write one PTE per 4 KiB page of gpuobj's PRAMIN range, pointing at consecutive backing pages. */
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	uint32_t pte, pte_end, vram;
+
+	if (!gpuobj->im_backing || !gpuobj->im_pramin || gpuobj->im_bound) /* needs backing + PRAMIN range, not yet bound */
+		return DRM_ERR(EINVAL);
+
+	DRM_DEBUG("st=0x%0llx sz=0x%0llx\n",
+		  gpuobj->im_pramin->start, gpuobj->im_pramin->size);
+
+	pte     = (gpuobj->im_pramin->start >> 12) << 3; /* offset of the first PTE (8 bytes per page) */
+	pte_end = ((gpuobj->im_pramin->size >> 12) << 3) + pte; /* one past the last PTE */
+	vram    = gpuobj->im_backing->start - dev_priv->fb_phys; /* backing start relative to fb_phys */
+
+	if (pte == pte_end) { /* size is smaller than one page */
+		DRM_ERROR("WARNING: badness in bind() pte calc\n");
+		pte_end++; /* just enough for the loop below to run exactly once */
+	}
+
+	DRM_DEBUG("pramin=0x%llx, pte=%d, pte_end=%d\n",
+		  gpuobj->im_pramin->start, pte, pte_end);
+	DRM_DEBUG("first vram page: 0x%llx\n",
+		  gpuobj->im_backing->start);
+
+	while (pte < pte_end) {
+		NV_WI32(pte + 0, vram | 1); /* low word: page address with bit 0 set */
+		NV_WI32(pte + 4, 0x00000000);
+
+		pte += 8;
+		vram += NV50_INSTMEM_PAGE_SIZE;
+	}
+
+	gpuobj->im_bound = 1;
+	return 0;
+}
+
+int
+nv50_instmem_unbind(drm_device_t *dev, nouveau_gpuobj_t *gpuobj)
+{	/* Zero the PTEs covering gpuobj's PRAMIN range and mark it unbound; EINVAL if it was not bound. */
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	uint32_t pte, pte_end;
+
+	if (gpuobj->im_bound == 0)
+		return DRM_ERR(EINVAL);
+
+	pte     = (gpuobj->im_pramin->start >> 12) << 3; /* same range computation as nv50_instmem_bind */
+	pte_end = ((gpuobj->im_pramin->size >> 12) << 3) + pte;
+	while (pte < pte_end) { /* NOTE(review): no pte_end++ fallback as in bind(); a sub-page size clears nothing */
+		NV_WI32(pte + 0, 0x00000000);
+		NV_WI32(pte + 4, 0x00000000);
+		pte += 8;
+	}
+
+	gpuobj->im_bound = 0;
+	return 0;
+}
+
-- 
cgit v1.2.3