12 files changed, 844 insertions, 488 deletions
diff --git a/shared-core/nouveau_drv.h b/shared-core/nouveau_drv.h
index 3cca07fc..73793b34 100644
--- a/shared-core/nouveau_drv.h
+++ b/shared-core/nouveau_drv.h
@@ -57,18 +57,38 @@ enum nouveau_flags {
 	NV_NFORCE2  =0x20000000
 };
 
-struct nouveau_object
-{
-	struct nouveau_object *next;
-	struct nouveau_object *prev;
-	int channel;
+#define NVOBJ_ENGINE_SW		0
+#define NVOBJ_ENGINE_GR  	1
+#define NVOBJ_ENGINE_INT	0xdeadbeef
 
-	struct mem_block *instance;
+#define NVOBJ_FLAG_ALLOW_NO_REFS	(1 << 0)
+#define NVOBJ_FLAG_ZERO_ALLOC		(1 << 1)
+#define NVOBJ_FLAG_ZERO_FREE		(1 << 2)
+#define NVOBJ_FLAG_FAKE			(1 << 3)
+typedef struct nouveau_gpuobj {
+	struct nouveau_gpuobj *next; /* next/prev: dev_priv->gpuobj_all chain */
+	struct nouveau_gpuobj *prev;
 
-	uint32_t handle;
-	int      class;
-	int      engine;
-};
+	int im_channel;               /* channel given at creation; -1 = global */
+	struct mem_block *im_pramin;  /* PRAMIN block; base for INSTANCE_RD/WR */
+	struct mem_block *im_backing; /* optional backing; freed on delete */
+
+	uint32_t flags;               /* NVOBJ_FLAG_* bits */
+	int refcount;                 /* number of live nouveau_gpuobj_ref_t */
+
+	uint32_t engine;              /* NVOBJ_ENGINE_*; put in RAMHT context */
+	uint32_t class;               /* object class set by the *_new helpers */
+} nouveau_gpuobj_t;
+
+typedef struct nouveau_gpuobj_ref {
+	struct nouveau_gpuobj_ref *next; /* next ref on a channel's ramht_refs */
+
+	nouveau_gpuobj_t *gpuobj;        /* object this reference pins */
+	uint32_t instance;               /* from nouveau_gpuobj_instance_get() */
+
+	int channel;                     /* channel the ref was added for */
+	int handle;                      /* RAMHT handle; ~0 if not hashed */
+} nouveau_gpuobj_ref_t;
 
 struct nouveau_fifo
 {
@@ -79,21 +99,29 @@ struct nouveau_fifo
 	drm_local_map_t *map;
 	/* mapping of the regs controling the fifo */
 	drm_local_map_t *regs;
-	/* dma object for the command buffer itself */
-	struct mem_block      *cmdbuf_mem;
-	struct nouveau_object *cmdbuf_obj;
-	uint32_t pushbuf_base;
-	/* notifier memory */
+
+	/* DMA push buffer */
+	struct mem_block     *cmdbuf_mem;
+	nouveau_gpuobj_ref_t *pushbuf;
+	uint32_t              pushbuf_base;
+
+	/* Notifier memory */
 	struct mem_block *notifier_block;
 	struct mem_block *notifier_heap;
 	drm_local_map_t  *notifier_map;
-	/* PGRAPH context, for cards that keep it in RAMIN */
-	struct mem_block *ramin_grctx;
-	/* objects belonging to this fifo */
-	struct nouveau_object *objs;
 
-	/* XXX dynamic alloc ? */
-	uint32_t pgraph_ctx [340];
+	/* PFIFO context */
+	nouveau_gpuobj_ref_t *ramfc;
+
+	/* PGRAPH context */
+	nouveau_gpuobj_ref_t *ramin_grctx;
+	uint32_t pgraph_ctx [340]; /* XXX dynamic alloc ? */
+
+	/* Objects */
+	nouveau_gpuobj_ref_t *ramin; /* Private instmem */
+	struct mem_block     *ramin_heap; /* Private PRAMIN heap */
+	nouveau_gpuobj_ref_t *ramht; /* Hash table */
+	nouveau_gpuobj_ref_t *ramht_refs; /* Objects referenced by RAMHT */
 };
 
 struct nouveau_config {
@@ -157,6 +185,7 @@ typedef struct drm_nouveau_private {
 	struct nouveau_engine_func Engine;
 
 	/* RAMIN configuration, RAMFC, RAMHT and RAMRO offsets */
+	nouveau_gpuobj_t *ramht;
 	uint32_t ramin_size;
 	uint32_t ramht_offset;
 	uint32_t ramht_size;
@@ -182,9 +211,11 @@ typedef struct drm_nouveau_private {
 
         /* context table pointed to be NV_PGRAPH_CHANNEL_CTX_TABLE (0x400780) */
         uint32_t ctx_table_size;
-        struct mem_block *ctx_table;
+	nouveau_gpuobj_ref_t *ctx_table;
 
 	struct nouveau_config config;
+
+	nouveau_gpuobj_t *gpuobj_all;
 }
 drm_nouveau_private_t;
 
@@ -205,6 +236,7 @@ extern int               nouveau_mem_init_heap(struct mem_block **,
 extern struct mem_block *nouveau_mem_alloc_block(struct mem_block *,
 						 uint64_t size, int align2,
 						 DRMFILE);
+extern void              nouveau_mem_takedown(struct mem_block **heap);
 extern void              nouveau_mem_free_block(struct mem_block *);
 extern uint64_t          nouveau_mem_fb_amount(struct drm_device *dev);
 extern void              nouveau_mem_release(DRMFILE filp, struct mem_block *heap);
@@ -236,22 +268,28 @@ extern int  nouveau_fifo_owner(drm_device_t *dev, DRMFILE filp, int channel);
 extern void nouveau_fifo_free(drm_device_t *dev, int channel);
 
 /* nouveau_object.c */
-extern int  nouveau_object_init_channel(drm_device_t *, int channel,
-					uint32_t vram_handle,
-					uint32_t tt_handle);
-extern void nouveau_object_takedown_channel(drm_device_t *dev, int channel);
-extern void nouveau_object_cleanup(drm_device_t *dev, int channel);
-extern int  nouveau_ramht_insert(drm_device_t *, int channel,
-				 uint32_t handle, struct nouveau_object *);
-extern struct nouveau_object *
-nouveau_object_gr_create(drm_device_t *dev, int channel, int class);
-extern struct nouveau_object *
-nouveau_object_dma_create(drm_device_t *dev, int channel, int class,
-			  uint32_t offset, uint32_t size,
-			  int access, int target);
-extern void nouveau_object_free(drm_device_t *dev, struct nouveau_object *obj);
-extern int  nouveau_ioctl_grobj_alloc(DRM_IOCTL_ARGS);
-extern uint32_t nouveau_chip_instance_get(drm_device_t *dev, struct mem_block *mem);
+extern void nouveau_gpuobj_takedown(drm_device_t *dev);
+extern int nouveau_gpuobj_channel_init(drm_device_t *, int channel,
+				       uint32_t vram_h, uint32_t tt_h);
+extern void nouveau_gpuobj_channel_takedown(drm_device_t *, int channel);
+extern int nouveau_gpuobj_new(drm_device_t *, int channel, int size, int align,
+			      uint32_t flags, nouveau_gpuobj_t **);
+extern int nouveau_gpuobj_del(drm_device_t *, nouveau_gpuobj_t **);
+extern int nouveau_gpuobj_ref_add(drm_device_t *, int channel, uint32_t handle,
+				  nouveau_gpuobj_t *, nouveau_gpuobj_ref_t **);
+extern int nouveau_gpuobj_ref_del(drm_device_t *, nouveau_gpuobj_ref_t **);
+extern int nouveau_gpuobj_new_ref(drm_device_t *, int chan_obj, int chan_ref,
+				  uint32_t handle, int size, int align,
+				  uint32_t flags, nouveau_gpuobj_ref_t **);
+extern int nouveau_gpuobj_new_fake(drm_device_t *, uint32_t offset,
+				   uint32_t size, uint32_t flags,
+				   nouveau_gpuobj_t**, nouveau_gpuobj_ref_t**);
+extern int nouveau_gpuobj_dma_new(drm_device_t *, int channel, int class,
+				  uint64_t offset, uint64_t size,
+				  int access, int target, nouveau_gpuobj_t **);
+extern int nouveau_gpuobj_gr_new(drm_device_t *, int channel, int class,
+				 nouveau_gpuobj_t **);
+extern int nouveau_ioctl_grobj_alloc(DRM_IOCTL_ARGS);
 
 /* nouveau_irq.c */
 extern irqreturn_t nouveau_irq_handler(DRM_IRQ_ARGS);
@@ -384,8 +422,8 @@ extern long nouveau_compat_ioctl(struct file *filp, unsigned int cmd,
 #define NV_WI32(o,v) DRM_WRITE32(dev_priv->ramin, (o), (v))
 #endif
 
-#define INSTANCE_RD(o,i) NV_RI32((o)->start + ((i)<<2))
-#define INSTANCE_WR(o,i,v) NV_WI32((o)->start + ((i)<<2), (v))
+#define INSTANCE_RD(o,i) NV_RI32((o)->im_pramin->start + ((i)<<2))
+#define INSTANCE_WR(o,i,v) NV_WI32((o)->im_pramin->start + ((i)<<2), (v))
 
 #endif /* __NOUVEAU_DRV_H__ */
 
diff --git a/shared-core/nouveau_fifo.c b/shared-core/nouveau_fifo.c
index 81dbfcda..9f916307 100644
--- a/shared-core/nouveau_fifo.c
+++ b/shared-core/nouveau_fifo.c
@@ -186,10 +186,12 @@ static int
 nouveau_fifo_cmdbuf_alloc(struct drm_device *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 	struct nouveau_config *config = &dev_priv->config;
 	struct mem_block *cb;
-	struct nouveau_object *cb_dma = NULL;
 	int cb_min_size = max(NV03_FIFO_SIZE,PAGE_SIZE);
+	nouveau_gpuobj_t *pushbuf = NULL;
+	int ret;
 
 	/* Defaults for unconfigured values */
 	if (!config->cmdbuf.location)
@@ -206,37 +208,42 @@ nouveau_fifo_cmdbuf_alloc(struct drm_device *dev, int channel)
 	}
 
 	if (cb->flags & NOUVEAU_MEM_AGP) {
-		cb_dma = nouveau_object_dma_create(dev, channel,
-				NV_CLASS_DMA_IN_MEMORY,
-				cb->start - dev_priv->agp_phys,
-				cb->size,
-				NV_DMA_ACCESS_RO, NV_DMA_TARGET_AGP);
+		ret = nouveau_gpuobj_dma_new
+			(dev, channel, NV_CLASS_DMA_IN_MEMORY,
+			 cb->start - dev_priv->agp_phys,
+			 cb->size, NV_DMA_ACCESS_RO, NV_DMA_TARGET_AGP,
+			 &pushbuf);
 	} else if (dev_priv->card_type != NV_04) {
-		cb_dma = nouveau_object_dma_create(dev, channel,
-				NV_CLASS_DMA_IN_MEMORY,
-				cb->start - drm_get_resource_start(dev, 1),
-				cb->size,
-				NV_DMA_ACCESS_RO, NV_DMA_TARGET_VIDMEM);
+		ret = nouveau_gpuobj_dma_new
+			(dev, channel, NV_CLASS_DMA_IN_MEMORY,
+			 cb->start - drm_get_resource_start(dev, 1),
+			 cb->size, NV_DMA_ACCESS_RO, NV_DMA_TARGET_VIDMEM,
+			 &pushbuf);
 	} else {
 		/* NV04 cmdbuf hack, from original ddx.. not sure of it's
 		 * exact reason for existing :)  PCI access to cmdbuf in
 		 * VRAM.
 		 */
-		cb_dma = nouveau_object_dma_create(dev, channel,
-				NV_CLASS_DMA_IN_MEMORY,
-				cb->start, cb->size,
-				NV_DMA_ACCESS_RO, NV_DMA_TARGET_PCI);
+		ret = nouveau_gpuobj_dma_new
+			(dev, channel, NV_CLASS_DMA_IN_MEMORY,
+			 cb->start, cb->size, NV_DMA_ACCESS_RO,
+			 NV_DMA_TARGET_PCI, &pushbuf);
 	}
 
-	if (!cb_dma) {
+	if (ret) {
 		nouveau_mem_free(dev, cb);
-		DRM_ERROR("Failed to alloc DMA object for command buffer\n");
-		return DRM_ERR(ENOMEM);
+		DRM_ERROR("Error creating push buffer ctxdma: %d\n", ret);
+		return ret;
+	}
+
+	if ((ret = nouveau_gpuobj_ref_add(dev, channel, 0, pushbuf,
+					  &chan->pushbuf))) {
+		DRM_ERROR("Error referencing push buffer ctxdma: %d\n", ret);
+		return ret;
 	}
 
 	dev_priv->fifos[channel].pushbuf_base = 0;
 	dev_priv->fifos[channel].cmdbuf_mem = cb;
-	dev_priv->fifos[channel].cmdbuf_obj = cb_dma;
 	return 0;
 }
 
@@ -266,6 +273,7 @@ int nouveau_fifo_alloc(drm_device_t* dev, int *chan_ret, DRMFILE filp,
 		return DRM_ERR(EINVAL);
 	(*chan_ret) = channel;
 	chan = &dev_priv->fifos[channel];
+	memset(chan, 0, sizeof(*chan)); /* start from a clean channel state */
 
 	DRM_INFO("Allocating FIFO number %d\n", channel);
 
@@ -273,18 +281,15 @@ int nouveau_fifo_alloc(drm_device_t* dev, int *chan_ret, DRMFILE filp,
 	chan->used = 1;
 	chan->filp = filp;
 
-	/* FIFO has no objects yet */
-	chan->objs = NULL;
-
-	/* allocate a command buffer, and create a dma object for the gpu */
-	ret = nouveau_fifo_cmdbuf_alloc(dev, channel);
+	/* Setup channel's default objects */
+	ret = nouveau_gpuobj_channel_init(dev, channel, vram_handle, tt_handle);
 	if (ret) {
 		nouveau_fifo_free(dev, channel);
 		return ret;
 	}
 
-	/* Setup channel's default objects */
-	ret = nouveau_object_init_channel(dev, channel, vram_handle, tt_handle);
+	/* allocate a command buffer, and create a dma object for the gpu */
+	ret = nouveau_fifo_cmdbuf_alloc(dev, channel);
 	if (ret) {
 		nouveau_fifo_free(dev, channel);
 		return ret;
@@ -395,13 +400,18 @@ void nouveau_fifo_free(drm_device_t* dev, int channel)
 	NV_WRITE(NV03_PFIFO_CACHES, 0x00000001);
 
 	/* Deallocate command buffer */
-	if (chan->cmdbuf_mem)
+	if (chan->pushbuf)
+		nouveau_gpuobj_ref_del(dev, &chan->pushbuf);
+
+	if (chan->cmdbuf_mem) {
 		nouveau_mem_free(dev, chan->cmdbuf_mem);
+		chan->cmdbuf_mem = NULL;
+	}
 
 	nouveau_notifier_takedown_channel(dev, channel);
 
 	/* Destroy objects belonging to the channel */
-	nouveau_object_cleanup(dev, channel);
+	nouveau_gpuobj_channel_takedown(dev, channel);
 
 	dev_priv->fifo_alloc_count--;
 }
diff --git a/shared-core/nouveau_mem.c b/shared-core/nouveau_mem.c
index d8ae52b7..49041862 100644
--- a/shared-core/nouveau_mem.c
+++ b/shared-core/nouveau_mem.c
@@ -189,7 +189,7 @@ void nouveau_mem_release(DRMFILE filp, struct mem_block *heap)
 /* 
  * Cleanup everything
  */
-static void nouveau_mem_takedown(struct mem_block **heap)
+void nouveau_mem_takedown(struct mem_block **heap)
 {
 	struct mem_block *p;
 
@@ -554,6 +554,13 @@ int nouveau_instmem_init(struct drm_device *dev)
 	nouveau_instmem_determine_amount(dev);
 	nouveau_instmem_configure_fixed_tables(dev);
 
+	if ((ret = nouveau_gpuobj_new_fake(dev, dev_priv->ramht_offset,
+						dev_priv->ramht_size,
+						NVOBJ_FLAG_ZERO_ALLOC |
+						NVOBJ_FLAG_ALLOW_NO_REFS,
+						&dev_priv->ramht, NULL)))
+		return ret;
+
 	/* Create a heap to manage RAMIN allocations, we don't allocate
 	 * the space that was reserved for RAMHT/FC/RO.
 	 */
diff --git a/shared-core/nouveau_notifier.c b/shared-core/nouveau_notifier.c
index 0cfe733e..4d5e26ab 100644
--- a/shared-core/nouveau_notifier.c
+++ b/shared-core/nouveau_notifier.c
@@ -74,10 +74,10 @@ nouveau_notifier_alloc(drm_device_t *dev, int channel, uint32_t handle,
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
-	struct nouveau_object *obj;
+	nouveau_gpuobj_t *nobj = NULL;
 	struct mem_block *mem;
 	uint32_t offset;
-	int target;
+	int target, ret;
 
 	if (!chan->notifier_heap) {
 		DRM_ERROR("Channel %d doesn't have a notifier heap!\n",
@@ -105,21 +105,19 @@ nouveau_notifier_alloc(drm_device_t *dev, int channel, uint32_t handle,
 		return DRM_ERR(EINVAL);
 	}
 
-	obj = nouveau_object_dma_create(dev, channel, NV_CLASS_DMA_IN_MEMORY,
-					offset, mem->size, NV_DMA_ACCESS_RW,
-					target);
-	if (!obj) {
+	if ((ret = nouveau_gpuobj_dma_new(dev, channel, NV_CLASS_DMA_IN_MEMORY,
+					  offset, mem->size,
+					  NV_DMA_ACCESS_RW, target, &nobj))) {
 		nouveau_mem_free_block(mem);
-		DRM_ERROR("Error creating notifier ctxdma\n");
-		return DRM_ERR(ENOMEM);
+		DRM_ERROR("Error creating notifier ctxdma: %d\n", ret);
+		return ret;
 	}
 
-	obj->handle = handle;
-	if (nouveau_ramht_insert(dev, channel, handle, obj)) {
-		nouveau_object_free(dev, obj);
+	if ((ret = nouveau_gpuobj_ref_add(dev, channel, handle, nobj, NULL))) {
+		nouveau_gpuobj_del(dev, &nobj);
 		nouveau_mem_free_block(mem);
-		DRM_ERROR("Error inserting notifier ctxdma into RAMHT\n");
-		return DRM_ERR(ENOMEM);
+		DRM_ERROR("Error referencing notifier ctxdma: %d\n", ret);
+		return ret;
 	}
 
 	*b_offset = mem->start;
diff --git a/shared-core/nouveau_object.c b/shared-core/nouveau_object.c
index dac08df4..79875ca1 100644
--- a/shared-core/nouveau_object.c
+++ b/shared-core/nouveau_object.c
@@ -35,79 +35,6 @@
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
 
-/* TODO
- *  - Check object class, deny unsafe objects (add card-specific versioning?)
- *  - Get rid of DMA object creation, this should be wrapped by MM routines.
- */
-
-/* Translate a RAMIN offset into a value the card understands, will be useful
- * in the future when we can access more instance ram which isn't mapped into
- * the PRAMIN aperture
- */
-uint32_t
-nouveau_chip_instance_get(drm_device_t *dev, struct mem_block *mem)
-{
-	uint32_t inst = (uint32_t)mem->start >> 4;
-	DRM_DEBUG("****** on-chip instance for 0x%016llx = 0x%08x\n",
-			mem->start, inst);
-	return inst;
-}
-
-static void
-nouveau_object_link(drm_device_t *dev, struct nouveau_object *obj)
-{
-	drm_nouveau_private_t *dev_priv=dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[obj->channel];
-
-	if (!chan->objs) {
-		chan->objs = obj;
-		return;
-	}
-
-	obj->prev = NULL;
-	obj->next = chan->objs;
-
-	chan->objs->prev = obj;
-	chan->objs = obj;
-}
-
-static void
-nouveau_object_unlink(drm_device_t *dev, struct nouveau_object *obj)
-{
-	drm_nouveau_private_t *dev_priv=dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[obj->channel];
-
-	if (obj->prev == NULL) {	
-		if (obj->next)
-			obj->next->prev = NULL;
-		chan->objs = obj->next;
-	} else if (obj->next == NULL) {
-		if (obj->prev)
-			obj->prev->next = NULL;
-	} else {
-		obj->prev->next = obj->next;
-		obj->next->prev = obj->prev;
-	}
-}
-
-static struct nouveau_object *
-nouveau_object_handle_find(drm_device_t *dev, int channel, uint32_t handle)
-{
-	drm_nouveau_private_t *dev_priv=dev->dev_private;
-	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
-	struct nouveau_object *obj = chan->objs;
-
-	DRM_DEBUG("Looking for handle 0x%08x\n", handle);
-	while (obj) {
-		if (obj->handle == handle)
-			return obj;
-		obj = obj->next;
-	}
-
-	DRM_DEBUG("...couldn't find handle\n");
-	return NULL;
-}
-
 /* NVidia uses context objects to drive drawing operations.
 
    Context objects can be selected into 8 subchannels in the FIFO,
@@ -150,146 +77,439 @@ nouveau_ramht_hash_handle(drm_device_t *dev, int channel, uint32_t handle)
 		handle >>= dev_priv->ramht_bits;
 	}
 	hash ^= channel << (dev_priv->ramht_bits - 4);
-	return hash << 3;
+	hash <<= 3;
+
+	DRM_DEBUG("ch%d handle=0x%08x hash=0x%08x\n", channel, handle, hash);
+	return hash;
 }
 
 static int
-nouveau_ramht_entry_valid(drm_device_t *dev, uint32_t ramht, uint32_t offset)
+nouveau_ramht_entry_valid(drm_device_t *dev, nouveau_gpuobj_t *ramht,
+			  uint32_t offset)
 {
 	drm_nouveau_private_t *dev_priv=dev->dev_private;
-	uint32_t ctx = NV_RI32(ramht + offset + 4);
+	uint32_t ctx = INSTANCE_RD(ramht, (offset + 4)/4);
 
 	if (dev_priv->card_type < NV_40)
 		return ((ctx & NV_RAMHT_CONTEXT_VALID) != 0);
 	return (ctx != 0);
 }
 
-int
-nouveau_ramht_insert(drm_device_t* dev, int channel, uint32_t handle,
-		     struct nouveau_object *obj)
+static int
+nouveau_ramht_insert(drm_device_t* dev, nouveau_gpuobj_ref_t *ref)
 {
 	drm_nouveau_private_t *dev_priv=dev->dev_private;
-	uint32_t ramht = dev_priv->ramht_offset;
+	struct nouveau_fifo *chan = &dev_priv->fifos[ref->channel];
+	nouveau_gpuobj_t *ramht = chan->ramht ? chan->ramht->gpuobj : NULL;
+	nouveau_gpuobj_t *gpuobj = ref->gpuobj;
 	uint32_t ctx, co, ho;
-	uint32_t inst;
 
-	inst = nouveau_chip_instance_get(dev, obj->instance);
+	if (!ramht) {
+		DRM_ERROR("No hash table!\n");
+		return DRM_ERR(EINVAL);
+	}
+
 	if (dev_priv->card_type < NV_40) {
-		ctx = NV_RAMHT_CONTEXT_VALID | inst |
-		      (channel     << NV_RAMHT_CONTEXT_CHANNEL_SHIFT) |
-		      (obj->engine << NV_RAMHT_CONTEXT_ENGINE_SHIFT);
+		ctx = NV_RAMHT_CONTEXT_VALID | (ref->instance >> 4) |
+		      (ref->channel   << NV_RAMHT_CONTEXT_CHANNEL_SHIFT) |
+		      (gpuobj->engine << NV_RAMHT_CONTEXT_ENGINE_SHIFT);
 	} else
 	if (dev_priv->card_type < NV_50) {
-		ctx = inst |
-		      (channel     << NV40_RAMHT_CONTEXT_CHANNEL_SHIFT) |
-		      (obj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT);
+		ctx = (ref->instance >> 4) |
+		      (ref->channel   << NV40_RAMHT_CONTEXT_CHANNEL_SHIFT) |
+		      (gpuobj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT);
 	} else {
-		ctx = inst |
-		      (obj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT);
+		ctx = (ref->instance  >> 4) |
+		      (gpuobj->engine << NV40_RAMHT_CONTEXT_ENGINE_SHIFT);
 	}
 
-	co = ho = nouveau_ramht_hash_handle(dev, channel, handle);
+	co = ho = nouveau_ramht_hash_handle(dev, ref->channel, ref->handle);
 	do {
 		if (!nouveau_ramht_entry_valid(dev, ramht, co)) {
 			DRM_DEBUG("insert ch%d 0x%08x: h=0x%08x, c=0x%08x\n",
-				  channel, co, handle, ctx);
-			NV_WI32(ramht + co + 0, handle);
-			NV_WI32(ramht + co + 4, ctx);
-			obj->handle = handle;
+				  ref->channel, co, ref->handle, ctx);
+			INSTANCE_WR(ramht, (co + 0)/4, ref->handle);
+			INSTANCE_WR(ramht, (co + 4)/4, ctx);
 			return 0;
 		}
 		DRM_DEBUG("collision ch%d 0x%08x: h=0x%08x\n",
-			  channel, co, NV_RI32(ramht + co));
+			  ref->channel, co, INSTANCE_RD(ramht, co/4));
 
 		co += 8;
-		if (co == dev_priv->ramht_size)
+		if (co >= dev_priv->ramht_size)
 			co = 0;
 	} while (co != ho);
 
-	DRM_ERROR("RAMHT space exhausted. ch=%d\n", channel);
+	DRM_ERROR("RAMHT space exhausted. ch=%d\n", ref->channel);
 	return DRM_ERR(ENOMEM);
 }
 
 static void
-nouveau_ramht_remove(drm_device_t* dev, struct nouveau_object *obj)
+nouveau_ramht_remove(drm_device_t* dev, nouveau_gpuobj_ref_t *ref)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t ramht = dev_priv->ramht_offset;
+	struct nouveau_fifo *chan = &dev_priv->fifos[ref->channel];
+	nouveau_gpuobj_t *ramht = chan->ramht ? chan->ramht->gpuobj : NULL;
 	uint32_t co, ho;
 
-	co = ho = nouveau_ramht_hash_handle(dev, obj->channel, obj->handle);
+	if (!ramht) {
+		DRM_ERROR("No hash table!\n");
+		return;
+	}
+
+	co = ho = nouveau_ramht_hash_handle(dev, ref->channel, ref->handle);
 	do {
 		if (nouveau_ramht_entry_valid(dev, ramht, co) &&
-		    (obj->handle == NV_RI32(ramht + co))) {
+		    (ref->handle == INSTANCE_RD(ramht, (co/4)))) {
 			DRM_DEBUG("remove ch%d 0x%08x: h=0x%08x, c=0x%08x\n",
-				  obj->channel, co, obj->handle,
-				  NV_RI32(ramht + co + 4));
-			NV_WI32(ramht + co + 0, 0x00000000);
-			NV_WI32(ramht + co + 4, 0x00000000);
-			obj->handle = ~0;
+				  ref->channel, co, ref->handle,
+				  INSTANCE_RD(ramht, (co + 4)/4));
+			INSTANCE_WR(ramht, (co + 0)/4, 0x00000000);
+			INSTANCE_WR(ramht, (co + 4)/4, 0x00000000);
 			return;
 		}
 
 		co += 8;
-		if (co == dev_priv->ramht_size)
+		if (co >= dev_priv->ramht_size)
 			co = 0;
 	} while (co != ho);
 
 	DRM_ERROR("RAMHT entry not found. ch=%d, handle=0x%08x\n",
-		  obj->channel, obj->handle);
+		  ref->channel, ref->handle);
 }
 
-static struct nouveau_object *
-nouveau_object_instance_alloc(drm_device_t* dev, int channel)
+int
+nouveau_gpuobj_new(drm_device_t *dev, int channel, int size, int align,
+		   uint32_t flags, nouveau_gpuobj_t **gpuobj_ret)
 {
-	drm_nouveau_private_t *dev_priv=dev->dev_private;
-	struct nouveau_object       *obj;
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = NULL;
+	nouveau_gpuobj_t *gpuobj;
+	struct mem_block *pramin = NULL;
+
+	DRM_DEBUG("ch%d size=%d align=%d flags=0x%08x\n",
+		  channel, size, align, flags);
 
-	/* Create object struct */
-	obj = drm_calloc(1, sizeof(struct nouveau_object), DRM_MEM_DRIVER);
-	if (!obj) {
-		DRM_ERROR("couldn't alloc memory for object\n");
-		return NULL;
+	if (!dev_priv || !gpuobj_ret || *gpuobj_ret != NULL)
+		return DRM_ERR(EINVAL);
+
+	if (channel >= 0) {
+		if (channel >= nouveau_fifo_number(dev)) /* valid: 0..n-1 */
+			return DRM_ERR(EINVAL);
+		chan = &dev_priv->fifos[channel];
 	}
 
-	/* Allocate instance memory */
-	obj->instance  = nouveau_instmem_alloc(dev,
-			(dev_priv->card_type >= NV_40 ? 32 : 16), 4);
-	if (!obj->instance) {
-		DRM_ERROR("couldn't alloc RAMIN for object\n");
-		drm_free(obj, sizeof(struct nouveau_object), DRM_MEM_DRIVER);
-		return NULL;
+	gpuobj = drm_calloc(1, sizeof(*gpuobj), DRM_MEM_DRIVER);
+	if (!gpuobj)
+		return DRM_ERR(ENOMEM);
+	DRM_DEBUG("gpuobj %p\n", gpuobj);
+	gpuobj->flags = flags;
+	gpuobj->im_channel = channel;
+
+	/* Choose between global instmem heap, and per-channel private
+	 * instmem heap.  On <NV50 allow requests for private instmem
+	 * to be satisfied from global heap if no per-channel area
+	 * available.
+	 */
+	if (chan) {
+		if (chan->ramin_heap) {
+			DRM_DEBUG("private heap\n");
+			pramin = chan->ramin_heap;
+		} else
+		if (dev_priv->card_type < NV_50) {
+			DRM_DEBUG("global heap fallback\n");
+			pramin = dev_priv->ramin_heap;
+		}
+	} else {
+		DRM_DEBUG("global heap\n");
+		pramin = dev_priv->ramin_heap;
+	}
+	if (!pramin) { /* gpuobj is not linked yet; free it directly */
+		DRM_ERROR("No PRAMIN heap!\n");
+		drm_free(gpuobj, sizeof(*gpuobj), DRM_MEM_DRIVER);
+		return DRM_ERR(EINVAL);
 	}
 
-	/* Bind object to channel */
-	obj->channel = channel;
-	obj->handle  = ~0;
-	nouveau_object_link(dev, obj);
+	/* Allocate a chunk of the PRAMIN aperture */
+	gpuobj->im_pramin = nouveau_mem_alloc_block(pramin, size,
+						    drm_order(align),
+						    (DRMFILE)-2);
+	if (!gpuobj->im_pramin) {
+		nouveau_gpuobj_del(dev, &gpuobj);
+		return DRM_ERR(ENOMEM);
+	}
+	gpuobj->im_pramin->flags = NOUVEAU_MEM_INSTANCE;
+
+	/* On NV50 the PRAMIN aperture is paged.  When allocating from the
+	 * global instmem heap, alloc and bind VRAM pages into the PRAMIN
+	 * aperture.
+	 */
+	if (!chan && dev_priv->card_type >= NV_50) {
+		DRM_ERROR("back aperture with vram pages\n");
+		nouveau_gpuobj_del(dev, &gpuobj);
+		return DRM_ERR(EINVAL);
+	}
 
-	return obj;
+	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
+		int i;
+
+		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
+			INSTANCE_WR(gpuobj, i/4, 0);
+	}
+
+	if (dev_priv->gpuobj_all) {
+		gpuobj->next = dev_priv->gpuobj_all;
+		gpuobj->next->prev = gpuobj;
+	}
+	dev_priv->gpuobj_all = gpuobj;
+
+	*gpuobj_ret = gpuobj;
+	return 0;
 }
 
-static void
-nouveau_object_instance_free(drm_device_t *dev, struct nouveau_object *obj)
+void nouveau_gpuobj_takedown(drm_device_t *dev)
 {
-	drm_nouveau_private_t *dev_priv=dev->dev_private;
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nouveau_gpuobj_t *gpuobj = NULL;
+
+	DRM_DEBUG("\n");
+
+	while ((gpuobj = dev_priv->gpuobj_all)) {
+		DRM_ERROR("gpuobj %p still exists at takedown, refs=%d\n",
+			  gpuobj, gpuobj->refcount);
+		gpuobj->refcount = 0;
+		nouveau_gpuobj_del(dev, &gpuobj);
+	}
+}
+
+int nouveau_gpuobj_del(drm_device_t *dev, nouveau_gpuobj_t **pgpuobj)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nouveau_gpuobj_t *gpuobj;
+
+	DRM_DEBUG("gpuobj %p\n", pgpuobj ? *pgpuobj : NULL);
+
+	if (!dev_priv || !pgpuobj || !(*pgpuobj))
+		return DRM_ERR(EINVAL);
+	gpuobj = *pgpuobj;
+
+	if (gpuobj->refcount != 0) {
+		DRM_ERROR("gpuobj refcount is %d\n", gpuobj->refcount);
+		return DRM_ERR(EINVAL);
+	}
+
+	if (gpuobj->im_pramin) {
+		if (gpuobj->flags & NVOBJ_FLAG_FAKE)
+			drm_free(gpuobj->im_pramin, sizeof(*gpuobj->im_pramin),
+				 DRM_MEM_DRIVER);
+		else
+			nouveau_mem_free_block(gpuobj->im_pramin);
+	}
+
+	if (gpuobj->im_backing)
+		nouveau_mem_free(dev, gpuobj->im_backing);
+
+	if (gpuobj->next)
+		gpuobj->next->prev = gpuobj->prev;
+	if (gpuobj->prev)
+		gpuobj->prev->next = gpuobj->next;
+	else if (dev_priv->gpuobj_all == gpuobj) /* skip never-linked objs */
+		dev_priv->gpuobj_all = gpuobj->next;
+
+	*pgpuobj = NULL;
+	drm_free(gpuobj, sizeof(*gpuobj), DRM_MEM_DRIVER);
+	return 0;
+}
+
+static int
+nouveau_gpuobj_instance_get(drm_device_t *dev, int channel,
+			    nouveau_gpuobj_t *gpuobj, uint32_t *inst)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nouveau_gpuobj_t *cpramin;
+
+	if ((channel > 0) && gpuobj->im_channel != channel) {
+		DRM_ERROR("Channel mismatch: obj %d, ref %d\n",
+			  gpuobj->im_channel, channel);
+		return DRM_ERR(EINVAL);
+	}
+
+	/* <NV50 use PRAMIN address everywhere */
+	if (dev_priv->card_type < NV_50) {
+		*inst = gpuobj->im_pramin->start;
+		return 0;
+	}
+
+	/* NV50 channel-local instance */
+	if (channel > 0) {
+		cpramin = dev_priv->fifos[channel].ramin->gpuobj;
+		*inst = gpuobj->im_pramin->start - cpramin->im_pramin->start;
+		return 0;
+	}
+
+	/* NV50 global (VRAM) instance */
+	if (gpuobj->im_channel < 0) {
+		/* ...from global heap */
+		if (!gpuobj->im_backing) {
+			DRM_ERROR("AII, no VRAM backing gpuobj\n");
+			return DRM_ERR(EINVAL);
+		}
+		*inst = gpuobj->im_backing->start - dev_priv->fb_phys;
+		return 0;
+	} else {
+		/* ...from local heap */
+		cpramin = dev_priv->fifos[gpuobj->im_channel].ramin->gpuobj;
+		*inst = (cpramin->im_backing->start - dev_priv->fb_phys) +
+			(gpuobj->im_pramin->start - cpramin->im_pramin->start);
+		return 0;
+	}
+
+	return DRM_ERR(EINVAL);
+}
+
+int
+nouveau_gpuobj_ref_add(drm_device_t *dev, int channel, uint32_t handle,
+		       nouveau_gpuobj_t *gpuobj, nouveau_gpuobj_ref_t **ref_ret)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = NULL;
+	nouveau_gpuobj_ref_t *ref;
+	uint32_t instance;
+	int ret;
+
+	DRM_DEBUG("ch%d h=0x%08x gpuobj=%p\n", channel, handle, gpuobj);
+
+	if (!dev_priv || !gpuobj || (ref_ret && *ref_ret != NULL))
+		return DRM_ERR(EINVAL);
+
+	if (channel >= 0) {
+		if (channel >= nouveau_fifo_number(dev)) /* valid: 0..n-1 */
+			return DRM_ERR(EINVAL);
+		chan = &dev_priv->fifos[channel];
+	} else
+	if (!ref_ret)
+		return DRM_ERR(EINVAL);
+
+	ret = nouveau_gpuobj_instance_get(dev, channel, gpuobj, &instance);
+	if (ret)
+		return ret;
+
+	ref = drm_calloc(1, sizeof(*ref), DRM_MEM_DRIVER);
+	if (!ref)
+		return DRM_ERR(ENOMEM);
+	ref->gpuobj   = gpuobj;
+	ref->channel  = channel;
+	ref->instance = instance;
+
+	if (!ref_ret) {
+		ref->handle = handle;
+
+		ret = nouveau_ramht_insert(dev, ref);
+		if (ret) {
+			drm_free(ref, sizeof(*ref), DRM_MEM_DRIVER);
+			return ret;
+		}
+
+		ref->next = chan->ramht_refs;
+		chan->ramht_refs = ref;
+	} else {
+		ref->handle = ~0;
+		*ref_ret = ref;
+	}
+
+	ref->gpuobj->refcount++;
+	return 0;
+}
+
+int nouveau_gpuobj_ref_del(drm_device_t *dev, nouveau_gpuobj_ref_t **pref)
+{
+	nouveau_gpuobj_ref_t *ref;
+
+	DRM_DEBUG("ref %p\n", pref ? *pref : NULL);
+
+	if (!dev || !pref || *pref == NULL)
+		return DRM_ERR(EINVAL);
+	ref = *pref;
+
+	if (ref->handle != ~0)
+		nouveau_ramht_remove(dev, ref);
+
+	if (ref->gpuobj) {
+		ref->gpuobj->refcount--;
+
+		if (ref->gpuobj->refcount == 0) {
+			if (!(ref->gpuobj->flags & NVOBJ_FLAG_ALLOW_NO_REFS))
+				nouveau_gpuobj_del(dev, &ref->gpuobj);
+		}
+	}
+
+	*pref = NULL;
+	drm_free(ref, sizeof(*ref), DRM_MEM_DRIVER); /* match drm_calloc size */
+	return 0;
+}
+
+int
+nouveau_gpuobj_new_ref(drm_device_t *dev, int oc, int rc, uint32_t handle,
+		       int size, int align, uint32_t flags,
+		       nouveau_gpuobj_ref_t **ref)
+{
+	nouveau_gpuobj_t *gpuobj = NULL;
+	int ret;
+
+	if ((ret = nouveau_gpuobj_new(dev, oc, size, align, flags, &gpuobj)))
+		return ret;
+
+	if ((ret = nouveau_gpuobj_ref_add(dev, rc, handle, gpuobj, ref))) {
+		nouveau_gpuobj_del(dev, &gpuobj);
+		return ret;
+	}
+
+	return 0;
+}
+
+int
+nouveau_gpuobj_new_fake(drm_device_t *dev, uint32_t offset, uint32_t size,
+			uint32_t flags, nouveau_gpuobj_t **pgpuobj,
+			nouveau_gpuobj_ref_t **pref)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	nouveau_gpuobj_t *gpuobj = NULL;
 	int i;
 
-	/* Unbind object from channel */
-	nouveau_object_unlink(dev, obj);
+	DRM_DEBUG("offset=0x%08x size=0x%08x flags=0x%08x\n",
+		  offset, size, flags);
+
+	gpuobj = drm_calloc(1, sizeof(*gpuobj), DRM_MEM_DRIVER);
+	if (!gpuobj)
+		return DRM_ERR(ENOMEM);
+	DRM_DEBUG("gpuobj %p\n", gpuobj);
+	gpuobj->im_channel = -1;
+	gpuobj->flags      = flags | NVOBJ_FLAG_FAKE;
+
+	gpuobj->im_pramin = drm_calloc(1, sizeof(struct mem_block),
+				       DRM_MEM_DRIVER);
+	if (!gpuobj->im_pramin) {
+		nouveau_gpuobj_del(dev, &gpuobj);
+		return DRM_ERR(ENOMEM);
+	}
+	gpuobj->im_pramin->start = offset;
+	gpuobj->im_pramin->size  = size;
+
+	if (gpuobj->flags & NVOBJ_FLAG_ZERO_ALLOC) {
+		for (i = 0; i < gpuobj->im_pramin->size; i += 4)
+			INSTANCE_WR(gpuobj, i/4, 0);
+	}
 
-	/* Clean RAMIN entry */
-	DRM_DEBUG("Instance entry for 0x%08x"
-		"(engine %d, class 0x%x) before destroy:\n",
-		obj->handle, obj->engine, obj->class);
-	for (i=0; i<(obj->instance->size/4); i++) {
-		DRM_DEBUG("  +0x%02x: 0x%08x\n", (i*4),
-			INSTANCE_RD(obj->instance, i));
-		INSTANCE_WR(obj->instance, i, 0x00000000);
+	if (pref) {
+		if ((i = nouveau_gpuobj_ref_add(dev, -1, 0, gpuobj, pref))) {
+			nouveau_gpuobj_del(dev, &gpuobj);
+			return i;
+		}
 	}
 
-	/* Free RAMIN */
-	nouveau_instmem_free(dev, obj->instance);
+	if (pgpuobj)
+		*pgpuobj = gpuobj;
+	return 0;
 }
 
 /*
@@ -317,64 +537,70 @@ nouveau_object_instance_free(drm_device_t *dev, struct nouveau_object *obj)
    to it that can be used to set up context objects.
 */
 
-struct nouveau_object *
-nouveau_object_dma_create(drm_device_t* dev, int channel, int class,
-			  uint32_t offset, uint32_t size,
-			  int access, int target)
+static int
+nouveau_gpuobj_class_instmem_size(drm_device_t *dev, int class)
 {
-	drm_nouveau_private_t *dev_priv=dev->dev_private;
-	struct   nouveau_object *obj;
-	uint32_t frame, adjust;
-	uint32_t pte_flags = 0;
-
-	DRM_DEBUG("offset:0x%08x, size:0x%08x, target:%d, access:%d\n",
-			offset, size, target, access);
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
 
-	switch (target) {
-	case NV_DMA_TARGET_AGP:
-		offset += dev_priv->agp_phys;
-		break;
-	default:
-		break;
-	}
+	/*XXX: dodgy hack for now */
+	if (dev_priv->card_type >= NV_50)
+		return 24;
+	if (dev_priv->card_type >= NV_40)
+		return 32;
+	return 16;
+}
 
-	switch (access) {
-	case NV_DMA_ACCESS_RO:
-		break;
-	case NV_DMA_ACCESS_WO:
-	case NV_DMA_ACCESS_RW:
-		pte_flags  |= (1 << 1);
-		break;
-	default:
-		DRM_ERROR("invalid access mode=%d\n", access);
-		return NULL;
-	}
+int
+nouveau_gpuobj_dma_new(drm_device_t *dev, int channel, int class,
+		       uint64_t offset, uint64_t size, int access, int target,
+		       nouveau_gpuobj_t **gpuobj)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	int ret;
 
-	frame  = offset & ~0x00000FFF;
-	adjust = offset &  0x00000FFF;
+	DRM_DEBUG("ch%d class=0x%04x offset=0x%llx size=0x%llx\n",
+		  channel, class, offset, size);
+	DRM_DEBUG("access=%d target=%d\n", access, target);
 
-	obj = nouveau_object_instance_alloc(dev, channel);
-	if (!obj) {
-		DRM_ERROR("couldn't allocate DMA object\n");
-		return obj;
+	ret = nouveau_gpuobj_new(dev, channel,
+				 nouveau_gpuobj_class_instmem_size(dev, class),
+				 16,
+				 NVOBJ_FLAG_ZERO_ALLOC | NVOBJ_FLAG_ZERO_FREE,
+				 gpuobj);
+	if (ret) {
+		DRM_ERROR("Error creating gpuobj: %d\n", ret);
+		return ret;
 	}
 
-	obj->engine = 0;
-	obj->class  = class;
-
-	INSTANCE_WR(obj->instance, 0, ((1<<12) | (1<<13) |
-				(adjust << 20) |
-				(access << 14) |
-				(target << 16) |
-				class));
-	INSTANCE_WR(obj->instance, 1, size-1);
-	INSTANCE_WR(obj->instance, 2, frame | pte_flags);
-	INSTANCE_WR(obj->instance, 3, frame | pte_flags);
+	if (dev_priv->card_type < NV_50) {
+		uint32_t frame, adjust, pte_flags = 0;
+
+		if (target == NV_DMA_TARGET_AGP)
+			offset += dev_priv->agp_phys;
+		if (access != NV_DMA_ACCESS_RO)
+			pte_flags |= (1<<1);
+		frame  = offset & ~0x00000fff;
+		adjust = offset &  0x00000fff;
+
+		INSTANCE_WR(*gpuobj, 0, ((1<<12) | (1<<13) |
+					 (adjust << 20) |
+					 (access << 14) |
+					 (target << 16) |
+					  class));
+		INSTANCE_WR(*gpuobj, 1, size - 1);
+		INSTANCE_WR(*gpuobj, 2, frame | pte_flags);
+		INSTANCE_WR(*gpuobj, 3, frame | pte_flags);
+	} else {
+		nouveau_gpuobj_del(dev, gpuobj);
+		DRM_ERROR("stub\n");
+		return DRM_ERR(EINVAL);
+	}
 
-	return obj;
+	(*gpuobj)->engine = NVOBJ_ENGINE_SW;
+	(*gpuobj)->class  = class;
+	return 0;
 }
 
-
 /* Context objects in the instance RAM have the following structure.
  * On NV40 they are 32 byte long, on NV30 and smaller 16 bytes.
 
@@ -426,89 +652,142 @@ nouveau_object_dma_create(drm_device_t* dev, int channel, int class,
    entry[5]:
    set to 0?
 */
-struct nouveau_object *
-nouveau_object_gr_create(drm_device_t* dev, int channel, int class)
+int	/* 0 on success; otherwise the nouveau_gpuobj_new() error or DRM_ERR(EINVAL) */
+nouveau_gpuobj_gr_new(drm_device_t *dev, int channel, int class,
+		      nouveau_gpuobj_t **gpuobj)
 {
-	drm_nouveau_private_t *dev_priv=dev->dev_private;
-	struct   nouveau_object *obj;
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	int ret;
 
-	DRM_DEBUG("class=%x\n",	class);
+	DRM_DEBUG("ch%d class=0x%04x\n", channel, class);
 
-	obj = nouveau_object_instance_alloc(dev, channel);
-	if (!obj) {
-		DRM_ERROR("couldn't allocate context object\n");
-		return obj;
+	ret = nouveau_gpuobj_new(dev, channel,
+				 nouveau_gpuobj_class_instmem_size(dev, class),
+				 16,	/* presumably the alignment - confirm against nouveau_gpuobj_new() */
+				 NVOBJ_FLAG_ZERO_ALLOC | NVOBJ_FLAG_ZERO_FREE,
+				 gpuobj);
+	if (ret) {
+		DRM_ERROR("Error creating gpuobj: %d\n", ret);
+		return ret;
 	}
 
-	obj->engine = 1;
-	obj->class  = class;
+	if (dev_priv->card_type >= NV_50) {	/* NV50+ is still a stub: delete the new object and fail */
+		nouveau_gpuobj_del(dev, gpuobj);
+		DRM_ERROR("stub!\n");
+		return DRM_ERR(EINVAL);
+	}
 
 	switch (class) {
 	case NV_CLASS_NULL:
-		INSTANCE_WR(obj->instance, 0, 0x00001030);
-		INSTANCE_WR(obj->instance, 1, 0xFFFFFFFF);
-		INSTANCE_WR(obj->instance, 2, 0x00000000);
-		INSTANCE_WR(obj->instance, 2, 0x00000000);
+		INSTANCE_WR(*gpuobj, 0, 0x00001030);
+		INSTANCE_WR(*gpuobj, 1, 0xFFFFFFFF);	/* zero words are no longer written; presumably NVOBJ_FLAG_ZERO_ALLOC covers them - confirm */
 		break;
 	default:
 		if (dev_priv->card_type >= NV_40) {
-			INSTANCE_WR(obj->instance, 0,  obj->class);
-			INSTANCE_WR(obj->instance, 1, 0x00000000);
+			INSTANCE_WR(*gpuobj, 0, class);	/* only word 0 (plus word 2 on big-endian) is written explicitly */
 #ifdef __BIG_ENDIAN
-			INSTANCE_WR(obj->instance, 2, 0x01000000);
-#else
-			INSTANCE_WR(obj->instance, 2, 0x00000000);
+			INSTANCE_WR(*gpuobj, 2, 0x01000000);
 #endif
-			INSTANCE_WR(obj->instance, 3, 0x00000000);
-			INSTANCE_WR(obj->instance, 4, 0x00000000);
-			INSTANCE_WR(obj->instance, 5, 0x00000000);
-			INSTANCE_WR(obj->instance, 6, 0x00000000);
-			INSTANCE_WR(obj->instance, 7, 0x00000000);
 		} else {
 #ifdef __BIG_ENDIAN
-			INSTANCE_WR(obj->instance, 0, obj->class | 0x00080000);
+			INSTANCE_WR(*gpuobj, 0, class | 0x00080000);
 #else
-			INSTANCE_WR(obj->instance, 0, obj->class);
+			INSTANCE_WR(*gpuobj, 0, class);
 #endif
-			INSTANCE_WR(obj->instance, 1, 0x00000000);
-			INSTANCE_WR(obj->instance, 2, 0x00000000);
-			INSTANCE_WR(obj->instance, 3, 0x00000000);
 		}
 	}
 
-	return obj;
+	(*gpuobj)->engine = NVOBJ_ENGINE_GR;	/* engine/class are recorded only after the instance words are set up */
+	(*gpuobj)->class  = class;
+	return 0;
 }
 
-void
-nouveau_object_free(drm_device_t *dev, struct nouveau_object *obj)
+/* Allocate the channel's PRAMIN block (chan->ramin) and put a heap on top of
+ * it (chan->ramin_heap).  Returns 0, or the error from the call that failed. */
+static int
+nouveau_gpuobj_channel_init_pramin(drm_device_t *dev, int channel)
 {
-	nouveau_object_instance_free(dev, obj);
-	if (obj->handle != ~0)
-		nouveau_ramht_remove(dev, obj);
-	drm_free(obj, sizeof(struct nouveau_object), DRM_MEM_DRIVER);
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	/* Base amount for object storage (4KiB enough?) */
+	int size = 0x1000;
+	int base = 0;
+	int ret;
+
+	DRM_DEBUG("ch%d\n", channel);
+
+	/* PGRAPH context (placeholder: size is not grown for it yet) */
+	if (dev_priv->card_type == NV_50) {
+		/* RAMHT, RAMFC, PD, funny header thingo */
+	}
+
+	DRM_DEBUG("ch%d PRAMIN size: 0x%08x bytes, base alloc=0x%08x\n",
+		  channel, size, base);
+	/* Allocate the block itself; the reference is kept in chan->ramin */
+	ret = nouveau_gpuobj_new_ref(dev, -1, -1, 0, size, 0x1000, 0,
+				     &chan->ramin);
+	if (ret) {
+		DRM_ERROR("Error allocating channel PRAMIN: %d\n", ret);
+		return ret;
+	}
+
+	/* Build the channel heap starting at the block's PRAMIN offset */
+	ret = nouveau_mem_init_heap(&chan->ramin_heap,
+				    chan->ramin->gpuobj->im_pramin->start + base,
+				    size);
+	if (ret == 0)
+		return 0;
+
+	DRM_ERROR("Error creating PRAMIN heap: %d\n", ret);
+	nouveau_gpuobj_ref_del(dev, &chan->ramin);
+	return ret;
 }
 
 int
-nouveau_object_init_channel(drm_device_t *dev, int channel,
-			    uint32_t vram_handle,
-			    uint32_t tt_handle)
+nouveau_gpuobj_channel_init(drm_device_t *dev, int channel,
+			    uint32_t vram_h, uint32_t tt_h)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	struct nouveau_object *gpuobj;
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	nouveau_gpuobj_t *vram = NULL, *tt = NULL;
 	int ret;
 
+	DRM_DEBUG("ch%d vram=0x%08x tt=0x%08x\n", channel, vram_h, tt_h);
+
+	/* Reserve a block of PRAMIN for the channel
+	 *XXX: maybe on <NV50 too at some point
+	 */
+	if (0 || dev_priv->card_type == NV_50) {
+		ret = nouveau_gpuobj_channel_init_pramin(dev, channel);
+		if (ret)
+			return ret;
+	}
+
+	/* RAMHT */
+	if (dev_priv->card_type < NV_50) {
+		ret = nouveau_gpuobj_ref_add(dev, -1, 0, dev_priv->ramht,
+					     &chan->ramht);
+		if (ret)
+			return ret;
+	} else {
+		ret = nouveau_gpuobj_new_ref(dev, channel, channel, 0,
+					     0x8000, 16,
+					     NVOBJ_FLAG_ZERO_ALLOC,
+					     &chan->ramht);
+		if (ret)
+			return ret;
+	}
+
 	/* VRAM ctxdma */
-	gpuobj = nouveau_object_dma_create(dev, channel, NV_CLASS_DMA_IN_MEMORY,
-					   0, dev_priv->fb_available_size,
-					   NV_DMA_ACCESS_RW,
-					   NV_DMA_TARGET_VIDMEM);
-	if (!gpuobj) {
-		DRM_ERROR("Error creating VRAM ctxdma: %d\n", DRM_ERR(ENOMEM));
-		return DRM_ERR(ENOMEM);
+	if ((ret = nouveau_gpuobj_dma_new(dev, channel, NV_CLASS_DMA_IN_MEMORY,
+					  0, dev_priv->fb_available_size,
+					  NV_DMA_ACCESS_RW,
+					  NV_DMA_TARGET_VIDMEM, &vram))) {
+		DRM_ERROR("Error creating VRAM ctxdma: %d\n", ret);
+		return ret;
 	}
 
-	ret = nouveau_ramht_insert(dev, channel, vram_handle, gpuobj);
-	if (ret) {
+	if ((ret = nouveau_gpuobj_ref_add(dev, channel, vram_h, vram, NULL))) {
 		DRM_ERROR("Error referencing VRAM ctxdma: %d\n", ret);
 		return ret;
 	}
@@ -518,17 +797,15 @@ nouveau_object_init_channel(drm_device_t *dev, int channel,
 		return 0;
 
 	/* GART ctxdma */
-	gpuobj = nouveau_object_dma_create(dev, channel, NV_CLASS_DMA_IN_MEMORY,
-					   0, dev_priv->agp_available_size,
-					   NV_DMA_ACCESS_RW,
-					   NV_DMA_TARGET_AGP);
-	if (!gpuobj) {
-		DRM_ERROR("Error creating TT ctxdma: %d\n", DRM_ERR(ENOMEM));
-		return DRM_ERR(ENOMEM);
+	if ((ret = nouveau_gpuobj_dma_new(dev, channel, NV_CLASS_DMA_IN_MEMORY,
+					  0, dev_priv->agp_available_size,
+					  NV_DMA_ACCESS_RW, NV_DMA_TARGET_AGP,
+					  &tt))) {
+		DRM_ERROR("Error creating TT ctxdma: %d\n", ret);
+		return ret;
 	}
 
-	ret = nouveau_ramht_insert(dev, channel, tt_handle, gpuobj);
-	if (ret) {
+	if ((ret = nouveau_gpuobj_ref_add(dev, channel, tt_h, tt, NULL))) {
 		DRM_ERROR("Error referencing TT ctxdma: %d\n", ret);
 		return ret;
 	}
@@ -536,20 +813,34 @@ nouveau_object_init_channel(drm_device_t *dev, int channel,
 	return 0;
 }
 
-void nouveau_object_cleanup(drm_device_t *dev, int channel)
+void
+nouveau_gpuobj_channel_takedown(drm_device_t *dev, int channel)
 {
-	drm_nouveau_private_t *dev_priv=dev->dev_private;
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
+	nouveau_gpuobj_ref_t *ref;
+	/* Drops the channel's listed RAMHT refs, its RAMHT, PRAMIN heap and PRAMIN block */
+	DRM_DEBUG("ch%d\n", channel);
 
-	while (dev_priv->fifos[channel].objs) {
-		nouveau_object_free(dev, dev_priv->fifos[channel].objs);
+	while ((ref = chan->ramht_refs)) {
+		chan->ramht_refs = ref->next;	/* unlink before the ref is handed to ref_del */
+		nouveau_gpuobj_ref_del(dev, &ref);
 	}
+	if (chan->ramht)	/* channel init can return before ramht is set */
+		nouveau_gpuobj_ref_del(dev, &chan->ramht);
+
+	if (chan->ramin_heap)
+		nouveau_mem_takedown(&chan->ramin_heap);
+	if (chan->ramin)
+		nouveau_gpuobj_ref_del(dev, &chan->ramin);
 }
 
 int nouveau_ioctl_grobj_alloc(DRM_IOCTL_ARGS)
 {
 	DRM_DEVICE;
 	drm_nouveau_grobj_alloc_t init;
-	struct nouveau_object *obj;
+	nouveau_gpuobj_t *gr = NULL;
+	int ret;
 
 	DRM_COPY_FROM_USER_IOCTL(init, (drm_nouveau_grobj_alloc_t __user *)
 		data, sizeof(init));
@@ -561,20 +852,20 @@ int nouveau_ioctl_grobj_alloc(DRM_IOCTL_ARGS)
 	}
 
 	//FIXME: check args, only allow trusted objects to be created
+	//FIXME: check for pre-existing handle
 
-	if (nouveau_object_handle_find(dev, init.channel, init.handle)) {
-		DRM_ERROR("Channel %d: handle 0x%08x already exists\n",
-			init.channel, init.handle);
-		return DRM_ERR(EINVAL);
+	if ((ret = nouveau_gpuobj_gr_new(dev, init.channel, init.class, &gr))) {
+		DRM_ERROR("Error creating gr object: %d (%d/0x%08x)\n",
+			  ret, init.channel, init.handle);
+		return ret;
 	}
 
-	obj = nouveau_object_gr_create(dev, init.channel, init.class);
-	if (!obj)
-		return DRM_ERR(ENOMEM);
-
-	if (nouveau_ramht_insert(dev, init.channel, init.handle, obj)) {
-		nouveau_object_free(dev, obj);
-		return DRM_ERR(ENOMEM);
+	if ((ret = nouveau_gpuobj_ref_add(dev, init.channel, init.handle,
+					  gr, NULL))) {
+		DRM_ERROR("Error referencing gr object: %d (%d/0x%08x)\n",
+			  ret, init.channel, init.handle);
+		nouveau_gpuobj_del(dev, &gr);	/* no ref was added, so delete gr here */
+		return ret;
 	}
 
 	return 0;
diff --git a/shared-core/nouveau_state.c b/shared-core/nouveau_state.c
index fa773d28..13bc930a 100644
--- a/shared-core/nouveau_state.c
+++ b/shared-core/nouveau_state.c
@@ -283,6 +283,20 @@ static int nouveau_card_init(drm_device_t *dev)
 	return 0;
 }
 
+static void nouveau_card_takedown(drm_device_t *dev)
+{
+	drm_nouveau_private_t *dev_priv = dev->dev_private;
+
+	/* Engine takedown hooks run first, then the gpuobj and memory layers */
+	dev_priv->Engine.fifo.takedown(dev);
+	dev_priv->Engine.graph.takedown(dev);
+	dev_priv->Engine.fb.takedown(dev);
+	dev_priv->Engine.timer.takedown(dev);
+	dev_priv->Engine.mc.takedown(dev);
+	nouveau_gpuobj_takedown(dev);
+	nouveau_mem_close(dev);
+}
+
 /* here a client dies, release the stuff that was allocated for its filp */
 void nouveau_preclose(drm_device_t * dev, DRMFILE filp)
 {
@@ -314,11 +328,10 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 	if (flags==NV_UNKNOWN)
 		return DRM_ERR(EINVAL);
 
-	dev_priv = drm_alloc(sizeof(drm_nouveau_private_t), DRM_MEM_DRIVER);
+	dev_priv = drm_calloc(1, sizeof(*dev_priv), DRM_MEM_DRIVER);
 	if (!dev_priv)                   
 		return DRM_ERR(ENOMEM);
 
-	memset(dev_priv, 0, sizeof(drm_nouveau_private_t));
 	dev_priv->card_type=flags&NOUVEAU_FAMILY;
 	dev_priv->flags=flags&NOUVEAU_FLAGS;
 
@@ -338,6 +351,9 @@ int nouveau_load(struct drm_device *dev, unsigned long flags)
 void nouveau_lastclose(struct drm_device *dev)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
+
+	nouveau_card_takedown(dev);
+
 	if(dev_priv->fb_mtrr>0)
 	{
 		drm_mtrr_del(dev_priv->fb_mtrr, drm_get_resource_start(dev, 1),nouveau_mem_fb_amount(dev), DRM_MTRR_WC);
diff --git a/shared-core/nv04_fifo.c b/shared-core/nv04_fifo.c
index bfae432e..b84f74c1 100644
--- a/shared-core/nv04_fifo.c
+++ b/shared-core/nv04_fifo.c
@@ -28,8 +28,10 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 
-#define RAMFC_WR(offset, val) NV_WI32(fifoctx + NV04_RAMFC_##offset, (val))
-#define RAMFC_RD(offset)      NV_RI32(fifoctx + NV04_RAMFC_##offset)
+#define RAMFC_WR(offset,val) INSTANCE_WR(chan->ramfc->gpuobj, \
+					 NV04_RAMFC_##offset/4, (val))
+#define RAMFC_RD(offset)     INSTANCE_RD(chan->ramfc->gpuobj, \
+					 NV04_RAMFC_##offset/4)
 #define NV04_RAMFC(c) (dev_priv->ramfc_offset + ((c) * NV04_RAMFC__SIZE))
 #define NV04_RAMFC__SIZE 32
 
@@ -38,21 +40,19 @@ nv04_fifo_create_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
-	struct nouveau_object *pb = chan->cmdbuf_obj;
-	uint32_t fifoctx = NV04_RAMFC(channel);
-	int i;
+	int ret;
 
-	if (!pb || !pb->instance)
-		return DRM_ERR(EINVAL);
+	if ((ret = nouveau_gpuobj_new_fake(dev, NV04_RAMFC(channel),
+						NV04_RAMFC__SIZE,
+						NVOBJ_FLAG_ZERO_ALLOC |
+						NVOBJ_FLAG_ZERO_FREE,
+						NULL, &chan->ramfc)))
+		return ret;
 
-	/* Clear RAMFC */
-	for (i=0; i<NV04_RAMFC__SIZE; i+=4)
-		NV_WI32(fifoctx + i, 0);
-	
 	/* Setup initial state */
 	RAMFC_WR(DMA_PUT, chan->pushbuf_base);
 	RAMFC_WR(DMA_GET, chan->pushbuf_base);
-	RAMFC_WR(DMA_INSTANCE, nouveau_chip_instance_get(dev, pb->instance));
+	RAMFC_WR(DMA_INSTANCE, chan->pushbuf->instance >> 4);
 	RAMFC_WR(DMA_FETCH, (NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
 			     NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
 			     NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
@@ -67,18 +67,17 @@ void
 nv04_fifo_destroy_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t fifoctx = NV04_RAMFC(channel);
-	int i;
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 
-	for (i=0; i<NV04_RAMFC__SIZE; i+=4)
-		NV_WI32(fifoctx + i, 0);
+	if (chan->ramfc)
+		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
 }
 
 int
 nv04_fifo_load_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t fifoctx = NV04_RAMFC(channel);
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 	uint32_t tmp;
 
 	NV_WRITE(NV03_PFIFO_CACHE1_PUSH1, (1<<8) | channel);
@@ -106,7 +105,7 @@ int
 nv04_fifo_save_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t fifoctx = NV04_RAMFC(channel);
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 	uint32_t tmp;
 
 	RAMFC_WR(DMA_PUT, NV04_PFIFO_CACHE1_DMA_PUT);
diff --git a/shared-core/nv10_fifo.c b/shared-core/nv10_fifo.c
index b84971de..07ec4635 100644
--- a/shared-core/nv10_fifo.c
+++ b/shared-core/nv10_fifo.c
@@ -28,8 +28,11 @@
 #include "drm.h"
 #include "nouveau_drv.h"
 
-#define RAMFC_WR(offset, val)	NV_WI32(fifoctx + NV10_RAMFC_##offset, (val))
-#define RAMFC_RD(offset)	NV_RI32(fifoctx + NV10_RAMFC_##offset)
+
+#define RAMFC_WR(offset,val) INSTANCE_WR(chan->ramfc->gpuobj, \
+					 NV10_RAMFC_##offset/4, (val))
+#define RAMFC_RD(offset)     INSTANCE_RD(chan->ramfc->gpuobj, \
+					 NV10_RAMFC_##offset/4)
 #define NV10_RAMFC(c) (dev_priv->ramfc_offset + NV10_RAMFC__SIZE)
 #define NV10_RAMFC__SIZE ((dev_priv->chipset) >= 0x17 ? 64 : 32)
 
@@ -38,20 +41,21 @@ nv10_fifo_create_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
-	uint32_t fifoctx = NV10_RAMFC(channel), pushbuf;
-	int i;
-
-	pushbuf = nouveau_chip_instance_get(dev, chan->cmdbuf_obj->instance);
+	int ret;
 
-	for (i=0; i<NV10_RAMFC__SIZE; i+=4)
-		NV_WI32(fifoctx + i, 0);
+	if ((ret = nouveau_gpuobj_new_fake(dev, NV10_RAMFC(channel),
+						NV10_RAMFC__SIZE,
+						NVOBJ_FLAG_ZERO_ALLOC |
+						NVOBJ_FLAG_ZERO_FREE,
+						NULL, &chan->ramfc)))
+		return ret;
 
 	/* Fill entries that are seen filled in dumps of nvidia driver just
 	 * after channel's is put into DMA mode
 	 */
 	RAMFC_WR(DMA_PUT       , chan->pushbuf_base);
 	RAMFC_WR(DMA_GET       , chan->pushbuf_base);
-	RAMFC_WR(DMA_INSTANCE  , pushbuf);
+	RAMFC_WR(DMA_INSTANCE  , chan->pushbuf->instance >> 4);
 	RAMFC_WR(DMA_FETCH     , NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
 				 NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
 				 NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
@@ -67,18 +71,17 @@ void
 nv10_fifo_destroy_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t fifoctx = NV10_RAMFC(channel);
-	int i;
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 
-	for (i=0; i<NV10_RAMFC__SIZE; i+=4)
-		NV_WI32(fifoctx + i, 0);
+	if (chan->ramfc)
+		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
 }
 
 int
 nv10_fifo_load_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t fifoctx = NV10_RAMFC(channel);
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 	uint32_t tmp;
 
 	NV_WRITE(NV03_PFIFO_CACHE1_PUSH1            , 0x00000100 | channel);
@@ -120,7 +123,7 @@ int
 nv10_fifo_save_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t fifoctx = NV10_RAMFC(channel);
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 	uint32_t tmp;
 
 	RAMFC_WR(DMA_PUT          , NV_READ(NV04_PFIFO_CACHE1_DMA_PUT));
diff --git a/shared-core/nv20_graph.c b/shared-core/nv20_graph.c
index 1b8a6727..13271051 100644
--- a/shared-core/nv20_graph.c
+++ b/shared-core/nv20_graph.c
@@ -34,19 +34,18 @@ int nv20_graph_create_context(drm_device_t *dev, int channel) {
 		(drm_nouveau_private_t *)dev->dev_private;
 	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 	unsigned int ctx_size = NV20_GRCTX_SIZE;
-	int i;
+	int ret;
 
-	/* Alloc and clear RAMIN to store the context */
-	chan->ramin_grctx = nouveau_instmem_alloc(dev, ctx_size, 4);
-	if (!chan->ramin_grctx)
-		return DRM_ERR(ENOMEM);
-	for (i=0; i<ctx_size; i+=4)
-		INSTANCE_WR(chan->ramin_grctx, i/4, 0x00000000);
+	if ((ret = nouveau_gpuobj_new_ref(dev, channel, -1, 0, ctx_size, 16,
+					  NVOBJ_FLAG_ZERO_ALLOC,
+					  &chan->ramin_grctx)))
+		return ret;
 
 	/* Initialise default context values */
-	INSTANCE_WR(chan->ramin_grctx, 10, channel << 24); /* CTX_USER */
+	INSTANCE_WR(chan->ramin_grctx->gpuobj, 10, channel<<24); /* CTX_USER */
 
-	INSTANCE_WR(dev_priv->ctx_table, channel, nouveau_chip_instance_get(dev, chan->ramin_grctx));
+	INSTANCE_WR(dev_priv->ctx_table->gpuobj, channel,
+		    chan->ramin_grctx->instance >> 4);
 	return 0;
 }
 
@@ -54,12 +53,10 @@ void nv20_graph_destroy_context(drm_device_t *dev, int channel) {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 
-	if (chan->ramin_grctx) {
-		nouveau_instmem_free(dev, chan->ramin_grctx);
-		chan->ramin_grctx = NULL;
-	}
+	if (chan->ramin_grctx)
+		nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
 
-	INSTANCE_WR(dev_priv->ctx_table, channel, 0);
+	INSTANCE_WR(dev_priv->ctx_table->gpuobj, channel, 0);
 }
 
 static void nv20_graph_rdi(drm_device_t *dev) {
@@ -79,13 +76,14 @@ static void nv20_graph_rdi(drm_device_t *dev) {
 int nv20_graph_save_context(drm_device_t *dev, int channel) {
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 	uint32_t instance;
 
-	instance = INSTANCE_RD(dev_priv->ctx_table, channel);
+	instance = INSTANCE_RD(dev_priv->ctx_table->gpuobj, channel);
 	if (!instance) {
 		return DRM_ERR(EINVAL);
 	}
-	if (instance != nouveau_chip_instance_get(dev, dev_priv->fifos[channel].ramin_grctx))
+	if (instance != (chan->ramin_grctx->instance >> 4))
 		DRM_ERROR("nv20_graph_save_context : bad instance\n");
 
 	NV_WRITE(NV10_PGRAPH_CHANNEL_CTX_SIZE, instance);
@@ -99,13 +97,14 @@ int nv20_graph_save_context(drm_device_t *dev, int channel) {
 int nv20_graph_load_context(drm_device_t *dev, int channel) {
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 	uint32_t instance;
 
-	instance = INSTANCE_RD(dev_priv->ctx_table, channel);
+	instance = INSTANCE_RD(dev_priv->ctx_table->gpuobj, channel);
 	if (!instance) {
 		return DRM_ERR(EINVAL);
 	}
-	if (instance != nouveau_chip_instance_get(dev, dev_priv->fifos[channel].ramin_grctx))
+	if (instance != (chan->ramin_grctx->instance >> 4))
 		DRM_ERROR("nv20_graph_load_context_current : bad instance\n");
 
 	NV_WRITE(NV10_PGRAPH_CTX_USER, channel << 24);
@@ -148,8 +147,8 @@ void nouveau_nv20_context_switch(drm_device_t *dev)
 int nv20_graph_init(drm_device_t *dev) {
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
-	int i;
 	uint32_t tmp, vramsz;
+	int ret, i;
 
 	NV_WRITE(NV03_PMC_ENABLE, NV_READ(NV03_PMC_ENABLE) &
 			~NV_PMC_ENABLE_PGRAPH);
@@ -158,14 +157,14 @@ int nv20_graph_init(drm_device_t *dev) {
 
 	/* Create Context Pointer Table */
 	dev_priv->ctx_table_size = 32 * 4;
-	dev_priv->ctx_table = nouveau_instmem_alloc(dev, dev_priv->ctx_table_size, 4);
-	if (!dev_priv->ctx_table)
-		return DRM_ERR(ENOMEM);
-
-	for (i=0; i< dev_priv->ctx_table_size; i+=4)
-		INSTANCE_WR(dev_priv->ctx_table, i/4, 0x00000000);
-
-	NV_WRITE(NV10_PGRAPH_CHANNEL_CTX_TABLE, nouveau_chip_instance_get(dev, dev_priv->ctx_table));
+	if ((ret = nouveau_gpuobj_new_ref(dev, -1, -1, 0,
+					  dev_priv->ctx_table_size, 16,
+					  NVOBJ_FLAG_ZERO_ALLOC,
+					  &dev_priv->ctx_table)))
+		return ret;
+
+	NV_WRITE(NV10_PGRAPH_CHANNEL_CTX_TABLE,
+		 dev_priv->ctx_table->instance >> 4);
 
 	//XXX need to be done and save/restore for each fifo ???
 	nv20_graph_rdi(dev);
diff --git a/shared-core/nv30_graph.c b/shared-core/nv30_graph.c
index 7a87990a..65f4f868 100644
--- a/shared-core/nv30_graph.c
+++ b/shared-core/nv30_graph.c
@@ -16,7 +16,7 @@
  *      contexts are taken from dumps just after the 3D object is
  *      created.
  */
-static void nv30_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
+static void nv30_graph_context_init(drm_device_t *dev, nouveau_gpuobj_t *ctx)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	int i;
@@ -105,9 +105,9 @@ int nv30_graph_create_context(drm_device_t *dev, int channel)
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
 	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
-	void (*ctx_init)(drm_device_t *, struct mem_block *);
+	void (*ctx_init)(drm_device_t *, nouveau_gpuobj_t *);
 	unsigned int ctx_size;
-	int i;
+	int ret;
 
 	switch (dev_priv->chipset) {
 	default:
@@ -116,18 +116,17 @@ int nv30_graph_create_context(drm_device_t *dev, int channel)
 		break;
 	}
 
-	/* Alloc and clear RAMIN to store the context */
-	chan->ramin_grctx = nouveau_instmem_alloc(dev, ctx_size, 4);
-	if (!chan->ramin_grctx)
-		return DRM_ERR(ENOMEM);
-	for (i=0; i<ctx_size; i+=4)
-		INSTANCE_WR(chan->ramin_grctx, i/4, 0x00000000);
+	if ((ret = nouveau_gpuobj_new_ref(dev, channel, -1, 0, ctx_size, 16,
+					  NVOBJ_FLAG_ZERO_ALLOC,
+					  &chan->ramin_grctx)))
+		return ret;
 
 	/* Initialise default context values */
-	ctx_init(dev, chan->ramin_grctx);
+	ctx_init(dev, chan->ramin_grctx->gpuobj);
         
-        INSTANCE_WR(chan->ramin_grctx, 10, channel << 24); /* CTX_USER */
-        INSTANCE_WR(dev_priv->ctx_table, channel, nouveau_chip_instance_get(dev, chan->ramin_grctx));
+        INSTANCE_WR(chan->ramin_grctx->gpuobj, 10, channel<<24); /* CTX_USER */
+        INSTANCE_WR(dev_priv->ctx_table->gpuobj, channel,
+		    chan->ramin_grctx->instance >> 4);
 
 	return 0;
 }
@@ -138,12 +137,10 @@ void nv30_graph_destroy_context(drm_device_t *dev, int channel)
 		(drm_nouveau_private_t *)dev->dev_private;
 	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 
-	if (chan->ramin_grctx) {
-		nouveau_instmem_free(dev, chan->ramin_grctx);
-		chan->ramin_grctx = NULL;
-	}
+	if (chan->ramin_grctx)
+		nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
 
-	INSTANCE_WR(dev_priv->ctx_table, channel, 0);
+	INSTANCE_WR(dev_priv->ctx_table->gpuobj, channel, 0);
 }
 
 static int
@@ -172,7 +169,7 @@ int nv30_graph_load_context(drm_device_t *dev, int channel)
 
 	if (!chan->ramin_grctx)
 		return DRM_ERR(EINVAL);
-	inst = nouveau_chip_instance_get(dev, chan->ramin_grctx);
+	inst = chan->ramin_grctx->instance >> 4;
 
 	NV_WRITE(NV20_PGRAPH_CHANNEL_CTX_POINTER, inst);
 	NV_WRITE(NV20_PGRAPH_CHANNEL_CTX_XFER,
@@ -189,7 +186,7 @@ int nv30_graph_save_context(drm_device_t *dev, int channel)
 
 	if (!chan->ramin_grctx)
 		return DRM_ERR(EINVAL);
-	inst = nouveau_chip_instance_get(dev, chan->ramin_grctx);
+	inst = chan->ramin_grctx->instance >> 4;
 
 	NV_WRITE(NV20_PGRAPH_CHANNEL_CTX_POINTER, inst);
 	NV_WRITE(NV20_PGRAPH_CHANNEL_CTX_XFER,
@@ -203,7 +200,7 @@ int nv30_graph_init(drm_device_t *dev)
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
 	uint32_t vramsz, tmp;
-	int i;
+	int ret, i;
 
 	NV_WRITE(NV03_PMC_ENABLE, NV_READ(NV03_PMC_ENABLE) &
 			~NV_PMC_ENABLE_PGRAPH);
@@ -212,14 +209,14 @@ int nv30_graph_init(drm_device_t *dev)
 
         /* Create Context Pointer Table */
         dev_priv->ctx_table_size = 32 * 4;
-        dev_priv->ctx_table = nouveau_instmem_alloc(dev, dev_priv->ctx_table_size, 4);
-        if (!dev_priv->ctx_table)
-                return DRM_ERR(ENOMEM);
-
-        for (i=0; i< dev_priv->ctx_table_size; i+=4)
-                INSTANCE_WR(dev_priv->ctx_table, i/4, 0x00000000);
-
-        NV_WRITE(NV10_PGRAPH_CHANNEL_CTX_TABLE, nouveau_chip_instance_get(dev, dev_priv->ctx_table));
+	if ((ret = nouveau_gpuobj_new_ref(dev, -1, -1, 0,
+					  dev_priv->ctx_table_size, 16,
+					  NVOBJ_FLAG_ZERO_ALLOC,
+					  &dev_priv->ctx_table)))
+		return ret;
+
+        NV_WRITE(NV10_PGRAPH_CHANNEL_CTX_TABLE,
+		 dev_priv->ctx_table->instance >> 4);
 
 	NV_WRITE(NV03_PGRAPH_INTR_EN, 0x00000000);
 	NV_WRITE(NV03_PGRAPH_INTR   , 0xFFFFFFFF);
diff --git a/shared-core/nv40_fifo.c b/shared-core/nv40_fifo.c
index 6f25349c..eed3e45b 100644
--- a/shared-core/nv40_fifo.c
+++ b/shared-core/nv40_fifo.c
@@ -28,8 +28,11 @@
 #include "nouveau_drv.h"
 #include "nouveau_drm.h"
 
-#define RAMFC_WR(offset, val)	NV_WI32(fifoctx + NV40_RAMFC_##offset, (val))
-#define RAMFC_RD(offset)	NV_RI32(fifoctx + NV40_RAMFC_##offset)
+
+#define RAMFC_WR(offset,val) INSTANCE_WR(chan->ramfc->gpuobj, \
+					 NV40_RAMFC_##offset/4, (val))
+#define RAMFC_RD(offset)     INSTANCE_RD(chan->ramfc->gpuobj, \
+					 NV40_RAMFC_##offset/4)
 #define NV40_RAMFC(c) (dev_priv->ramfc_offset + ((c)*NV40_RAMFC__SIZE))
 #define NV40_RAMFC__SIZE 128
 
@@ -38,21 +41,21 @@ nv40_fifo_create_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
-	uint32_t fifoctx = NV40_RAMFC(channel), grctx, pushbuf;
-	int i;
-
-	for (i = 0; i < NV40_RAMFC__SIZE; i+=4)
-		NV_WI32(fifoctx + i, 0);
+	int ret;
 
-	grctx   = nouveau_chip_instance_get(dev, chan->ramin_grctx);
-	pushbuf = nouveau_chip_instance_get(dev, chan->cmdbuf_obj->instance);
+	if ((ret = nouveau_gpuobj_new_fake(dev, NV40_RAMFC(channel),
+						NV40_RAMFC__SIZE,
+						NVOBJ_FLAG_ZERO_ALLOC |
+						NVOBJ_FLAG_ZERO_FREE,
+						NULL, &chan->ramfc)))
+		return ret;
 
 	/* Fill entries that are seen filled in dumps of nvidia driver just
 	 * after channel's is put into DMA mode
 	 */
 	RAMFC_WR(DMA_PUT       , chan->pushbuf_base);
 	RAMFC_WR(DMA_GET       , chan->pushbuf_base);
-	RAMFC_WR(DMA_INSTANCE  , pushbuf);
+	RAMFC_WR(DMA_INSTANCE  , chan->pushbuf->instance >> 4);
 	RAMFC_WR(DMA_FETCH     , NV_PFIFO_CACHE1_DMA_FETCH_TRIG_128_BYTES |
 				 NV_PFIFO_CACHE1_DMA_FETCH_SIZE_128_BYTES |
 				 NV_PFIFO_CACHE1_DMA_FETCH_MAX_REQS_8 |
@@ -61,7 +64,7 @@ nv40_fifo_create_context(drm_device_t *dev, int channel)
 #endif
 				 0x30000000 /* no idea.. */);
 	RAMFC_WR(DMA_SUBROUTINE, 0);
-	RAMFC_WR(GRCTX_INSTANCE, grctx);
+	RAMFC_WR(GRCTX_INSTANCE, chan->ramin_grctx->instance >> 4);
 	RAMFC_WR(DMA_TIMESLICE , 0x0001FFFF);
 
 	return 0;
@@ -71,18 +74,17 @@ void
 nv40_fifo_destroy_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t fifoctx = NV40_RAMFC(channel);
-	int i;
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 
-	for (i = 0; i < NV40_RAMFC__SIZE; i+=4)
-		NV_WI32(fifoctx + i, 0);
+	if (chan->ramfc)
+		nouveau_gpuobj_ref_del(dev, &chan->ramfc);
 }
 
 int
 nv40_fifo_load_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t fifoctx = NV40_RAMFC(channel);
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 	uint32_t tmp, tmp2;
 
 	NV_WRITE(NV04_PFIFO_CACHE1_DMA_GET          , RAMFC_RD(DMA_GET));
@@ -141,7 +143,7 @@ int
 nv40_fifo_save_context(drm_device_t *dev, int channel)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
-	uint32_t fifoctx = NV40_RAMFC(channel);
+	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 	uint32_t tmp;
 
 	RAMFC_WR(DMA_PUT          , NV_READ(NV04_PFIFO_CACHE1_DMA_PUT));
diff --git a/shared-core/nv40_graph.c b/shared-core/nv40_graph.c
index 6fb575db..3f33cee6 100644
--- a/shared-core/nv40_graph.c
+++ b/shared-core/nv40_graph.c
@@ -47,13 +47,13 @@
  *      created.
  */
 static void
-nv40_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
+nv40_graph_context_init(drm_device_t *dev, nouveau_gpuobj_t *ctx)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	int i;
 
 	/* Always has the "instance address" of itself at offset 0 */
-	INSTANCE_WR(ctx, 0x00000/4, nouveau_chip_instance_get(dev, ctx));
+	INSTANCE_WR(ctx, 0x00000/4, ctx->im_pramin->start);
 	/* unknown */
 	INSTANCE_WR(ctx, 0x00024/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x00028/4, 0x0000ffff);
@@ -188,12 +188,12 @@ nv40_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
 }
 
 static void
-nv43_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
+nv43_graph_context_init(drm_device_t *dev, nouveau_gpuobj_t *ctx)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	int i;
 	
-	INSTANCE_WR(ctx, 0x00000/4, nouveau_chip_instance_get(dev, ctx));
+	INSTANCE_WR(ctx, 0x00000/4, ctx->im_pramin->start);
 	INSTANCE_WR(ctx, 0x00024/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x00028/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x00030/4, 0x00000001);
@@ -304,12 +304,12 @@ nv43_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
 };
 
 static void
-nv46_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
+nv46_graph_context_init(drm_device_t *dev, nouveau_gpuobj_t *ctx)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	int i;
 
-	INSTANCE_WR(ctx, 0x00000/4, nouveau_chip_instance_get(dev, ctx));
+	INSTANCE_WR(ctx, 0x00000/4, ctx->im_pramin->start);
 	INSTANCE_WR(ctx, 0x00040/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x00044/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x0004c/4, 0x00000001);
@@ -455,12 +455,12 @@ nv46_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
 }
 
 static void
-nv49_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
+nv49_graph_context_init(drm_device_t *dev, nouveau_gpuobj_t *ctx)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	int i;
 
-	INSTANCE_WR(ctx, 0x00000/4, nouveau_chip_instance_get(dev, ctx));
+	INSTANCE_WR(ctx, 0x00000/4, ctx->im_pramin->start);
 	INSTANCE_WR(ctx, 0x00004/4, 0x0000c040);
 	INSTANCE_WR(ctx, 0x00008/4, 0x0000c040);
 	INSTANCE_WR(ctx, 0x0000c/4, 0x0000c040);
@@ -678,12 +678,12 @@ nv49_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
 }
 
 static void
-nv4a_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
+nv4a_graph_context_init(drm_device_t *dev, nouveau_gpuobj_t *ctx)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	int i;
 
-	INSTANCE_WR(ctx, 0x00000/4, nouveau_chip_instance_get(dev, ctx));
+	INSTANCE_WR(ctx, 0x00000/4, ctx->im_pramin->start);
 	INSTANCE_WR(ctx, 0x00024/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x00028/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x00030/4, 0x00000001);
@@ -795,12 +795,12 @@ nv4a_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
 }
 
 static void
-nv4b_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
+nv4b_graph_context_init(drm_device_t *dev, nouveau_gpuobj_t *ctx)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	int i;
 
-	INSTANCE_WR(ctx, 0x00000/4, nouveau_chip_instance_get(dev, ctx));
+	INSTANCE_WR(ctx, 0x00000/4, ctx->im_pramin->start);
 	INSTANCE_WR(ctx, 0x00004/4, 0x0000c040);
 	INSTANCE_WR(ctx, 0x00008/4, 0x0000c040);
 	INSTANCE_WR(ctx, 0x0000c/4, 0x0000c040);
@@ -1010,12 +1010,12 @@ nv4b_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
 }
 
 static void
-nv4c_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
+nv4c_graph_context_init(drm_device_t *dev, nouveau_gpuobj_t *ctx)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	int i;
 
-	INSTANCE_WR(ctx, 0x00000/4, nouveau_chip_instance_get(dev, ctx));
+	INSTANCE_WR(ctx, 0x00000/4, ctx->im_pramin->start);
 	INSTANCE_WR(ctx, 0x00024/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x00028/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x00030/4, 0x00000001);
@@ -1117,12 +1117,12 @@ nv4c_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
 }
 
 static void
-nv4e_graph_context_init(drm_device_t *dev, struct mem_block *ctx)
+nv4e_graph_context_init(drm_device_t *dev, nouveau_gpuobj_t *ctx)
 {
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	int i;
 
-	INSTANCE_WR(ctx, 0x00000/4, nouveau_chip_instance_get(dev, ctx));
+	INSTANCE_WR(ctx, 0x00000/4, ctx->im_pramin->start);
 	INSTANCE_WR(ctx, 0x00024/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x00028/4, 0x0000ffff);
 	INSTANCE_WR(ctx, 0x00030/4, 0x00000001);
@@ -1229,9 +1229,9 @@ nv40_graph_create_context(drm_device_t *dev, int channel)
 	drm_nouveau_private_t *dev_priv =
 		(drm_nouveau_private_t *)dev->dev_private;
 	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
-	void (*ctx_init)(drm_device_t *, struct mem_block *);
+	void (*ctx_init)(drm_device_t *, nouveau_gpuobj_t *);
 	unsigned int ctx_size;
-	int i;
+	int ret;
 
 	switch (dev_priv->chipset) {
 	case 0x40:
@@ -1272,15 +1272,13 @@ nv40_graph_create_context(drm_device_t *dev, int channel)
 		break;
 	}
 
-	/* Alloc and clear RAMIN to store the context */
-	chan->ramin_grctx = nouveau_instmem_alloc(dev, ctx_size, 4);
-	if (!chan->ramin_grctx)
-		return DRM_ERR(ENOMEM);
-	for (i=0; i<ctx_size; i+=4)
-		INSTANCE_WR(chan->ramin_grctx, i/4, 0x00000000);
+	if ((ret = nouveau_gpuobj_new_ref(dev, channel, -1, 0, ctx_size, 16,
+					  NVOBJ_FLAG_ZERO_ALLOC,
+					  &chan->ramin_grctx)))
+		return ret;
 
 	/* Initialise default context values */
-	ctx_init(dev, chan->ramin_grctx);
+	ctx_init(dev, chan->ramin_grctx->gpuobj);
 
 	return 0;
 }
@@ -1291,10 +1289,8 @@ nv40_graph_destroy_context(drm_device_t *dev, int channel)
 	drm_nouveau_private_t *dev_priv = dev->dev_private;
 	struct nouveau_fifo *chan = &dev_priv->fifos[channel];
 
-	if (chan->ramin_grctx) {
-		nouveau_instmem_free(dev, chan->ramin_grctx);
-		chan->ramin_grctx = NULL;
-	}
+	if (chan->ramin_grctx)
+		nouveau_gpuobj_ref_del(dev, &chan->ramin_grctx);
 }
 
 static int
@@ -1339,7 +1335,7 @@ nv40_graph_save_context(drm_device_t *dev, int channel)
 
 	if (!chan->ramin_grctx)
 		return DRM_ERR(EINVAL);
-	inst = nouveau_chip_instance_get(dev, chan->ramin_grctx);
+	inst = chan->ramin_grctx->instance >> 4;
 
 	return nv40_graph_transfer_context(dev, inst, 1);
 }
@@ -1357,7 +1353,7 @@ nv40_graph_load_context(drm_device_t *dev, int channel)
 
 	if (!chan->ramin_grctx)
 		return DRM_ERR(EINVAL);
-	inst = nouveau_chip_instance_get(dev, chan->ramin_grctx);
+	inst = chan->ramin_grctx->instance >> 4;
 
 	ret = nv40_graph_transfer_context(dev, inst, 0);
 	if (ret)