Diffstat (limited to 'libdrm/intel')
-rw-r--r-- | libdrm/intel/intel_bufmgr_fake.c | 1177
-rw-r--r-- | libdrm/intel/intel_bufmgr_fake.h |   50
-rw-r--r-- | libdrm/intel/intel_bufmgr_gem.c  |  847
-rw-r--r-- | libdrm/intel/intel_bufmgr_gem.h  |   16
4 files changed, 2090 insertions, 0 deletions
diff --git a/libdrm/intel/intel_bufmgr_fake.c b/libdrm/intel/intel_bufmgr_fake.c new file mode 100644 index 00000000..2aed3d85 --- /dev/null +++ b/libdrm/intel/intel_bufmgr_fake.c @@ -0,0 +1,1177 @@ +/************************************************************************** + * + * Copyright 2006 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + * + **************************************************************************/ + +/* Originally a fake version of the buffer manager so that we can + * prototype the changes in a driver fairly quickly, has been fleshed + * out to a fully functional interim solution. + * + * Basically wraps the old style memory management in the new + * programming interface, but is more expressive and avoids many of + * the bugs in the old texture manager. + */ +#include "mtypes.h" +#include "dri_bufmgr.h" +#include "intel_bufmgr_fake.h" +#include "drm.h" +#include "i915_drm.h" + +#include "simple_list.h" +#include "mm.h" +#include "imports.h" + +#define DBG(...) do { \ + if (bufmgr_fake->bufmgr.debug) \ + _mesa_printf(__VA_ARGS__); \ +} while (0) + +/* Internal flags: + */ +#define BM_NO_BACKING_STORE 0x00000001 +#define BM_NO_FENCE_SUBDATA 0x00000002 +#define BM_PINNED 0x00000004 + +/* Wrapper around mm.c's mem_block, which understands that you must + * wait for fences to expire before memory can be freed. This is + * specific to our use of memcpy for uploads - an upload that was + * processed through the command queue wouldn't need to care about + * fences. + */ +#define MAX_RELOCS 4096 + +struct fake_buffer_reloc +{ + /** Buffer object that the relocation points at. */ + dri_bo *target_buf; + /** Offset of the relocation entry within reloc_buf. */ + GLuint offset; + /** Cached value of the offset when we last performed this relocation. */ + GLuint last_target_offset; + /** Value added to target_buf's offset to get the relocation entry. */ + GLuint delta; + /** Cache domains the target buffer is read into. */ + uint32_t read_domains; + /** Cache domain the target buffer will have dirty cachelines in. */ + uint32_t write_domain; +}; + +struct block { + struct block *next, *prev; + struct mem_block *mem; /* BM_MEM_AGP */ + + /** + * Marks that the block is currently in the aperture and has yet to be + * fenced. 
+ */ + unsigned on_hardware:1; + /** + * Marks that the block is currently fenced (being used by rendering) and + * can't be freed until @fence is passed. + */ + unsigned fenced:1; + + /** Fence cookie for the block. */ + unsigned fence; /* Split to read_fence, write_fence */ + + dri_bo *bo; + void *virtual; +}; + +typedef struct _bufmgr_fake { + dri_bufmgr bufmgr; + + unsigned long low_offset; + unsigned long size; + void *virtual; + + struct mem_block *heap; + struct block lru; /* only allocated, non-fence-pending blocks here */ + + unsigned buf_nr; /* for generating ids */ + + struct block on_hardware; /* after bmValidateBuffers */ + struct block fenced; /* after bmFenceBuffers (mi_flush, emit irq, write dword) */ + /* then to bufmgr->lru or free() */ + + unsigned int last_fence; + + unsigned fail:1; + unsigned need_fence:1; + GLboolean thrashing; + + /** + * Driver callback to emit a fence, returning the cookie. + * + * Currently, this also requires that a write flush be emitted before + * emitting the fence, but this should change. + */ + unsigned int (*fence_emit)(void *private); + /** Driver callback to wait for a fence cookie to have passed. */ + int (*fence_wait)(void *private, unsigned int fence_cookie); + /** Driver-supplied argument to driver callbacks */ + void *driver_priv; + + GLboolean debug; + + GLboolean performed_rendering; + + /* keep track of the current total size of objects we have relocs for */ + unsigned long current_total_size; +} dri_bufmgr_fake; + +typedef struct _dri_bo_fake { + dri_bo bo; + + unsigned id; /* debug only */ + const char *name; + + unsigned dirty:1; + unsigned size_accounted:1; /*this buffers size has been accounted against the aperture */ + unsigned card_dirty:1; /* has the card written to this buffer - we make need to copy it back */ + unsigned int refcount; + /* Flags may consist of any of the DRM_BO flags, plus + * DRM_BO_NO_BACKING_STORE and BM_NO_FENCE_SUBDATA, which are the first two + * driver private flags. + */ + uint64_t flags; + /** Cache domains the target buffer is read into. */ + uint32_t read_domains; + /** Cache domain the target buffer will have dirty cachelines in. 
*/ + uint32_t write_domain; + + unsigned int alignment; + GLboolean is_static, validated; + unsigned int map_count; + + /** relocation list */ + struct fake_buffer_reloc *relocs; + GLuint nr_relocs; + + struct block *block; + void *backing_store; + void (*invalidate_cb)(dri_bo *bo, void *ptr); + void *invalidate_ptr; +} dri_bo_fake; + +static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, + unsigned int fence_cookie); + +static int dri_fake_check_aperture_space(dri_bo *bo); + +#define MAXFENCE 0x7fffffff + +static GLboolean FENCE_LTE( unsigned a, unsigned b ) +{ + if (a == b) + return GL_TRUE; + + if (a < b && b - a < (1<<24)) + return GL_TRUE; + + if (a > b && MAXFENCE - a + b < (1<<24)) + return GL_TRUE; + + return GL_FALSE; +} + +static unsigned int +_fence_emit_internal(dri_bufmgr_fake *bufmgr_fake) +{ + bufmgr_fake->last_fence = bufmgr_fake->fence_emit(bufmgr_fake->driver_priv); + return bufmgr_fake->last_fence; +} + +static void +_fence_wait_internal(dri_bufmgr_fake *bufmgr_fake, unsigned int cookie) +{ + int ret; + + ret = bufmgr_fake->fence_wait(bufmgr_fake->driver_priv, cookie); + if (ret != 0) { + _mesa_printf("%s:%d: Error %d waiting for fence.\n", + __FILE__, __LINE__); + abort(); + } + clear_fenced(bufmgr_fake, cookie); +} + +static GLboolean +_fence_test(dri_bufmgr_fake *bufmgr_fake, unsigned fence) +{ + /* Slight problem with wrap-around: + */ + return fence == 0 || FENCE_LTE(fence, bufmgr_fake->last_fence); +} + +/** + * Allocate a memory manager block for the buffer. + */ +static GLboolean +alloc_block(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + dri_bufmgr_fake *bufmgr_fake= (dri_bufmgr_fake *)bo->bufmgr; + struct block *block = (struct block *)calloc(sizeof *block, 1); + unsigned int align_log2 = _mesa_ffs(bo_fake->alignment) - 1; + GLuint sz; + + if (!block) + return GL_FALSE; + + sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); + + block->mem = mmAllocMem(bufmgr_fake->heap, sz, align_log2, 0); + if (!block->mem) { + free(block); + return GL_FALSE; + } + + make_empty_list(block); + + /* Insert at head or at tail??? 
+ */ + insert_at_tail(&bufmgr_fake->lru, block); + + block->virtual = bufmgr_fake->virtual + + block->mem->ofs - bufmgr_fake->low_offset; + block->bo = bo; + + bo_fake->block = block; + + return GL_TRUE; +} + +/* Release the card storage associated with buf: + */ +static void free_block(dri_bufmgr_fake *bufmgr_fake, struct block *block) +{ + dri_bo_fake *bo_fake; + DBG("free block %p %08x %d %d\n", block, block->mem->ofs, block->on_hardware, block->fenced); + + if (!block) + return; + + bo_fake = (dri_bo_fake *)block->bo; + if (!(bo_fake->flags & BM_NO_BACKING_STORE) && (bo_fake->card_dirty == 1)) { + memcpy(bo_fake->backing_store, block->virtual, block->bo->size); + bo_fake->card_dirty = 1; + bo_fake->dirty = 1; + } + + if (block->on_hardware) { + block->bo = NULL; + } + else if (block->fenced) { + block->bo = NULL; + } + else { + DBG(" - free immediately\n"); + remove_from_list(block); + + mmFreeMem(block->mem); + free(block); + } +} + +static void +alloc_backing_store(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + assert(!bo_fake->backing_store); + assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); + + bo_fake->backing_store = ALIGN_MALLOC(bo->size, 64); + + DBG("alloc_backing - buf %d %p %d\n", bo_fake->id, bo_fake->backing_store, bo->size); + assert(bo_fake->backing_store); +} + +static void +free_backing_store(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->backing_store) { + assert(!(bo_fake->flags & (BM_PINNED|BM_NO_BACKING_STORE))); + ALIGN_FREE(bo_fake->backing_store); + bo_fake->backing_store = NULL; + } +} + +static void +set_dirty(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->flags & BM_NO_BACKING_STORE && bo_fake->invalidate_cb != NULL) + bo_fake->invalidate_cb(bo, bo_fake->invalidate_ptr); + + assert(!(bo_fake->flags & BM_PINNED)); + + DBG("set_dirty - buf %d\n", bo_fake->id); + bo_fake->dirty = 1; +} + +static GLboolean +evict_lru(dri_bufmgr_fake *bufmgr_fake, GLuint max_fence) +{ + struct block *block, *tmp; + + DBG("%s\n", __FUNCTION__); + + foreach_s(block, tmp, &bufmgr_fake->lru) { + dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; + + if (bo_fake != NULL && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) + continue; + + if (block->fence && max_fence && !FENCE_LTE(block->fence, max_fence)) + return 0; + + set_dirty(&bo_fake->bo); + bo_fake->block = NULL; + + free_block(bufmgr_fake, block); + return GL_TRUE; + } + + return GL_FALSE; +} + +#define foreach_s_rev(ptr, t, list) \ + for(ptr=(list)->prev,t=(ptr)->prev; list != ptr; ptr=t, t=(t)->prev) + +static GLboolean +evict_mru(dri_bufmgr_fake *bufmgr_fake) +{ + struct block *block, *tmp; + + DBG("%s\n", __FUNCTION__); + + foreach_s_rev(block, tmp, &bufmgr_fake->lru) { + dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; + + if (bo_fake && (bo_fake->flags & BM_NO_FENCE_SUBDATA)) + continue; + + set_dirty(&bo_fake->bo); + bo_fake->block = NULL; + + free_block(bufmgr_fake, block); + return GL_TRUE; + } + + return GL_FALSE; +} + +/** + * Removes all objects from the fenced list older than the given fence. 
+ */ +static int clear_fenced(dri_bufmgr_fake *bufmgr_fake, + unsigned int fence_cookie) +{ + struct block *block, *tmp; + int ret = 0; + + foreach_s(block, tmp, &bufmgr_fake->fenced) { + assert(block->fenced); + + if (_fence_test(bufmgr_fake, block->fence)) { + + block->fenced = 0; + + if (!block->bo) { + DBG("delayed free: offset %x sz %x\n", + block->mem->ofs, block->mem->size); + remove_from_list(block); + mmFreeMem(block->mem); + free(block); + } + else { + DBG("return to lru: offset %x sz %x\n", + block->mem->ofs, block->mem->size); + move_to_tail(&bufmgr_fake->lru, block); + } + + ret = 1; + } + else { + /* Blocks are ordered by fence, so if one fails, all from + * here will fail also: + */ + DBG("fence not passed: offset %x sz %x %d %d \n", + block->mem->ofs, block->mem->size, block->fence, bufmgr_fake->last_fence); + break; + } + } + + DBG("%s: %d\n", __FUNCTION__, ret); + return ret; +} + +static void fence_blocks(dri_bufmgr_fake *bufmgr_fake, unsigned fence) +{ + struct block *block, *tmp; + + foreach_s (block, tmp, &bufmgr_fake->on_hardware) { + DBG("Fence block %p (sz 0x%x ofs %x buf %p) with fence %d\n", block, + block->mem->size, block->mem->ofs, block->bo, fence); + block->fence = fence; + + block->on_hardware = 0; + block->fenced = 1; + + /* Move to tail of pending list here + */ + move_to_tail(&bufmgr_fake->fenced, block); + } + + assert(is_empty_list(&bufmgr_fake->on_hardware)); +} + +static GLboolean evict_and_alloc_block(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + assert(bo_fake->block == NULL); + + /* Search for already free memory: + */ + if (alloc_block(bo)) + return GL_TRUE; + + /* If we're not thrashing, allow lru eviction to dig deeper into + * recently used textures. We'll probably be thrashing soon: + */ + if (!bufmgr_fake->thrashing) { + while (evict_lru(bufmgr_fake, 0)) + if (alloc_block(bo)) + return GL_TRUE; + } + + /* Keep thrashing counter alive? + */ + if (bufmgr_fake->thrashing) + bufmgr_fake->thrashing = 20; + + /* Wait on any already pending fences - here we are waiting for any + * freed memory that has been submitted to hardware and fenced to + * become available: + */ + while (!is_empty_list(&bufmgr_fake->fenced)) { + GLuint fence = bufmgr_fake->fenced.next->fence; + _fence_wait_internal(bufmgr_fake, fence); + + if (alloc_block(bo)) + return GL_TRUE; + } + + if (!is_empty_list(&bufmgr_fake->on_hardware)) { + while (!is_empty_list(&bufmgr_fake->fenced)) { + GLuint fence = bufmgr_fake->fenced.next->fence; + _fence_wait_internal(bufmgr_fake, fence); + } + + if (!bufmgr_fake->thrashing) { + DBG("thrashing\n"); + } + bufmgr_fake->thrashing = 20; + + if (alloc_block(bo)) + return GL_TRUE; + } + + while (evict_mru(bufmgr_fake)) + if (alloc_block(bo)) + return GL_TRUE; + + DBG("%s 0x%x bytes failed\n", __FUNCTION__, bo->size); + + return GL_FALSE; +} + +/*********************************************************************** + * Public functions + */ + +/** + * Wait for hardware idle by emitting a fence and waiting for it. + */ +static void +dri_bufmgr_fake_wait_idle(dri_bufmgr_fake *bufmgr_fake) +{ + unsigned int cookie; + + cookie = bufmgr_fake->fence_emit(bufmgr_fake->driver_priv); + _fence_wait_internal(bufmgr_fake, cookie); +} + +/** + * Wait for rendering to a buffer to complete. + * + * It is assumed that the bathcbuffer which performed the rendering included + * the necessary flushing. 
+ */ +static void +dri_fake_bo_wait_rendering(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->block == NULL || !bo_fake->block->fenced) + return; + + _fence_wait_internal(bufmgr_fake, bo_fake->block->fence); +} + +/* Specifically ignore texture memory sharing. + * -- just evict everything + * -- and wait for idle + */ +void +dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + struct block *block, *tmp; + + bufmgr_fake->need_fence = 1; + bufmgr_fake->fail = 0; + + /* Wait for hardware idle. We don't know where acceleration has been + * happening, so we'll need to wait anyway before letting anything get + * put on the card again. + */ + dri_bufmgr_fake_wait_idle(bufmgr_fake); + + /* Check that we hadn't released the lock without having fenced the last + * set of buffers. + */ + assert(is_empty_list(&bufmgr_fake->fenced)); + assert(is_empty_list(&bufmgr_fake->on_hardware)); + + foreach_s(block, tmp, &bufmgr_fake->lru) { + assert(_fence_test(bufmgr_fake, block->fence)); + set_dirty(block->bo); + } +} + +static dri_bo * +dri_fake_bo_alloc(dri_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask) +{ + dri_bufmgr_fake *bufmgr_fake; + dri_bo_fake *bo_fake; + + bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + + assert(size != 0); + + bo_fake = calloc(1, sizeof(*bo_fake)); + if (!bo_fake) + return NULL; + + bo_fake->bo.size = size; + bo_fake->bo.offset = -1; + bo_fake->bo.virtual = NULL; + bo_fake->bo.bufmgr = bufmgr; + bo_fake->refcount = 1; + + /* Alignment must be a power of two */ + assert((alignment & (alignment - 1)) == 0); + if (alignment == 0) + alignment = 1; + bo_fake->alignment = alignment; + bo_fake->id = ++bufmgr_fake->buf_nr; + bo_fake->name = name; + bo_fake->flags = 0; + bo_fake->is_static = GL_FALSE; + + DBG("drm_bo_alloc: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + return &bo_fake->bo; +} + +static dri_bo * +dri_fake_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, + unsigned long offset, unsigned long size, + void *virtual, uint64_t location_mask) +{ + dri_bufmgr_fake *bufmgr_fake; + dri_bo_fake *bo_fake; + + bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + + assert(size != 0); + + bo_fake = calloc(1, sizeof(*bo_fake)); + if (!bo_fake) + return NULL; + + bo_fake->bo.size = size; + bo_fake->bo.offset = offset; + bo_fake->bo.virtual = virtual; + bo_fake->bo.bufmgr = bufmgr; + bo_fake->refcount = 1; + bo_fake->id = ++bufmgr_fake->buf_nr; + bo_fake->name = name; + bo_fake->flags = BM_PINNED | DRM_BO_FLAG_NO_MOVE; + bo_fake->is_static = GL_TRUE; + + DBG("drm_bo_alloc_static: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + return &bo_fake->bo; +} + +static void +dri_fake_bo_reference(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + bo_fake->refcount++; +} + +static void +dri_fake_bo_unreference(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; + + if (!bo) + return; + + if (--bo_fake->refcount == 0) { + assert(bo_fake->map_count == 0); + /* No remaining references, so free it */ + if (bo_fake->block) + free_block(bufmgr_fake, bo_fake->block); + free_backing_store(bo); + + for (i = 0; i < bo_fake->nr_relocs; i++) + dri_bo_unreference(bo_fake->relocs[i].target_buf); + + DBG("drm_bo_unreference: free buf %d %s\n", 
bo_fake->id, bo_fake->name); + + free(bo_fake->relocs); + free(bo); + + return; + } +} + +/** + * Set the buffer as not requiring backing store, and instead get the callback + * invoked whenever it would be set dirty. + */ +void dri_bo_fake_disable_backing_store(dri_bo *bo, + void (*invalidate_cb)(dri_bo *bo, + void *ptr), + void *ptr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + if (bo_fake->backing_store) + free_backing_store(bo); + + bo_fake->flags |= BM_NO_BACKING_STORE; + + DBG("disable_backing_store set buf %d dirty\n", bo_fake->id); + bo_fake->dirty = 1; + bo_fake->invalidate_cb = invalidate_cb; + bo_fake->invalidate_ptr = ptr; + + /* Note that it is invalid right from the start. Also note + * invalidate_cb is called with the bufmgr locked, so cannot + * itself make bufmgr calls. + */ + if (invalidate_cb != NULL) + invalidate_cb(bo, ptr); +} + +/** + * Map a buffer into bo->virtual, allocating either card memory space (If + * BM_NO_BACKING_STORE or BM_PINNED) or backing store, as necessary. + */ +static int +dri_fake_bo_map(dri_bo *bo, GLboolean write_enable) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + /* Static buffers are always mapped. */ + if (bo_fake->is_static) + return 0; + + /* Allow recursive mapping. Mesa may recursively map buffers with + * nested display loops, and it is used internally in bufmgr_fake + * for relocation. + */ + if (bo_fake->map_count++ != 0) + return 0; + + { + DBG("drm_bo_map: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + if (bo->virtual != NULL) { + _mesa_printf("%s: already mapped\n", __FUNCTION__); + abort(); + } + else if (bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED)) { + + if (!bo_fake->block && !evict_and_alloc_block(bo)) { + DBG("%s: alloc failed\n", __FUNCTION__); + bufmgr_fake->fail = 1; + return 1; + } + else { + assert(bo_fake->block); + bo_fake->dirty = 0; + + if (!(bo_fake->flags & BM_NO_FENCE_SUBDATA) && + bo_fake->block->fenced) { + dri_fake_bo_wait_rendering(bo); + } + + bo->virtual = bo_fake->block->virtual; + } + } + else { + if (write_enable) + set_dirty(bo); + + if (bo_fake->backing_store == 0) + alloc_backing_store(bo); + + bo->virtual = bo_fake->backing_store; + } + } + + return 0; +} + +static int +dri_fake_bo_unmap(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + /* Static buffers are always mapped. */ + if (bo_fake->is_static) + return 0; + + assert(bo_fake->map_count != 0); + if (--bo_fake->map_count != 0) + return 0; + + DBG("drm_bo_unmap: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + bo->virtual = NULL; + + return 0; +} + +static void +dri_fake_kick_all(dri_bufmgr_fake *bufmgr_fake) +{ + struct block *block, *tmp; + + bufmgr_fake->performed_rendering = GL_FALSE; + /* okay for ever BO that is on the HW kick it off. 
+ seriously not afraid of the POLICE right now */ + foreach_s(block, tmp, &bufmgr_fake->on_hardware) { + dri_bo_fake *bo_fake = (dri_bo_fake *)block->bo; + + block->on_hardware = 0; + free_block(bufmgr_fake, block); + bo_fake->block = NULL; + bo_fake->validated = GL_FALSE; + if (!(bo_fake->flags & BM_NO_BACKING_STORE)) + bo_fake->dirty = 1; + } +} + +static int +dri_fake_bo_validate(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + + /* XXX: Sanity-check whether we've already validated this one under + * different flags. See drmAddValidateItem(). + */ + bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + + DBG("drm_bo_validate: (buf %d: %s, %d kb)\n", bo_fake->id, bo_fake->name, + bo_fake->bo.size / 1024); + + /* Sanity check: Buffers should be unmapped before being validated. + * This is not so much of a problem for bufmgr_fake, but TTM refuses, + * and the problem is harder to debug there. + */ + assert(bo_fake->map_count == 0); + + if (bo_fake->is_static) { + /* Add it to the needs-fence list */ + bufmgr_fake->need_fence = 1; + return 0; + } + + /* reset size accounted */ + bo_fake->size_accounted = 0; + + /* Allocate the card memory */ + if (!bo_fake->block && !evict_and_alloc_block(bo)) { + bufmgr_fake->fail = 1; + DBG("Failed to validate buf %d:%s\n", bo_fake->id, bo_fake->name); + return -1; + } + + assert(bo_fake->block); + assert(bo_fake->block->bo == &bo_fake->bo); + + bo->offset = bo_fake->block->mem->ofs; + + /* Upload the buffer contents if necessary */ + if (bo_fake->dirty) { + DBG("Upload dirty buf %d:%s, sz %d offset 0x%x\n", bo_fake->id, + bo_fake->name, bo->size, bo_fake->block->mem->ofs); + + assert(!(bo_fake->flags & + (BM_NO_BACKING_STORE|BM_PINNED))); + + /* Actually, should be able to just wait for a fence on the memory, + * which we would be tracking when we free it. Waiting for idle is + * a sufficiently large hammer for now. 
+ */ + dri_bufmgr_fake_wait_idle(bufmgr_fake); + + /* we may never have mapped this BO so it might not have any backing + * store if this happens it should be rare, but 0 the card memory + * in any case */ + if (bo_fake->backing_store) + memcpy(bo_fake->block->virtual, bo_fake->backing_store, bo->size); + else + memset(bo_fake->block->virtual, 0, bo->size); + + bo_fake->dirty = 0; + } + + bo_fake->block->fenced = 0; + bo_fake->block->on_hardware = 1; + move_to_tail(&bufmgr_fake->on_hardware, bo_fake->block); + + bo_fake->validated = GL_TRUE; + bufmgr_fake->need_fence = 1; + + return 0; +} + +static void +dri_fake_fence_validated(dri_bufmgr *bufmgr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + unsigned int cookie; + + cookie = _fence_emit_internal(bufmgr_fake); + fence_blocks(bufmgr_fake, cookie); + + DBG("drm_fence_validated: 0x%08x cookie\n", cookie); +} + +static void +dri_fake_destroy(dri_bufmgr *bufmgr) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bufmgr; + + mmDestroy(bufmgr_fake->heap); + free(bufmgr); +} + +static int +dri_fake_emit_reloc(dri_bo *reloc_buf, + uint32_t read_domains, uint32_t write_domain, + uint32_t delta, uint32_t offset, dri_bo *target_buf) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)reloc_buf->bufmgr; + struct fake_buffer_reloc *r; + dri_bo_fake *reloc_fake = (dri_bo_fake *)reloc_buf; + dri_bo_fake *target_fake = (dri_bo_fake *)target_buf; + int i; + + assert(reloc_buf); + assert(target_buf); + + assert(target_fake->is_static || target_fake->size_accounted); + + if (reloc_fake->relocs == NULL) { + reloc_fake->relocs = malloc(sizeof(struct fake_buffer_reloc) * + MAX_RELOCS); + } + + r = &reloc_fake->relocs[reloc_fake->nr_relocs++]; + + assert(reloc_fake->nr_relocs <= MAX_RELOCS); + + dri_bo_reference(target_buf); + + r->target_buf = target_buf; + r->offset = offset; + r->last_target_offset = target_buf->offset; + r->delta = delta; + r->read_domains = read_domains; + r->write_domain = write_domain; + + if (bufmgr_fake->debug) { + /* Check that a conflicting relocation hasn't already been emitted. */ + for (i = 0; i < reloc_fake->nr_relocs - 1; i++) { + struct fake_buffer_reloc *r2 = &reloc_fake->relocs[i]; + + assert(r->offset != r2->offset); + } + } + + return 0; +} + +/** + * Incorporates the validation flags associated with each relocation into + * the combined validation flags for the buffer on this batchbuffer submission. + */ +static void +dri_fake_calculate_domains(dri_bo *bo) +{ + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + + /* Do the same for the tree of buffers we depend on */ + dri_fake_calculate_domains(r->target_buf); + + target_fake->read_domains |= r->read_domains; + if (target_fake->write_domain != 0) + target_fake->write_domain = r->write_domain; + } +} + + +static int +dri_fake_reloc_and_validate_buffer(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i, ret; + + assert(bo_fake->map_count == 0); + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + uint32_t reloc_data; + + /* Validate the target buffer if that hasn't been done. 
*/ + if (!target_fake->validated) { + ret = dri_fake_reloc_and_validate_buffer(r->target_buf); + if (ret != 0) { + if (bo->virtual != NULL) + dri_bo_unmap(bo); + return ret; + } + } + + /* Calculate the value of the relocation entry. */ + if (r->target_buf->offset != r->last_target_offset) { + reloc_data = r->target_buf->offset + r->delta; + + if (bo->virtual == NULL) + dri_bo_map(bo, GL_TRUE); + + *(uint32_t *)(bo->virtual + r->offset) = reloc_data; + + r->last_target_offset = r->target_buf->offset; + } + } + + if (bo->virtual != NULL) + dri_bo_unmap(bo); + + if (bo_fake->write_domain != 0) { + if (!(bo_fake->flags & (BM_NO_BACKING_STORE|BM_PINNED))) { + if (bo_fake->backing_store == 0) + alloc_backing_store(bo); + + bo_fake->card_dirty = 1; + } + bufmgr_fake->performed_rendering = GL_TRUE; + } + + return dri_fake_bo_validate(bo); +} + +static void * +dri_fake_process_relocs(dri_bo *batch_buf) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)batch_buf->bufmgr; + dri_bo_fake *batch_fake = (dri_bo_fake *)batch_buf; + int ret; + int retry_count = 0; + + bufmgr_fake->performed_rendering = GL_FALSE; + + dri_fake_calculate_domains(batch_buf); + + batch_fake->read_domains = DRM_GEM_DOMAIN_I915_COMMAND; + + /* we've ran out of RAM so blow the whole lot away and retry */ + restart: + ret = dri_fake_reloc_and_validate_buffer(batch_buf); + if (bufmgr_fake->fail == 1) { + if (retry_count == 0) { + retry_count++; + dri_fake_kick_all(bufmgr_fake); + bufmgr_fake->fail = 0; + goto restart; + } else /* dump out the memory here */ + mmDumpMemInfo(bufmgr_fake->heap); + } + + assert(ret == 0); + + bufmgr_fake->current_total_size = 0; + return NULL; +} + +static void +dri_bo_fake_post_submit(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + int i; + + for (i = 0; i < bo_fake->nr_relocs; i++) { + struct fake_buffer_reloc *r = &bo_fake->relocs[i]; + dri_bo_fake *target_fake = (dri_bo_fake *)r->target_buf; + + if (target_fake->validated) + dri_bo_fake_post_submit(r->target_buf); + + DBG("%s@0x%08x + 0x%08x -> %s@0x%08x + 0x%08x\n", + bo_fake->name, (uint32_t)bo->offset, r->offset, + target_fake->name, (uint32_t)r->target_buf->offset, r->delta); + } + + assert(bo_fake->map_count == 0); + bo_fake->validated = GL_FALSE; + bo_fake->read_domains = 0; + bo_fake->write_domain = 0; +} + + +static void +dri_fake_post_submit(dri_bo *batch_buf) +{ + dri_fake_fence_validated(batch_buf->bufmgr); + + dri_bo_fake_post_submit(batch_buf); +} + +static int +dri_fake_check_aperture_space(dri_bo *bo) +{ + dri_bufmgr_fake *bufmgr_fake = (dri_bufmgr_fake *)bo->bufmgr; + dri_bo_fake *bo_fake = (dri_bo_fake *)bo; + GLuint sz; + + sz = (bo->size + bo_fake->alignment - 1) & ~(bo_fake->alignment - 1); + + if (bo_fake->size_accounted || bo_fake->is_static) + return 0; + + if (bufmgr_fake->current_total_size + sz > bufmgr_fake->size) { + DBG("check_space: %s bo %d %d overflowed bufmgr size %d\n", bo_fake->name, bo_fake->id, sz, bufmgr_fake->size); + return -1; + } + + bufmgr_fake->current_total_size += sz; + bo_fake->size_accounted = 1; + DBG("drm_check_space: buf %d, %s %d %d\n", bo_fake->id, bo_fake->name, bo->size, bufmgr_fake->current_total_size); + return 0; +} + +dri_bufmgr * +dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, + unsigned long size, + unsigned int (*fence_emit)(void *private), + int (*fence_wait)(void *private, unsigned int cookie), + void *driver_priv) +{ + dri_bufmgr_fake *bufmgr_fake; + + bufmgr_fake = calloc(1, 
sizeof(*bufmgr_fake)); + + /* Initialize allocator */ + make_empty_list(&bufmgr_fake->fenced); + make_empty_list(&bufmgr_fake->on_hardware); + make_empty_list(&bufmgr_fake->lru); + + bufmgr_fake->low_offset = low_offset; + bufmgr_fake->virtual = low_virtual; + bufmgr_fake->size = size; + bufmgr_fake->heap = mmInit(low_offset, size); + + /* Hook in methods */ + bufmgr_fake->bufmgr.bo_alloc = dri_fake_bo_alloc; + bufmgr_fake->bufmgr.bo_alloc_static = dri_fake_bo_alloc_static; + bufmgr_fake->bufmgr.bo_reference = dri_fake_bo_reference; + bufmgr_fake->bufmgr.bo_unreference = dri_fake_bo_unreference; + bufmgr_fake->bufmgr.bo_map = dri_fake_bo_map; + bufmgr_fake->bufmgr.bo_unmap = dri_fake_bo_unmap; + bufmgr_fake->bufmgr.bo_wait_rendering = dri_fake_bo_wait_rendering; + bufmgr_fake->bufmgr.destroy = dri_fake_destroy; + bufmgr_fake->bufmgr.emit_reloc = dri_fake_emit_reloc; + bufmgr_fake->bufmgr.process_relocs = dri_fake_process_relocs; + bufmgr_fake->bufmgr.post_submit = dri_fake_post_submit; + bufmgr_fake->bufmgr.check_aperture_space = dri_fake_check_aperture_space; + bufmgr_fake->bufmgr.debug = GL_FALSE; + + bufmgr_fake->fence_emit = fence_emit; + bufmgr_fake->fence_wait = fence_wait; + bufmgr_fake->driver_priv = driver_priv; + + return &bufmgr_fake->bufmgr; +} + diff --git a/libdrm/intel/intel_bufmgr_fake.h b/libdrm/intel/intel_bufmgr_fake.h new file mode 100644 index 00000000..bc7e59e6 --- /dev/null +++ b/libdrm/intel/intel_bufmgr_fake.h @@ -0,0 +1,50 @@ +/************************************************************************** + * + * Copyright © 2007 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. 
+ * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Eric Anholt <eric@anholt.net> + */ + +#ifndef _INTEL_BUFMGR_FAKE_H_ +#define _INTEL_BUFMGR_FAKE_H_ + +void dri_bufmgr_fake_contended_lock_take(dri_bufmgr *bufmgr); +dri_bufmgr *dri_bufmgr_fake_init(unsigned long low_offset, void *low_virtual, + unsigned long size, + unsigned int (*fence_emit)(void *private), + int (*fence_wait)(void *private, + unsigned int cookie), + void *driver_priv); +void dri_bo_fake_disable_backing_store(dri_bo *bo, + void (*invalidate_cb)(dri_bo *bo, + void *ptr), + void *ptr); +#endif /* _INTEL_BUFMGR_FAKE_H_ */ + diff --git a/libdrm/intel/intel_bufmgr_gem.c b/libdrm/intel/intel_bufmgr_gem.c new file mode 100644 index 00000000..3c1c3157 --- /dev/null +++ b/libdrm/intel/intel_bufmgr_gem.c @@ -0,0 +1,847 @@ +/************************************************************************** + * + * Copyright © 2007 Red Hat Inc. + * Copyright © 2007 Intel Corporation + * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE + * USE OR OTHER DEALINGS IN THE SOFTWARE. + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * + **************************************************************************/ +/* + * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com> + * Keith Whitwell <keithw-at-tungstengraphics-dot-com> + * Eric Anholt <eric@anholt.net> + * Dave Airlie <airlied@linux.ie> + */ + +#include <xf86drm.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <assert.h> +#include <sys/ioctl.h> +#include <sys/mman.h> + +#include "errno.h" +#include "mtypes.h" +#include "dri_bufmgr.h" +#include "string.h" +#include "imports.h" + +#include "i915_drm.h" + +#include "intel_bufmgr_gem.h" + +#define DBG(...) do { \ + if (bufmgr_gem->bufmgr.debug) \ + fprintf(stderr, __VA_ARGS__); \ +} while (0) + +struct intel_validate_entry { + dri_bo *bo; + struct drm_i915_op_arg bo_arg; +}; + +struct dri_gem_bo_bucket_entry { + uint32_t gem_handle; + uint32_t last_offset; + struct dri_gem_bo_bucket_entry *next; +}; + +struct dri_gem_bo_bucket { + struct dri_gem_bo_bucket_entry *head; + struct dri_gem_bo_bucket_entry **tail; + /** + * Limit on the number of entries in this bucket. + * + * 0 means that this caching at this bucket size is disabled. 
+ * -1 means that there is no limit to caching at this size. + */ + int max_entries; + int num_entries; +}; + +/* Arbitrarily chosen, 16 means that the maximum size we'll cache for reuse + * is 1 << 16 pages, or 256MB. + */ +#define INTEL_GEM_BO_BUCKETS 16 +typedef struct _dri_bufmgr_gem { + dri_bufmgr bufmgr; + + int fd; + + uint32_t max_relocs; + + struct drm_i915_gem_exec_object *exec_objects; + dri_bo **exec_bos; + int exec_size; + int exec_count; + + /** Array of lists of cached gem objects of power-of-two sizes */ + struct dri_gem_bo_bucket cache_bucket[INTEL_GEM_BO_BUCKETS]; + + struct drm_i915_gem_execbuffer exec_arg; +} dri_bufmgr_gem; + +typedef struct _dri_bo_gem { + dri_bo bo; + + int refcount; + GLboolean mapped; + uint32_t gem_handle; + const char *name; + + /** + * Index of the buffer within the validation list while preparing a + * batchbuffer execution. + */ + int validate_index; + + /** + * Tracks whether set_domain to CPU is current + * Set when set_domain has been called + * Cleared when a batch has been submitted + */ + GLboolean cpu_domain_set; + + /** Array passed to the DRM containing relocation information. */ + struct drm_i915_gem_relocation_entry *relocs; + /** Array of bos corresponding to relocs[i].target_handle */ + dri_bo **reloc_target_bo; + /** Number of entries in relocs */ + int reloc_count; + /** Mapped address for the buffer */ + void *virtual; +} dri_bo_gem; + +static int +logbase2(int n) +{ + GLint i = 1; + GLint log2 = 0; + + while (n > i) { + i *= 2; + log2++; + } + + return log2; +} + +static struct dri_gem_bo_bucket * +dri_gem_bo_bucket_for_size(dri_bufmgr_gem *bufmgr_gem, unsigned long size) +{ + int i; + + /* We only do buckets in power of two increments */ + if ((size & (size - 1)) != 0) + return NULL; + + /* We should only see sizes rounded to pages. */ + assert((size % 4096) == 0); + + /* We always allocate in units of pages */ + i = ffs(size / 4096) - 1; + if (i >= INTEL_GEM_BO_BUCKETS) + return NULL; + + return &bufmgr_gem->cache_bucket[i]; +} + + +static void dri_gem_dump_validation_list(dri_bufmgr_gem *bufmgr_gem) +{ + int i, j; + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + dri_bo *bo = bufmgr_gem->exec_bos[i]; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + if (bo_gem->relocs == NULL) { + DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle, bo_gem->name); + continue; + } + + for (j = 0; j < bo_gem->reloc_count; j++) { + dri_bo *target_bo = bo_gem->reloc_target_bo[j]; + dri_bo_gem *target_gem = (dri_bo_gem *)target_bo; + + DBG("%2d: %d (%s)@0x%08llx -> %d (%s)@0x%08lx + 0x%08x\n", + i, + bo_gem->gem_handle, bo_gem->name, bo_gem->relocs[j].offset, + target_gem->gem_handle, target_gem->name, target_bo->offset, + bo_gem->relocs[j].delta); + } + } +} + +/** + * Adds the given buffer to the list of buffers to be validated (moved into the + * appropriate memory type) with the next batch submission. + * + * If a buffer is validated multiple times in a batch submission, it ends up + * with the intersection of the memory type flags and the union of the + * access flags. + */ +static void +intel_add_validate_buffer(dri_bo *bo) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + int index; + + if (bo_gem->validate_index != -1) + return; + + /* Extend the array of validation entries as necessary. 
*/ + if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) { + int new_size = bufmgr_gem->exec_size * 2; + + if (new_size == 0) + new_size = 5; + + bufmgr_gem->exec_objects = + realloc(bufmgr_gem->exec_objects, + sizeof(*bufmgr_gem->exec_objects) * new_size); + bufmgr_gem->exec_bos = + realloc(bufmgr_gem->exec_bos, + sizeof(*bufmgr_gem->exec_bos) * new_size); + bufmgr_gem->exec_size = new_size; + } + + index = bufmgr_gem->exec_count; + bo_gem->validate_index = index; + /* Fill in array entry */ + bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle; + bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count; + bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs; + bufmgr_gem->exec_objects[index].alignment = 0; + bufmgr_gem->exec_objects[index].offset = 0; + bufmgr_gem->exec_bos[index] = bo; + dri_bo_reference(bo); + bufmgr_gem->exec_count++; +} + + +#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \ + sizeof(uint32_t)) + +static int +intel_setup_reloc_list(dri_bo *bo) +{ + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + + bo_gem->relocs = malloc(bufmgr_gem->max_relocs * + sizeof(struct drm_i915_gem_relocation_entry)); + bo_gem->reloc_target_bo = malloc(bufmgr_gem->max_relocs * sizeof(dri_bo *)); + + return 0; +} + +static dri_bo * +dri_gem_bo_alloc(dri_bufmgr *bufmgr, const char *name, + unsigned long size, unsigned int alignment, + uint64_t location_mask) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; + dri_bo_gem *bo_gem; + unsigned int page_size = getpagesize(); + int ret; + struct dri_gem_bo_bucket *bucket; + GLboolean alloc_from_cache = GL_FALSE; + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + + /* Round the allocated size up to a power of two number of pages. */ + bo_gem->bo.size = 1 << logbase2(size); + if (bo_gem->bo.size < page_size) + bo_gem->bo.size = page_size; + bucket = dri_gem_bo_bucket_for_size(bufmgr_gem, bo_gem->bo.size); + + /* If we don't have caching at this size, don't actually round the + * allocation up. 
+ */ + if (bucket == NULL || bucket->max_entries == 0) { + bo_gem->bo.size = size; + if (bo_gem->bo.size < page_size) + bo_gem->bo.size = page_size; + } + + /* Get a buffer out of the cache if available */ + if (bucket != NULL && bucket->num_entries > 0) { + struct dri_gem_bo_bucket_entry *entry = bucket->head; + struct drm_i915_gem_busy busy; + + busy.handle = entry->gem_handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy); + alloc_from_cache = (ret == 0 && busy.busy == 0); + + if (alloc_from_cache) { + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + bo_gem->gem_handle = entry->gem_handle; + bo_gem->bo.offset = entry->last_offset; + free(entry); + } + } + + if (!alloc_from_cache) { + struct drm_gem_create create; + + memset(&create, 0, sizeof(create)); + create.size = bo_gem->bo.size; + + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CREATE, &create); + bo_gem->gem_handle = create.handle; + if (ret != 0) { + free(bo_gem); + return NULL; + } + } + + bo_gem->bo.virtual = NULL; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->name = name; + bo_gem->refcount = 1; + bo_gem->validate_index = -1; + + DBG("bo_create: buf %d (%s) %ldb\n", + bo_gem->gem_handle, bo_gem->name, size); + + return &bo_gem->bo; +} + +/* Our GEM backend doesn't allow creation of static buffers, as that requires + * privelege for the non-fake case, and the lock in the fake case where we were + * working around the X Server not creating buffers and passing handles to us. + */ +static dri_bo * +dri_gem_bo_alloc_static(dri_bufmgr *bufmgr, const char *name, + unsigned long offset, unsigned long size, void *virtual, + uint64_t location_mask) +{ + return NULL; +} + +/** + * Returns a dri_bo wrapping the given buffer object handle. + * + * This can be used when one application needs to pass a buffer object + * to another. 
+ */ +dri_bo * +intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, const char *name, + unsigned int handle) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; + dri_bo_gem *bo_gem; + int ret; + struct drm_gem_open open_arg; + + bo_gem = calloc(1, sizeof(*bo_gem)); + if (!bo_gem) + return NULL; + + memset(&open_arg, 0, sizeof(open_arg)); + open_arg.name = handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_OPEN, &open_arg); + if (ret != 0) { + fprintf(stderr, "Couldn't reference %s handle 0x%08x: %s\n", + name, handle, strerror(-ret)); + free(bo_gem); + return NULL; + } + bo_gem->bo.size = open_arg.size; + bo_gem->bo.offset = 0; + bo_gem->bo.virtual = NULL; + bo_gem->bo.bufmgr = bufmgr; + bo_gem->name = name; + bo_gem->refcount = 1; + bo_gem->validate_index = -1; + bo_gem->gem_handle = open_arg.handle; + + DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name); + + return &bo_gem->bo; +} + +static void +dri_gem_bo_reference(dri_bo *bo) +{ + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + bo_gem->refcount++; +} + +static void +dri_gem_bo_unreference(dri_bo *bo) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + if (!bo) + return; + + if (--bo_gem->refcount == 0) { + struct dri_gem_bo_bucket *bucket; + int ret; + + if (bo_gem->mapped) + munmap (bo_gem->virtual, bo->size); + + if (bo_gem->relocs != NULL) { + int i; + + /* Unreference all the target buffers */ + for (i = 0; i < bo_gem->reloc_count; i++) + dri_bo_unreference(bo_gem->reloc_target_bo[i]); + free(bo_gem->reloc_target_bo); + free(bo_gem->relocs); + } + + bucket = dri_gem_bo_bucket_for_size(bufmgr_gem, bo->size); + /* Put the buffer into our internal cache for reuse if we can. */ + if (bucket != NULL && + (bucket->max_entries == -1 || + (bucket->max_entries > 0 && + bucket->num_entries < bucket->max_entries))) + { + struct dri_gem_bo_bucket_entry *entry; + + entry = calloc(1, sizeof(*entry)); + entry->gem_handle = bo_gem->gem_handle; + entry->last_offset = bo->offset; + + entry->next = NULL; + *bucket->tail = entry; + bucket->tail = &entry->next; + bucket->num_entries++; + } else { + struct drm_gem_close close; + + /* Close this object */ + close.handle = bo_gem->gem_handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); + if (ret != 0) { + fprintf(stderr, + "DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n", + bo_gem->gem_handle, bo_gem->name, strerror(-ret)); + } + } + + DBG("bo_unreference final: %d (%s)\n", + bo_gem->gem_handle, bo_gem->name); + + free(bo); + return; + } +} + +static int +dri_gem_bo_map(dri_bo *bo, GLboolean write_enable) +{ + dri_bufmgr_gem *bufmgr_gem; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + struct drm_gem_set_domain set_domain; + int ret; + + bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + + /* Allow recursive mapping. Mesa may recursively map buffers with + * nested display loops. 
+ */ + if (!bo_gem->mapped) { + + assert(bo->virtual == NULL); + + DBG("bo_map: %d (%s)\n", bo_gem->gem_handle, bo_gem->name); + + if (bo_gem->virtual == NULL) { + struct drm_gem_mmap mmap_arg; + + memset(&mmap_arg, 0, sizeof(mmap_arg)); + mmap_arg.handle = bo_gem->gem_handle; + mmap_arg.offset = 0; + mmap_arg.size = bo->size; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_MMAP, &mmap_arg); + if (ret != 0) { + fprintf(stderr, "%s:%d: Error mapping buffer %d (%s): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, bo_gem->name, strerror(errno)); + } + bo_gem->virtual = (void *)(uintptr_t)mmap_arg.addr_ptr; + } + bo->virtual = bo_gem->virtual; + bo_gem->mapped = GL_TRUE; + DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name, bo_gem->virtual); + } + + if (!bo_gem->cpu_domain_set) { + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = DRM_GEM_DOMAIN_CPU; + set_domain.write_domain = write_enable ? DRM_GEM_DOMAIN_CPU : 0; + ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_SET_DOMAIN, &set_domain); + if (ret != 0) { + fprintf (stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain, + strerror (errno)); + } + bo_gem->cpu_domain_set = GL_TRUE; + } + + return 0; +} + +static int +dri_gem_bo_unmap(dri_bo *bo) +{ + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + if (bo == NULL) + return 0; + + assert(bo_gem->mapped); + + return 0; +} + +static int +dri_gem_bo_subdata (dri_bo *bo, unsigned long offset, + unsigned long size, const void *data) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + struct drm_gem_pwrite pwrite; + int ret; + + memset (&pwrite, 0, sizeof (pwrite)); + pwrite.handle = bo_gem->gem_handle; + pwrite.offset = offset; + pwrite.size = size; + pwrite.data_ptr = (uint64_t) (uintptr_t) data; + ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_PWRITE, &pwrite); + if (ret != 0) { + fprintf (stderr, "%s:%d: Error writing data to buffer %d: (%d %d) %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, (int) offset, (int) size, + strerror (errno)); + } + return 0; +} + +static int +dri_gem_bo_get_subdata (dri_bo *bo, unsigned long offset, + unsigned long size, void *data) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + struct drm_gem_pread pread; + int ret; + + memset (&pread, 0, sizeof (pread)); + pread.handle = bo_gem->gem_handle; + pread.offset = offset; + pread.size = size; + pread.data_ptr = (uint64_t) (uintptr_t) data; + ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_PREAD, &pread); + if (ret != 0) { + fprintf (stderr, "%s:%d: Error reading data from buffer %d: (%d %d) %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, (int) offset, (int) size, + strerror (errno)); + } + return 0; +} + +static void +dri_gem_bo_wait_rendering(dri_bo *bo) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + struct drm_gem_set_domain set_domain; + int ret; + + set_domain.handle = bo_gem->gem_handle; + set_domain.read_domains = DRM_GEM_DOMAIN_CPU; + set_domain.write_domain = 0; + ret = ioctl (bufmgr_gem->fd, DRM_IOCTL_GEM_SET_DOMAIN, &set_domain); + if (ret != 0) { + fprintf (stderr, "%s:%d: Error setting memory domains %d (%08x %08x): %s .\n", + __FILE__, __LINE__, + bo_gem->gem_handle, set_domain.read_domains, set_domain.write_domain, + strerror (errno)); + } +} + +static void +dri_bufmgr_gem_destroy(dri_bufmgr *bufmgr) +{ + 
dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; + int i; + + free(bufmgr_gem->exec_objects); + free(bufmgr_gem->exec_bos); + + /* Free any cached buffer objects we were going to reuse */ + for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) { + struct dri_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i]; + struct dri_gem_bo_bucket_entry *entry; + + while ((entry = bucket->head) != NULL) { + struct drm_gem_close close; + int ret; + + bucket->head = entry->next; + if (entry->next == NULL) + bucket->tail = &bucket->head; + bucket->num_entries--; + + /* Close this object */ + close.handle = entry->gem_handle; + ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close); + if (ret != 0) { + fprintf(stderr, "DRM_IOCTL_GEM_CLOSE failed: %s\n", + strerror(-ret)); + } + + free(entry); + } + } + + free(bufmgr); +} + +/** + * Adds the target buffer to the validation list and adds the relocation + * to the reloc_buffer's relocation list. + * + * The relocation entry at the given offset must already contain the + * precomputed relocation value, because the kernel will optimize out + * the relocation entry write when the buffer hasn't moved from the + * last known offset in target_bo. + */ +static int +dri_gem_emit_reloc(dri_bo *bo, uint32_t read_domains, uint32_t write_domain, + uint32_t delta, uint32_t offset, dri_bo *target_bo) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bo->bufmgr; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + dri_bo_gem *target_bo_gem = (dri_bo_gem *)target_bo; + + /* Create a new relocation list if needed */ + if (bo_gem->relocs == NULL) + intel_setup_reloc_list(bo); + + /* Check overflow */ + assert(bo_gem->reloc_count < bufmgr_gem->max_relocs); + + /* Check args */ + assert (offset <= bo->size - 4); + assert ((write_domain & (write_domain-1)) == 0); + + bo_gem->relocs[bo_gem->reloc_count].offset = offset; + bo_gem->relocs[bo_gem->reloc_count].delta = delta; + bo_gem->relocs[bo_gem->reloc_count].target_handle = + target_bo_gem->gem_handle; + bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains; + bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain; + bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset; + + bo_gem->reloc_target_bo[bo_gem->reloc_count] = target_bo; + dri_bo_reference(target_bo); + + bo_gem->reloc_count++; + return 0; +} + +/** + * Walk the tree of relocations rooted at BO and accumulate the list of + * validations to be performed and update the relocation buffers with + * index values into the validation list. + */ +static void +dri_gem_bo_process_reloc(dri_bo *bo) +{ + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + int i; + + if (bo_gem->relocs == NULL) + return; + + for (i = 0; i < bo_gem->reloc_count; i++) { + dri_bo *target_bo = bo_gem->reloc_target_bo[i]; + + /* Continue walking the tree depth-first. */ + dri_gem_bo_process_reloc(target_bo); + + /* Add the target to the validate list */ + intel_add_validate_buffer(target_bo); + } +} + +static void * +dri_gem_process_reloc(dri_bo *batch_buf) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *) batch_buf->bufmgr; + + /* Update indices and set up the validate list. */ + dri_gem_bo_process_reloc(batch_buf); + + /* Add the batch buffer to the validation list. There are no relocations + * pointing to it. 
+ */ + intel_add_validate_buffer(batch_buf); + + bufmgr_gem->exec_arg.buffers_ptr = (uintptr_t)bufmgr_gem->exec_objects; + bufmgr_gem->exec_arg.buffer_count = bufmgr_gem->exec_count; + bufmgr_gem->exec_arg.batch_start_offset = 0; + bufmgr_gem->exec_arg.batch_len = 0; /* written in intel_exec_ioctl */ + + return &bufmgr_gem->exec_arg; +} + +static void +intel_update_buffer_offsets (dri_bufmgr_gem *bufmgr_gem) +{ + int i; + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + dri_bo *bo = bufmgr_gem->exec_bos[i]; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + /* Update the buffer offset */ + if (bufmgr_gem->exec_objects[i].offset != bo->offset) { + DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n", + bo_gem->gem_handle, bo_gem->name, bo->offset, + bufmgr_gem->exec_objects[i].offset); + bo->offset = bufmgr_gem->exec_objects[i].offset; + } + } +} + +static void +dri_gem_post_submit(dri_bo *batch_buf) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)batch_buf->bufmgr; + int i; + + intel_update_buffer_offsets (bufmgr_gem); + + if (bufmgr_gem->bufmgr.debug) + dri_gem_dump_validation_list(bufmgr_gem); + + for (i = 0; i < bufmgr_gem->exec_count; i++) { + dri_bo *bo = bufmgr_gem->exec_bos[i]; + dri_bo_gem *bo_gem = (dri_bo_gem *)bo; + + /* Need to call set_domain on next bo_map */ + bo_gem->cpu_domain_set = GL_FALSE; + + /* Disconnect the buffer from the validate list */ + bo_gem->validate_index = -1; + dri_bo_unreference(bo); + bufmgr_gem->exec_bos[i] = NULL; + } + bufmgr_gem->exec_count = 0; +} + +/** + * Enables unlimited caching of buffer objects for reuse. + * + * This is potentially very memory expensive, as the cache at each bucket + * size is only bounded by how many buffers of that size we've managed to have + * in flight at once. + */ +void +intel_gem_enable_bo_reuse(dri_bufmgr *bufmgr) +{ + dri_bufmgr_gem *bufmgr_gem = (dri_bufmgr_gem *)bufmgr; + int i; + + for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) { + bufmgr_gem->cache_bucket[i].max_entries = -1; + } +} + +/* + * + */ +static int +dri_gem_check_aperture_space(dri_bo *bo) +{ + return 0; +} + +/** + * Initializes the GEM buffer manager, which uses the kernel to allocate, map, + * and manage map buffer objections. + * + * \param fd File descriptor of the opened DRM device. + */ +dri_bufmgr * +intel_bufmgr_gem_init(int fd, int batch_size) +{ + dri_bufmgr_gem *bufmgr_gem; + int i; + + bufmgr_gem = calloc(1, sizeof(*bufmgr_gem)); + bufmgr_gem->fd = fd; + + /* Let's go with one relocation per every 2 dwords (but round down a bit + * since a power of two will mean an extra page allocation for the reloc + * buffer). + * + * Every 4 was too few for the blender benchmark. 
+ */ + bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2; + + bufmgr_gem->bufmgr.bo_alloc = dri_gem_bo_alloc; + bufmgr_gem->bufmgr.bo_alloc_static = dri_gem_bo_alloc_static; + bufmgr_gem->bufmgr.bo_reference = dri_gem_bo_reference; + bufmgr_gem->bufmgr.bo_unreference = dri_gem_bo_unreference; + bufmgr_gem->bufmgr.bo_map = dri_gem_bo_map; + bufmgr_gem->bufmgr.bo_unmap = dri_gem_bo_unmap; + bufmgr_gem->bufmgr.bo_subdata = dri_gem_bo_subdata; + bufmgr_gem->bufmgr.bo_get_subdata = dri_gem_bo_get_subdata; + bufmgr_gem->bufmgr.bo_wait_rendering = dri_gem_bo_wait_rendering; + bufmgr_gem->bufmgr.destroy = dri_bufmgr_gem_destroy; + bufmgr_gem->bufmgr.emit_reloc = dri_gem_emit_reloc; + bufmgr_gem->bufmgr.process_relocs = dri_gem_process_reloc; + bufmgr_gem->bufmgr.post_submit = dri_gem_post_submit; + bufmgr_gem->bufmgr.debug = GL_FALSE; + bufmgr_gem->bufmgr.check_aperture_space = dri_gem_check_aperture_space; + /* Initialize the linked lists for BO reuse cache. */ + for (i = 0; i < INTEL_GEM_BO_BUCKETS; i++) + bufmgr_gem->cache_bucket[i].tail = &bufmgr_gem->cache_bucket[i].head; + + return &bufmgr_gem->bufmgr; +} + diff --git a/libdrm/intel/intel_bufmgr_gem.h b/libdrm/intel/intel_bufmgr_gem.h new file mode 100644 index 00000000..36caeba2 --- /dev/null +++ b/libdrm/intel/intel_bufmgr_gem.h @@ -0,0 +1,16 @@ + +#ifndef INTEL_BUFMGR_GEM_H +#define INTEL_BUFMGR_GEM_H + +#include "dri_bufmgr.h" + +extern dri_bo *intel_gem_bo_create_from_handle(dri_bufmgr *bufmgr, + const char *name, + unsigned int handle); + +dri_bufmgr *intel_bufmgr_gem_init(int fd, int batch_size); + +void +intel_gem_enable_bo_reuse(dri_bufmgr *bufmgr); + +#endif /* INTEL_BUFMGR_GEM_H */ |
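For orientation, below is a minimal usage sketch of the GEM buffer manager introduced by this patch. It is illustrative only, not part of the change: the dri_bo_alloc()/dri_bo_map()/dri_bo_unmap()/dri_bo_subdata()/dri_bo_unreference()/dri_bufmgr_destroy() wrapper names are assumed to exist in dri_bufmgr.h, mirroring the bo_alloc, bo_map, bo_unmap, bo_subdata, bo_unreference and destroy hooks wired up in intel_bufmgr_gem_init(), and the buffer name, batch size and alignment are made-up values.

/* Usage sketch (assumed wrappers from dri_bufmgr.h; see note above). */
#include <stdint.h>
#include <string.h>
#include "mtypes.h"
#include "dri_bufmgr.h"
#include "intel_bufmgr_gem.h"

static void example_upload(int fd)
{
    /* 16384 is an arbitrary batch size; it only scales max_relocs. */
    dri_bufmgr *bufmgr = intel_bufmgr_gem_init(fd, 16384);
    static const uint32_t data[4] = { 1, 2, 3, 4 };
    dri_bo *bo;

    /* Opt in to the per-bucket BO cache so freed buffers get recycled. */
    intel_gem_enable_bo_reuse(bufmgr);

    /* The size is rounded up to a power-of-two number of pages whenever
     * the matching cache bucket is enabled. */
    bo = dri_bo_alloc(bufmgr, "example", sizeof(data), 4096, 0);

    /* Write through a CPU mapping (GEM mmap + set_domain under the hood)... */
    dri_bo_map(bo, GL_TRUE);
    memcpy(bo->virtual, data, sizeof(data));
    dri_bo_unmap(bo);

    /* ...or, equivalently, via the pwrite-based bo_subdata hook. */
    dri_bo_subdata(bo, 0, sizeof(data), data);

    dri_bo_unreference(bo);
    dri_bufmgr_destroy(bufmgr);
}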