author     Eric Anholt <eric@anholt.net>  2009-02-18 13:06:35 -0800
committer  Eric Anholt <eric@anholt.net>  2009-02-21 09:57:06 -0800
commit     72abe983adfe7e8dcdcec11f1bc11d0b3daae063 (patch)
tree       24021de560fcdef47c42259a14ef95a56b370382 /libdrm/intel/intel_bufmgr_gem.c
parent     a1345338feb7af25c0a9fe02ec16c2b9cce83a9e (diff)
intel: Add a new bufmgr alloc function to get BOs ready for rendering to.
This avoids using the oldest BO in the BO cache and waiting for it to be idle before we turn around and render to it with the GPU. Thanks to Chris Wilson for pointing out how silly we were being.
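
A usage sketch (not part of this patch) of how a caller such as a 3D driver would split its allocations between the two paths, assuming the public wrappers in intel_bufmgr.h that front these bufmgr hooks; the two helper functions below are hypothetical names for illustration only:

#include "intel_bufmgr.h"

/* CPU writes this buffer first: prefer an idle cached BO, since mapping
 * a busy one would stall waiting for the GPU (the existing bo_alloc path).
 */
static drm_intel_bo *
alloc_upload_buffer(drm_intel_bufmgr *bufmgr, unsigned long size)
{
    return drm_intel_bo_alloc(bufmgr, "upload buf", size, 4096);
}

/* GPU writes this buffer first: a still-busy MRU cached BO is fine, and it
 * is likely still hot in the GPU cache and aperture (the new path).
 */
static drm_intel_bo *
alloc_render_target(drm_intel_bufmgr *bufmgr, unsigned long size)
{
    return drm_intel_bo_alloc_for_render(bufmgr, "render target", size, 4096);
}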
Diffstat (limited to 'libdrm/intel/intel_bufmgr_gem.c')
-rw-r--r--  libdrm/intel/intel_bufmgr_gem.c | 81
1 file changed, 56 insertions(+), 25 deletions(-)
diff --git a/libdrm/intel/intel_bufmgr_gem.c b/libdrm/intel/intel_bufmgr_gem.c
index 51641b7a..6ddecf4a 100644
--- a/libdrm/intel/intel_bufmgr_gem.c
+++ b/libdrm/intel/intel_bufmgr_gem.c
@@ -52,6 +52,7 @@
#include <sys/types.h>
#include "errno.h"
+#include "libdrm_lists.h"
#include "intel_bufmgr.h"
#include "intel_bufmgr_priv.h"
#include "intel_chipset.h"
@@ -67,7 +68,8 @@
typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
struct drm_intel_gem_bo_bucket {
-    drm_intel_bo_gem *head, **tail;
+    drmMMListHead head;
+
    /**
     * Limit on the number of entries in this bucket.
     *
@@ -145,8 +147,8 @@ struct _drm_intel_bo_gem {
    /** Mapped address for the buffer, saved across map/unmap cycles */
    void *virtual;
-    /** free list */
-    drm_intel_bo_gem *next;
+    /** BO cache list */
+    drmMMListHead head;
    /**
     * Boolean of whether this BO and its children have been included in
@@ -323,8 +325,9 @@ drm_intel_setup_reloc_list(drm_intel_bo *bo)
}
static drm_intel_bo *
-drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
-                       unsigned long size, unsigned int alignment)
+drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr, const char *name,
+                                unsigned long size, unsigned int alignment,
+                                int for_render)
{
    drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
    drm_intel_bo_gem *bo_gem;
@@ -353,19 +356,35 @@ drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
    /* Get a buffer out of the cache if available */
    if (bucket != NULL && bucket->num_entries > 0) {
        struct drm_i915_gem_busy busy;
-
-        bo_gem = bucket->head;
-        memset(&busy, 0, sizeof(busy));
-        busy.handle = bo_gem->gem_handle;
-
-        ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
-        alloc_from_cache = (ret == 0 && busy.busy == 0);
-
-        if (alloc_from_cache) {
-            bucket->head = bo_gem->next;
-            if (bo_gem->next == NULL)
-                bucket->tail = &bucket->head;
+
+        if (for_render) {
+            /* Allocate new render-target BOs from the tail (MRU)
+             * of the list, as it will likely be hot in the GPU cache
+             * and in the aperture for us.
+             */
+            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.prev, head);
+            DRMLISTDEL(&bo_gem->head);
            bucket->num_entries--;
+            alloc_from_cache = 1;
+        } else {
+            /* For non-render-target BOs (where we're probably going to map it
+             * first thing in order to fill it with data), check if the
+             * last BO in the cache is unbusy, and only reuse in that case.
+             * Otherwise, allocating a new buffer is probably faster than
+             * waiting for the GPU to finish.
+             */
+            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
+
+            memset(&busy, 0, sizeof(busy));
+            busy.handle = bo_gem->gem_handle;
+
+            ret = ioctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
+            alloc_from_cache = (ret == 0 && busy.busy == 0);
+
+            if (alloc_from_cache) {
+                DRMLISTDEL(&bo_gem->head);
+                bucket->num_entries--;
+            }
        }
    }
    pthread_mutex_unlock(&bufmgr_gem->lock);
@@ -406,6 +425,20 @@ drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
    return &bo_gem->bo;
}
+static drm_intel_bo *
+drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr, const char *name,
+                                  unsigned long size, unsigned int alignment)
+{
+    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 1);
+}
+
+static drm_intel_bo *
+drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr, const char *name,
+                       unsigned long size, unsigned int alignment)
+{
+    return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, alignment, 0);
+}
+
/**
 * Returns a drm_intel_bo wrapping the given buffer object handle.
 *
@@ -545,9 +578,7 @@ drm_intel_gem_bo_unreference_locked(drm_intel_bo *bo)
            bo_gem->reloc_target_bo = NULL;
            bo_gem->reloc_count = 0;
-            bo_gem->next = NULL;
-            *bucket->tail = bo_gem;
-            bucket->tail = &bo_gem->next;
+            DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
            bucket->num_entries++;
        } else {
            drm_intel_gem_bo_free(bo);
@@ -827,10 +858,9 @@ drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
        struct drm_intel_gem_bo_bucket *bucket = &bufmgr_gem->cache_bucket[i];
        drm_intel_bo_gem *bo_gem;
-        while ((bo_gem = bucket->head) != NULL) {
-            bucket->head = bo_gem->next;
-            if (bo_gem->next == NULL)
-                bucket->tail = &bucket->head;
+        while (!DRMLISTEMPTY(&bucket->head)) {
+            bo_gem = DRMLISTENTRY(drm_intel_bo_gem, bucket->head.next, head);
+            DRMLISTDEL(&bo_gem->head);
            bucket->num_entries--;
            drm_intel_gem_bo_free(&bo_gem->bo);
@@ -1348,6 +1378,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
    bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
    bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
+    bufmgr_gem->bufmgr.bo_alloc_for_render = drm_intel_gem_bo_alloc_for_render;
    bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
    bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
    bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
@@ -1367,7 +1398,7 @@ drm_intel_bufmgr_gem_init(int fd, int batch_size)
    bufmgr_gem->bufmgr.check_aperture_space = drm_intel_gem_check_aperture_space;
    /* Initialize the linked lists for BO reuse cache. */
    for (i = 0; i < DRM_INTEL_GEM_BO_BUCKETS; i++)
-        bufmgr_gem->cache_bucket[i].tail = &bufmgr_gem->cache_bucket[i].head;
+        DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
    return &bufmgr_gem->bufmgr;
}
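
For reference, the list machinery this patch adopts: a rough paraphrase of the circular doubly-linked list helpers in libdrm_lists.h (the real header is authoritative). An empty list is a node linked to itself; bucket->head.next is the LRU end and bucket->head.prev the MRU end, which is why the render path takes head.prev while the map path probes head.next for idleness.

#include <stddef.h>   /* offsetof */

typedef struct _drmMMListHead {
    struct _drmMMListHead *prev;
    struct _drmMMListHead *next;
} drmMMListHead;

/* An empty list is a head linked to itself. */
#define DRMINITLISTHEAD(__item)          \
    do {                                 \
        (__item)->prev = (__item);       \
        (__item)->next = (__item);       \
    } while (0)

/* Link __item in just before __list, i.e. at the tail (MRU) end. */
#define DRMLISTADDTAIL(__item, __list)   \
    do {                                 \
        (__item)->next = (__list);       \
        (__item)->prev = (__list)->prev; \
        (__list)->prev->next = (__item); \
        (__list)->prev = (__item);       \
    } while (0)

/* Unlink __item from whatever list it is on. */
#define DRMLISTDEL(__item)                        \
    do {                                          \
        (__item)->prev->next = (__item)->next;    \
        (__item)->next->prev = (__item)->prev;    \
    } while (0)

#define DRMLISTEMPTY(__item) ((__item)->next == (__item))

/* Recover the containing struct from an embedded drmMMListHead. */
#define DRMLISTENTRY(__type, __item, __field) \
    ((__type *)(((char *)(__item)) - offsetof(__type, __field)))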