author     Rob Clark <robclark@freedesktop.org>    2013-12-13 12:48:30 -0500
committer  Rob Clark <robclark@freedesktop.org>    2013-12-13 15:48:10 -0500
commit     068ea68b3f7ebd5efcfcc2f6ae417651423c8382
tree       e094e44e8d01c33ab8e5d74aa489308f267d3eea /freedreno/freedreno_device.c
parent     1489811a805fb6b5b19d61fa99b9b962cc06bd22
freedreno: add bo cache
Workloads which create many transient buffers cause significant CPU
overhead in buffer allocation, zeroing, cache maintenance, and mmap
setup.  By caching and re-using existing buffers, the CPU overhead
drops significantly.  See:

http://bloggingthemonkey.blogspot.com/2013/09/freedreno-update-moar-fps.html

A simple time based policy is used for purging the cache.  Once the
kernel supports it, we could use a madvise style API to handle memory
pressure scenarios a bit better.

Signed-off-by: Rob Clark <robclark@freedesktop.org>
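The reuse path itself lands in freedreno_bo.c and is not part of this
file's diff, so the following is only a minimal sketch of how an
allocation might be satisfied from the size buckets that
init_cache_buckets() (below) sets up.  The struct fd_bo_bucket shape
mirrors the fields this diff touches; get_bucket(), bo_from_cache(),
and the fd_bo list field are illustrative assumptions, and the list
helpers are the same freedreno/list.h ones the diff already uses.

/* Hedged sketch, not the committed code: pick the smallest bucket
 * that fits the requested size, then reuse an idle BO from that
 * bucket's list if one is available.
 */

struct fd_bo_bucket {
	uint32_t size;            /* fixed BO size this bucket caches */
	struct list_head list;    /* idle BOs, oldest first */
};

static struct fd_bo_bucket *
get_bucket(struct fd_device *dev, uint32_t size)
{
	int i;

	/* init_cache_buckets() fills the array in ascending order
	 * (4k, 8k, 12k, 16k, 20k, 24k, 28k, 32k, ... up to 64MB),
	 * so the first bucket big enough is also the smallest fit:
	 */
	for (i = 0; i < dev->num_buckets; i++) {
		struct fd_bo_bucket *bucket = &dev->cache_bucket[i];
		if (bucket->size >= size)
			return bucket;
	}

	return NULL;  /* bigger than cache_max_size: allocate fresh */
}

static struct fd_bo *
bo_from_cache(struct fd_device *dev, uint32_t size)
{
	struct fd_bo_bucket *bucket = get_bucket(dev, size);

	if (bucket && !LIST_IS_EMPTY(&bucket->list)) {
		struct fd_bo *bo = LIST_ENTRY(struct fd_bo,
				bucket->list.next, list);
		list_del(&bo->list);  /* unlink from the idle list */
		return bo;            /* cache hit: skip GEM alloc/mmap */
	}

	return NULL;  /* cache miss: caller does a real allocation */
}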
Diffstat (limited to 'freedreno/freedreno_device.c')
-rw-r--r--	freedreno/freedreno_device.c	60
1 file changed, 56 insertions(+), 4 deletions(-)
diff --git a/freedreno/freedreno_device.c b/freedreno/freedreno_device.c
index 1e3d9df2..6486983d 100644
--- a/freedreno/freedreno_device.c
+++ b/freedreno/freedreno_device.c
@@ -39,6 +39,44 @@ static void * dev_table;
struct fd_device * kgsl_device_new(int fd);
struct fd_device * msm_device_new(int fd);
+static void
+add_bucket(struct fd_device *dev, int size)
+{
+ unsigned int i = dev->num_buckets;
+
+ assert(i < ARRAY_SIZE(dev->cache_bucket));
+
+ list_inithead(&dev->cache_bucket[i].list);
+ dev->cache_bucket[i].size = size;
+ dev->num_buckets++;
+}
+
+static void
+init_cache_buckets(struct fd_device *dev)
+{
+ unsigned long size, cache_max_size = 64 * 1024 * 1024;
+
+ /* OK, so power of two buckets was too wasteful of memory.
+ * Give 3 other sizes between each power of two, to hopefully
+ * cover things accurately enough. (The alternative is
+ * probably to just go for exact matching of sizes, and assume
+ * that for things like composited window resize the tiled
+ * width/height alignment and rounding of sizes to pages will
+ * get us useful cache hit rates anyway)
+ */
+ add_bucket(dev, 4096);
+ add_bucket(dev, 4096 * 2);
+ add_bucket(dev, 4096 * 3);
+
+ /* Initialize the linked lists for BO reuse cache. */
+ for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
+ add_bucket(dev, size);
+ add_bucket(dev, size + size * 1 / 4);
+ add_bucket(dev, size + size * 2 / 4);
+ add_bucket(dev, size + size * 3 / 4);
+ }
+}
+
static struct fd_device * fd_device_new_impl(int fd)
{
struct fd_device *dev;
@@ -69,6 +107,7 @@ static struct fd_device * fd_device_new_impl(int fd)
dev->fd = fd;
dev->handle_table = drmHashCreate();
dev->name_table = drmHashCreate();
+ init_cache_buckets(dev);
return dev;
}
@@ -102,14 +141,27 @@ struct fd_device * fd_device_ref(struct fd_device *dev)
return dev;
}
+static void fd_device_del_impl(struct fd_device *dev)
+{
+ fd_cleanup_bo_cache(dev, 0);
+ drmHashDestroy(dev->handle_table);
+ drmHashDestroy(dev->name_table);
+ drmHashDelete(dev_table, dev->fd);
+ dev->funcs->destroy(dev);
+}
+
+void fd_device_del_locked(struct fd_device *dev)
+{
+ if (!atomic_dec_and_test(&dev->refcnt))
+ return;
+ fd_device_del_impl(dev);
+}
+
void fd_device_del(struct fd_device *dev)
{
if (!atomic_dec_and_test(&dev->refcnt))
return;
pthread_mutex_lock(&table_lock);
- drmHashDestroy(dev->handle_table);
- drmHashDestroy(dev->name_table);
- drmHashDelete(dev_table, dev->fd);
+ fd_device_del_impl(dev);
pthread_mutex_unlock(&table_lock);
- dev->funcs->destroy(dev);
}
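fd_device_del_impl() above calls fd_cleanup_bo_cache(dev, 0) to force a
full purge at teardown; the time-based policy the commit message
mentions lives in freedreno_bo.c and is not shown in this diff.  Below
is a hedged sketch of such a policy, reusing the bucket shape from the
earlier sketch; the free_time field, the bo_del() helper, and the
one-second threshold are assumptions for illustration.

static void
cleanup_bo_cache_sketch(struct fd_device *dev, time_t time)
{
	int i;

	for (i = 0; i < dev->num_buckets; i++) {
		struct fd_bo_bucket *bucket = &dev->cache_bucket[i];

		/* each bucket list is kept in free order, so walk
		 * oldest-first and stop at the first BO that is
		 * still fresh enough to keep around:
		 */
		while (!LIST_IS_EMPTY(&bucket->list)) {
			struct fd_bo *bo = LIST_ENTRY(struct fd_bo,
					bucket->list.next, list);

			/* time == 0 (device teardown) purges everything */
			if (time && ((time - bo->free_time) <= 1))
				break;

			list_del(&bo->list);
			bo_del(bo);  /* assumed helper: real GEM free */
		}
	}
}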