diff options
author | Eric Anholt <eric@anholt.net> | 2008-07-23 10:07:16 -0700 |
---|---|---|
committer | Eric Anholt <eric@anholt.net> | 2008-07-23 10:10:54 -0700 |
commit | 439d7106832f2e9742deb900d96f1d3bc07162b1 (patch) | |
tree | f016507c4003135279ac53cb8c5521217306c8a0 /linux-core | |
parent | bddb952578d58c4dcfafe969c045a39d27666b56 (diff) |
intel-gem: Add a quick hack to reduce clflushing on pread.
This increases overhead for the large-readpixels case due to the repeated
page cache accessing, but greatly reduces overhead for the small-readpixels
case.
Diffstat (limited to 'linux-core')
-rw-r--r-- | linux-core/i915_gem.c | 57 |
1 files changed, 50 insertions, 7 deletions
diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c index ca2dd19c..db068ce3 100644 --- a/linux-core/i915_gem.c +++ b/linux-core/i915_gem.c @@ -55,6 +55,9 @@ i915_gem_set_domain(struct drm_gem_object *obj, struct drm_file *file_priv, uint32_t read_domains, uint32_t write_domain); +static int i915_gem_object_get_page_list(struct drm_gem_object *obj); +static void i915_gem_object_free_page_list(struct drm_gem_object *obj); +static int i915_gem_object_wait_rendering(struct drm_gem_object *obj); static void i915_gem_clflush_object(struct drm_gem_object *obj); @@ -128,6 +131,7 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, { struct drm_i915_gem_pread *args = data; struct drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; ssize_t read; loff_t offset; int ret; @@ -135,15 +139,52 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data, obj = drm_gem_object_lookup(dev, file_priv, args->handle); if (obj == NULL) return -EINVAL; + obj_priv = obj->driver_private; - mutex_lock(&dev->struct_mutex); - ret = i915_gem_set_domain(obj, file_priv, - I915_GEM_DOMAIN_CPU, 0); - if (ret) { + /* Bounds check source. + * + * XXX: This could use review for overflow issues... + */ + if (args->offset > obj->size || args->size > obj->size || + args->offset + args->size > obj->size) { drm_gem_object_unreference(obj); - mutex_unlock(&dev->struct_mutex); - return ret; + return -EFAULT; + } + + mutex_lock(&dev->struct_mutex); + + /* Do a partial equivalent of i915_gem_set_domain(CPU, 0), as + * we don't want to clflush whole objects to read a portion of them. + * + * The side effect of doing this is that repeated preads of the same + * contents would take extra clflush overhead, since we don't track + * flushedness on a page basis. + */ + if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) { + ret = i915_gem_object_wait_rendering(obj); + if (ret) { + drm_gem_object_unreference(obj); + mutex_unlock(&dev->struct_mutex); + return ret; + } } + if ((obj->read_domains & I915_GEM_DOMAIN_CPU) == 0) { + int got_page_list = 0; + int first_page = args->offset / PAGE_SIZE; + int last_page = (args->offset + args->size) / PAGE_SIZE; + + if (obj_priv->page_list == NULL) { + i915_gem_object_get_page_list(obj); + got_page_list = 1; + } + + drm_ttm_cache_flush(&obj_priv->page_list[first_page], + last_page - first_page + 1); + + if (got_page_list) + i915_gem_object_free_page_list(obj); + } + offset = args->offset; read = vfs_read(obj->filp, (char __user *)(uintptr_t)args->data_ptr, @@ -329,8 +370,10 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data, * XXX: This could use review for overflow issues... */ if (args->offset > obj->size || args->size > obj->size || - args->offset + args->size > obj->size) + args->offset + args->size > obj->size) { + drm_gem_object_unreference(obj); return -EFAULT; + } /* We can only do the GTT pwrite on untiled buffers, as otherwise * it would end up going through the fenced access, and we'll get |