summaryrefslogtreecommitdiff
path: root/linux-core
diff options
context:
space:
mode:
authorKeith Packard <keithp@keithp.com>2008-06-23 00:53:53 -0700
committerKeith Packard <keithp@keithp.com>2008-06-24 09:52:05 -0700
commit71d975072cf57507385bdf8e0bf4af4c23b1fceb (patch)
treec1a7dc2913e2c3610c5d4d3e1fb68eff088a51c3 /linux-core
parenta369bf0e575697308690f532576caf652e42b4cb (diff)
[intel-gem] pwrite through GTT
Pin/copy_from_user/unpin through the GTT to eliminate clflush costs. Benchmarks say this helps quite a bit.
Diffstat (limited to 'linux-core')
-rw-r--r--linux-core/i915_gem.c158
1 files changed, 125 insertions, 33 deletions
diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c
index 4361b060..4dfbd2a1 100644
--- a/linux-core/i915_gem.c
+++ b/linux-core/i915_gem.c
@@ -36,7 +36,14 @@
#define WATCH_LRU 0
#define WATCH_RELOC 0
#define WATCH_INACTIVE 0
+#define WATCH_PWRITE 0
+#if WATCH_BUF | WATCH_EXEC | WATCH_PWRITE
+static void
+i915_gem_dump_object(struct drm_gem_object *obj, int len,
+ const char *where, uint32_t mark);
+#endif
+
static int
i915_gem_object_set_domain(struct drm_gem_object *obj,
uint32_t read_domains,
@@ -154,6 +161,8 @@ i915_gem_pread_ioctl(struct drm_device *dev, void *data,
return 0;
}
+#include "drm_compat.h"
+
/**
* Writes data to the object referenced by handle.
*
@@ -165,41 +174,121 @@ i915_gem_pwrite_ioctl(struct drm_device *dev, void *data,
{
struct drm_i915_gem_pwrite *args = data;
struct drm_gem_object *obj;
- ssize_t written;
+ struct drm_i915_gem_object *obj_priv;
+ ssize_t remain;
loff_t offset;
- int ret;
+ char __user *user_data;
+ char *vaddr;
+ int i, o, l;
+ int ret = 0;
+ unsigned long pfn;
+ unsigned long unwritten;
obj = drm_gem_object_lookup(dev, file_priv, args->handle);
if (obj == NULL)
return -EINVAL;
+ /** Bounds check destination.
+ *
+ * XXX: This could use review for overflow issues...
+ */
+ if (args->offset > obj->size || args->size > obj->size ||
+ args->offset + args->size > obj->size)
+ return -EFAULT;
+
+ user_data = (char __user *) (uintptr_t) args->data_ptr;
+ remain = args->size;
+ if (!access_ok(VERIFY_READ, user_data, remain))
+ return -EFAULT;
+
+
mutex_lock(&dev->struct_mutex);
- ret = i915_gem_set_domain(obj, file_priv,
- I915_GEM_DOMAIN_CPU, I915_GEM_DOMAIN_CPU);
+ ret = i915_gem_object_pin(obj, 0);
if (ret) {
drm_gem_object_unreference(obj);
mutex_unlock(&dev->struct_mutex);
return ret;
}
- offset = args->offset;
+ ret = i915_gem_set_domain(obj, file_priv,
+ I915_GEM_DOMAIN_GTT, I915_GEM_DOMAIN_GTT);
+ if (ret)
+ goto fail;
+
+ obj_priv = obj->driver_private;
+ offset = obj_priv->gtt_offset + args->offset;
+ obj_priv->dirty = 1;
+
+ while (remain > 0) {
+
+ /** Operation in this page
+ *
+ * i = page number
+ * o = offset within page
+ * l = bytes to copy
+ */
+ i = offset >> PAGE_SHIFT;
+ o = offset & (PAGE_SIZE-1);
+ l = remain;
+ if ((o + l) > PAGE_SIZE)
+ l = PAGE_SIZE - o;
+
+ pfn = (dev->agp->base >> PAGE_SHIFT) + i;
+
+#ifdef DRM_KMAP_ATOMIC_PROT_PFN
+ /* kmap_atomic can't map IO pages on non-HIGHMEM kernels
+ */
+ vaddr = kmap_atomic_prot_pfn(pfn, KM_USER0,
+ __pgprot(__PAGE_KERNEL));
+#if WATCH_PWRITE
+ DRM_INFO("pwrite i %d o %d l %d pfn %ld vaddr %p\n",
+ i, o, l, pfn, vaddr);
+#endif
+ unwritten = __copy_from_user_inatomic_nocache(vaddr + o, user_data, l);
+ kunmap_atomic(vaddr, KM_USER0);
- written = vfs_write(obj->filp,
- (char __user *)(uintptr_t) args->data_ptr,
- args->size, &offset);
+ if (unwritten)
+#endif
+ {
+ vaddr = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+#if WATCH_PWRITE
+ DRM_INFO("pwrite slow i %d o %d l %d pfn %ld vaddr %p\n",
+ i, o, l, pfn, vaddr);
+#endif
+ if (vaddr == NULL) {
+ ret = -EFAULT;
+ goto fail;
+ }
+ unwritten = __copy_from_user(vaddr + o, user_data, l);
+#if WATCH_PWRITE
+ DRM_INFO("unwritten %ld\n", unwritten);
+#endif
+ iounmap(vaddr);
+ if (unwritten) {
+ ret = -EFAULT;
+ goto fail;
+ }
+ }
- if (written != args->size) {
- drm_gem_object_unreference(obj);
- mutex_unlock(&dev->struct_mutex);
- if (written < 0)
- return written;
- else
- return -EINVAL;
+ remain -= l;
+ user_data += l;
+ offset += l;
}
+#if WATCH_PWRITE && 1
+ i915_gem_clflush_object(obj);
+ i915_gem_dump_object(obj, args->offset + args->size, __func__, ~0);
+ i915_gem_clflush_object(obj);
+#endif
+fail:
+ i915_gem_object_unpin (obj);
drm_gem_object_unreference(obj);
mutex_unlock(&dev->struct_mutex);
- return 0;
+#if WATCH_PWRITE
+ if (ret)
+ DRM_INFO("pwrite failed %d\n", ret);
+#endif
+ return ret;
}
/**
@@ -361,7 +450,7 @@ i915_verify_inactive(struct drm_device *dev, char *file, int line)
list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) {
obj = obj_priv->obj;
- if (obj_priv->pin_count || obj_priv->active || (obj->write_domain & ~I915_GEM_DOMAIN_CPU))
+ if (obj_priv->pin_count || obj_priv->active || (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))
DRM_ERROR("inactive %p (p %d a %d w %x) %s:%d\n",
obj,
obj_priv->pin_count, obj_priv->active, obj->write_domain, file, line);
@@ -644,7 +733,7 @@ i915_gem_flush(struct drm_device *dev,
if (flush_domains & I915_GEM_DOMAIN_CPU)
drm_agp_chipset_flush(dev);
- if ((invalidate_domains|flush_domains) & ~I915_GEM_DOMAIN_CPU) {
+ if ((invalidate_domains|flush_domains) & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
/*
* read/write caches:
*
@@ -712,7 +801,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
/* If there are writes queued to the buffer, flush and
* create a new seqno to wait for.
*/
- if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU)) {
+ if (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) {
uint32_t write_domain = obj->write_domain;
#if WATCH_BUF
DRM_INFO("%s: flushing object %p from write domain %08x\n",
@@ -798,11 +887,13 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
i915_gem_object_free_page_list(obj);
- atomic_dec(&dev->gtt_count);
- atomic_sub(obj->size, &dev->gtt_memory);
-
- drm_memrange_put_block(obj_priv->gtt_space);
- obj_priv->gtt_space = NULL;
+ if (obj_priv->gtt_space) {
+ atomic_dec(&dev->gtt_count);
+ atomic_sub(obj->size, &dev->gtt_memory);
+
+ drm_memrange_put_block(obj_priv->gtt_space);
+ obj_priv->gtt_space = NULL;
+ }
/* Remove ourselves from the LRU list if present. */
if (!list_empty(&obj_priv->list))
@@ -811,7 +902,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
return 0;
}
-#if WATCH_BUF | WATCH_EXEC
+#if WATCH_BUF | WATCH_EXEC | WATCH_PWRITE
static void
i915_gem_dump_page(struct page *page, uint32_t start, uint32_t end,
uint32_t bias, uint32_t mark)
@@ -1105,8 +1196,8 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
* wasn't in the GTT, there shouldn't be any way it could have been in
* a GPU cache
*/
- BUG_ON(obj->read_domains & ~I915_GEM_DOMAIN_CPU);
- BUG_ON(obj->write_domain & ~I915_GEM_DOMAIN_CPU);
+ BUG_ON(obj->read_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
+ BUG_ON(obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
return 0;
}
@@ -1289,7 +1380,7 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
* flushed before the cpu cache is invalidated
*/
if ((invalidate_domains & I915_GEM_DOMAIN_CPU) &&
- (flush_domains & ~I915_GEM_DOMAIN_CPU)) {
+ (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))) {
ret = i915_gem_object_wait_rendering(obj);
if (ret)
return ret;
@@ -1911,7 +2002,7 @@ i915_gem_object_pin(struct drm_gem_object *obj, uint32_t alignment)
if (obj_priv->pin_count == 1) {
atomic_inc(&dev->pin_count);
atomic_add(obj->size, &dev->pin_memory);
- if (!obj_priv->active && (obj->write_domain & ~I915_GEM_DOMAIN_CPU) == 0 &&
+ if (!obj_priv->active && (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) == 0 &&
!list_empty(&obj_priv->list))
list_del_init(&obj_priv->list);
}
@@ -1937,7 +2028,7 @@ i915_gem_object_unpin(struct drm_gem_object *obj)
* the inactive list
*/
if (obj_priv->pin_count == 0) {
- if (!obj_priv->active && (obj->write_domain & ~I915_GEM_DOMAIN_CPU) == 0)
+ if (!obj_priv->active && (obj->write_domain & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT)) == 0)
list_move_tail(&obj_priv->list,
&dev_priv->mm.inactive_list);
atomic_dec(&dev->pin_count);
@@ -2096,7 +2187,7 @@ i915_gem_set_domain(struct drm_gem_object *obj,
return ret;
flush_domains = i915_gem_dev_set_domain(obj->dev);
- if (flush_domains & ~I915_GEM_DOMAIN_CPU)
+ if (flush_domains & ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT))
(void) i915_add_request(dev, flush_domains);
return 0;
@@ -2154,8 +2245,9 @@ i915_gem_idle(struct drm_device *dev)
/* Flush the GPU along with all non-CPU write domains
*/
- i915_gem_flush(dev, ~I915_GEM_DOMAIN_CPU, ~I915_GEM_DOMAIN_CPU);
- seqno = i915_add_request(dev, ~I915_GEM_DOMAIN_CPU);
+ i915_gem_flush(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT),
+ ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
+ seqno = i915_add_request(dev, ~(I915_GEM_DOMAIN_CPU|I915_GEM_DOMAIN_GTT));
if (seqno == 0) {
mutex_unlock(&dev->struct_mutex);