summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeith Packard <keithp@keithp.com>2008-05-06 20:00:23 -0700
committerKeith Packard <keithp@keithp.com>2008-05-06 20:00:23 -0700
commit61253f4f677518537368103799c9510b8b5ad1e3 (patch)
tree8ea2d019cef0e1bdad109b839dc911a4abe3e7c6
parent2b9ef32669acf8197cf7d9b73b851c001db494cd (diff)
[intel-GEM] Add memory domain support.
Memory domains allow the kernel to track which caches to flush and how to move objects before buffer execution.
-rw-r--r--linux-core/drm_gem.c41
-rw-r--r--linux-core/i915_gem.c156
2 files changed, 134 insertions, 63 deletions
diff --git a/linux-core/drm_gem.c b/linux-core/drm_gem.c
index 929c008f..e2272f27 100644
--- a/linux-core/drm_gem.c
+++ b/linux-core/drm_gem.c
@@ -325,6 +325,10 @@ drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
drm_gem_object_unreference(obj);
if (IS_ERR((void *)addr))
return (int) addr;
+
+ /* XXX hack until we have a driver callback to make this work */
+ obj->read_domains = DRM_GEM_DOMAIN_CPU;
+ obj->write_domain = DRM_GEM_DOMAIN_CPU;
args->addr_ptr = (uint64_t) addr;
@@ -540,40 +544,3 @@ drm_gem_object_handle_free (struct kref *kref)
}
EXPORT_SYMBOL(drm_gem_object_handle_free);
-/*
- * Set the next domain for the specified object. This
- * may not actually perform the necessary flushing/invaliding though,
- * as that may want to be batched with other set_domain operations
- */
-int drm_gem_object_set_domain (struct drm_gem_object *obj,
- uint32_t read_domains,
- uint32_t write_domain)
-{
- struct drm_device *dev = obj->dev;
- uint32_t invalidate_domains = 0;
- uint32_t flush_domains = 0;
-
- /*
- * Flush the current write domain if
- * the new read domains don't match. Invalidate
- * any read domains which differ from the old
- * write domain
- */
- if (obj->write_domain && obj->write_domain != read_domains)
- {
- flush_domains |= obj->write_domain;
- invalidate_domains |= read_domains & ~obj->write_domain;
- }
- /*
- * Invalidate any read caches which may have
- * stale data. That is, any new read domains.
- */
- invalidate_domains |= read_domains & ~obj->read_domains;
- obj->write_domain = write_domain;
- obj->read_domain = read_domains;
- if ((flush_domains | invalidate_domains) & DRM_GEM_DOMAIN_CPU)
- drm_gem_object_clflush (obj);
- dev->invalidate_domains |= invalidate_domains & ~DRM_GEM_DOMAIN_CPU;
- dev->flush_domains |= flush_domains & ~DRM_GEM_DOMAIN_CPU;
-}
-EXPORT_SYMBOL(drm_gem_object_set_domain);
diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c
index d630b0b2..911f9aa8 100644
--- a/linux-core/i915_gem.c
+++ b/linux-core/i915_gem.c
@@ -30,8 +30,8 @@
#include "i915_drm.h"
#include "i915_drv.h"
-#define WATCH_BUF 0
-#define WATCH_EXEC 0
+#define WATCH_BUF 1
+#define WATCH_EXEC 1
int
i915_gem_init_ioctl(struct drm_device *dev, void *data,
@@ -73,9 +73,33 @@ i915_gem_object_free_page_list(struct drm_gem_object *obj)
obj_priv->page_list = NULL;
}
+static void
+i915_gem_flush(struct drm_device *dev, uint32_t domains)
+{
+ drm_i915_private_t *dev_priv = dev->dev_private;
+ uint32_t cmd;
+ RING_LOCALS;
+
+#if WATCH_EXEC
+ DRM_INFO ("%s: flush %08x\n", __FUNCTION__, domains);
+#endif
+ cmd = CMD_MI_FLUSH | MI_NO_WRITE_FLUSH;
+ if (domains & DRM_GEM_DOMAIN_I915_RENDER)
+ cmd &= ~MI_NO_WRITE_FLUSH;
+ if (domains & DRM_GEM_DOMAIN_I915_SAMPLER)
+ cmd |= MI_READ_FLUSH;
+ if (domains & DRM_GEM_DOMAIN_I915_INSTRUCTION)
+ cmd |= MI_EXE_FLUSH;
+
+ BEGIN_LP_RING(2);
+ OUT_RING(cmd);
+ OUT_RING(0); /* noop */
+ ADVANCE_LP_RING();
+}
+
/**
* Ensures that all rendering to the object has completed and the object is
- * safe to unbind from the GTT.
+ * safe to unbind from the GTT or access from the CPU.
*/
static int
i915_gem_object_wait_rendering(struct drm_gem_object *obj)
@@ -84,10 +108,27 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj)
struct drm_i915_gem_object *obj_priv = obj->driver_private;
int ret;
+ /* If there are writes queued to the buffer, flush and
+ * create a new cookie to wait for.
+ */
+ if (obj->write_domain & ~(DRM_GEM_DOMAIN_CPU))
+ {
+#if WATCH_BUF
+ DRM_INFO ("%s: flushing object %p from write domain %08x\n",
+ __FUNCTION__, obj, obj->write_domain);
+#endif
+ i915_gem_flush (dev, obj->write_domain);
+ obj->write_domain = 0;
+ obj_priv->last_rendering_cookie = i915_emit_irq (dev);
+ }
/* If there is rendering queued on the buffer being evicted, wait for
* it.
*/
if (obj_priv->last_rendering_cookie != 0) {
+#if WATCH_BUF
+ DRM_INFO ("%s: object %p wait for cookie %08x\n",
+ __FUNCTION__, obj, obj_priv->last_rendering_cookie);
+#endif
ret = i915_wait_irq(dev, obj_priv->last_rendering_cookie);
if (ret != 0)
return ret;
@@ -125,6 +166,7 @@ i915_gem_object_unbind(struct drm_gem_object *obj)
drm_memrange_put_block(obj_priv->gtt_space);
obj_priv->gtt_space = NULL;
list_del_init(&obj_priv->gtt_lru_entry);
+ drm_gem_object_unreference (obj);
}
#if 0
@@ -277,10 +319,6 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
unlock_page (obj_priv->page_list[i]);
}
- drm_ttm_cache_flush (obj_priv->page_list, page_count);
- DRM_MEMORYBARRIER();
- drm_agp_chipset_flush(dev);
-
/* Create an AGP memory structure pointing at our pages, and bind it
* into the GTT.
*/
@@ -298,6 +336,73 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
return 0;
}
+static void
+i915_gem_clflush_object (struct drm_gem_object *obj)
+{
+ struct drm_device *dev = obj->dev;
+ struct drm_i915_gem_object *obj_priv = obj->driver_private;
+
+ drm_ttm_cache_flush (obj_priv->page_list, obj->size / PAGE_SIZE);
+ drm_agp_chipset_flush(dev);
+}
+
+/*
+ * Set the next domain for the specified object. This
+ * may not actually perform the necessary flushing/invaliding though,
+ * as that may want to be batched with other set_domain operations
+ */
+static void
+i915_gem_object_set_domain (struct drm_gem_object *obj,
+ uint32_t read_domains,
+ uint32_t write_domain)
+{
+ struct drm_device *dev = obj->dev;
+ uint32_t invalidate_domains = 0;
+ uint32_t flush_domains = 0;
+
+#if WATCH_BUF
+ DRM_INFO ("%s: object %p read %08x write %08x\n",
+ __FUNCTION__, obj, read_domains, write_domain);
+#endif
+ /*
+ * Flush the current write domain if
+ * the new read domains don't match. Invalidate
+ * any read domains which differ from the old
+ * write domain
+ */
+ if (obj->write_domain && obj->write_domain != read_domains)
+ {
+ flush_domains |= obj->write_domain;
+ invalidate_domains |= read_domains & ~obj->write_domain;
+ }
+ /*
+ * Invalidate any read caches which may have
+ * stale data. That is, any new read domains.
+ */
+ invalidate_domains |= read_domains & ~obj->read_domains;
+ if ((flush_domains | invalidate_domains) & DRM_GEM_DOMAIN_CPU)
+ {
+#if WATCH_BUF
+ DRM_INFO ("%s: CPU domain flush %08x invalidate %08x\n",
+ __FUNCTION__, flush_domains, invalidate_domains);
+#endif
+ /*
+ * If we're invaliding the CPU cache and flushing a GPU cache,
+ * then pause for rendering so that the GPU caches will be
+ * flushed before the cpu cache is invalidated
+ */
+ if ((invalidate_domains & DRM_GEM_DOMAIN_CPU) &&
+ (flush_domains & ~DRM_GEM_DOMAIN_CPU))
+ i915_gem_object_wait_rendering (obj);
+ i915_gem_clflush_object (obj);
+ }
+
+ obj->write_domain = write_domain;
+ obj->read_domains = read_domains;
+ dev->invalidate_domains |= invalidate_domains & ~DRM_GEM_DOMAIN_CPU;
+ dev->flush_domains |= flush_domains & ~DRM_GEM_DOMAIN_CPU;
+}
+
static int
i915_gem_reloc_and_validate_object(struct drm_gem_object *obj,
struct drm_file *file_priv,
@@ -318,12 +423,17 @@ i915_gem_reloc_and_validate_object(struct drm_gem_object *obj,
if (obj_priv->gtt_space == NULL)
return -ENOMEM;
}
+
+ /* Do domain migration */
+ i915_gem_object_set_domain (obj, entry->read_domains, entry->write_domain);
+
entry->buffer_offset = obj_priv->gtt_offset;
if (obj_priv->pin_count == 0) {
/* Move our buffer to the head of the LRU. */
list_del_init(&obj_priv->gtt_lru_entry);
list_add(&obj_priv->gtt_lru_entry, &dev_priv->mm.gtt_lru);
+ drm_gem_object_reference (obj);
}
relocs = (struct drm_i915_gem_relocation_entry __user *) (uintptr_t) entry->relocs_ptr;
@@ -413,18 +523,6 @@ i915_gem_reloc_and_validate_object(struct drm_gem_object *obj,
return 0;
}
-static void
-i915_gem_flush(struct drm_device *dev)
-{
- drm_i915_private_t *dev_priv = dev->dev_private;
- RING_LOCALS;
-
- BEGIN_LP_RING(2);
- OUT_RING(CMD_MI_FLUSH | MI_READ_FLUSH | MI_EXE_FLUSH);
- OUT_RING(0); /* noop */
- ADVANCE_LP_RING();
-}
-
static int
i915_dispatch_gem_execbuffer (struct drm_device * dev,
struct drm_i915_gem_execbuffer * exec,
@@ -556,6 +654,17 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
}
}
+ if (dev->invalidate_domains | dev->flush_domains)
+ {
+#if WATCH_EXEC
+ DRM_INFO ("%s: invalidate_domains %08x flush_domains %08x\n",
+ __FUNCTION__, dev->invalidate_domains, dev->flush_domains);
+#endif
+ i915_gem_flush (dev, dev->invalidate_domains | dev->flush_domains);
+ dev->invalidate_domains = 0;
+ dev->flush_domains = 0;
+ }
+
exec_offset = validate_list[args->buffer_count - 1].buffer_offset;
/* make sure all previous memory operations have passed */
@@ -575,14 +684,9 @@ i915_gem_execbuffer(struct drm_device *dev, void *data,
goto err;
}
- /* Flush the rendering. We want this flush to go away, which will
- * require intelligent cache management.
- */
- i915_gem_flush(dev);
-
- /* Get a cookie representing the flush of the current buffer, which we
+ /* Get a cookie representing the execution of the current buffer, which we
* can wait on. We would like to mitigate these interrupts, likely by
- * only flushing occasionally (so that we have *some* interrupts
+ * only creating cookies occasionally (so that we have *some* interrupts
* representing completion of buffers that we can wait on when trying
* to clear up gtt space).
*/