summaryrefslogtreecommitdiff
path: root/linux-core
diff options
context:
space:
mode:
Diffstat (limited to 'linux-core')
-rw-r--r--linux-core/i915_gem.c123
1 files changed, 118 insertions, 5 deletions
diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c
index 60a8fa58..b3b2dbce 100644
--- a/linux-core/i915_gem.c
+++ b/linux-core/i915_gem.c
@@ -746,12 +746,12 @@ i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment)
return -ENOMEM;
}
- /* When we have just bound an object, we have no valid read
- * caches on it, regardless of where it was before. We also need
- * an MI_FLUSH to occur so that the render and sampler TLBs
- * get flushed and pick up our binding change above.
+ /* Assert that the object is not currently in any GPU domain. As it
+ * wasn't in the GTT, there shouldn't be any way it could have been in
+ * a GPU cache
*/
- obj->read_domains = 0;
+ BUG_ON(obj->read_domains & ~DRM_GEM_DOMAIN_CPU);
+ BUG_ON(obj->write_domain & ~DRM_GEM_DOMAIN_CPU);
return 0;
}
@@ -775,6 +775,112 @@ i915_gem_clflush_object(struct drm_gem_object *obj)
* Set the next domain for the specified object. This
* may not actually perform the necessary flushing/invaliding though,
* as that may want to be batched with other set_domain operations
+ *
+ * This is (we hope) the only really tricky part of gem. The goal
+ * is fairly simple -- track which caches hold bits of the object
+ * and make sure they remain coherent. A few concrete examples may
+ * help to explain how it works. For shorthand, we use the notation
+ * (read_domains, write_domain), e.g. (CPU, CPU) to indicate the
+ * a pair of read and write domain masks.
+ *
+ * Case 1: the batch buffer
+ *
+ * 1. Allocated
+ * 2. Written by CPU
+ * 3. Mapped to GTT
+ * 4. Read by GPU
+ * 5. Unmapped from GTT
+ * 6. Freed
+ *
+ * Let's take these a step at a time
+ *
+ * 1. Allocated
+ * Pages allocated from the kernel may still have
+ * cache contents, so we set them to (CPU, CPU) always.
+ * 2. Written by CPU (using pwrite)
+ * The pwrite function calls set_domain (CPU, CPU) and
+ * this function does nothing (as nothing changes)
+ * 3. Mapped by GTT
+ * This function asserts that the object is not
+ * currently in any GPU-based read or write domains
+ * 4. Read by GPU
+ * i915_gem_execbuffer calls set_domain (COMMAND, 0).
+ * As write_domain is zero, this function adds in the
+ * current read domains (CPU+COMMAND, 0).
+ * flush_domains is set to CPU.
+ * invalidate_domains is set to COMMAND
+ * clflush is run to get data out of the CPU caches
+ * then i915_dev_set_domain calls i915_gem_flush to
+ * emit an MI_FLUSH and drm_agp_chipset_flush
+ * 5. Unmapped from GTT
+ * i915_gem_object_unbind calls set_domain (CPU, CPU)
+ * flush_domains and invalidate_domains end up both zero
+ * so no flushing/invalidating happens
+ * 6. Freed
+ * yay, done
+ *
+ * Case 2: The shared render buffer
+ *
+ * 1. Allocated
+ * 2. Mapped to GTT
+ * 3. Read/written by GPU
+ * 4. set_domain to (CPU,CPU)
+ * 5. Read/written by CPU
+ * 6. Read/written by GPU
+ *
+ * 1. Allocated
+ * Same as last example, (CPU, CPU)
+ * 2. Mapped to GTT
+ * Nothing changes (assertions find that it is not in the GPU)
+ * 3. Read/written by GPU
+ * execbuffer calls set_domain (RENDER, RENDER)
+ * flush_domains gets CPU
+ * invalidate_domains gets GPU
+ * clflush (obj)
+ * MI_FLUSH and drm_agp_chipset_flush
+ * 4. set_domain (CPU, CPU)
+ * flush_domains gets GPU
+ * invalidate_domains gets CPU
+ * wait_rendering (obj) to make sure all drawing is complete.
+ * This will include an MI_FLUSH to get the data from GPU
+ * to memory
+ * clflush (obj) to invalidate the CPU cache
+ * Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
+ * 5. Read/written by CPU
+ * cache lines are loaded and dirtied
+ * 6. Read written by GPU
+ * Same as last GPU access
+ *
+ * Case 3: The constant buffer
+ *
+ * 1. Allocated
+ * 2. Written by CPU
+ * 3. Read by GPU
+ * 4. Updated (written) by CPU again
+ * 5. Read by GPU
+ *
+ * 1. Allocated
+ * (CPU, CPU)
+ * 2. Written by CPU
+ * (CPU, CPU)
+ * 3. Read by GPU
+ * (CPU+RENDER, 0)
+ * flush_domains = CPU
+ * invalidate_domains = RENDER
+ * clflush (obj)
+ * MI_FLUSH
+ * drm_agp_chipset_flush
+ * 4. Updated (written) by CPU again
+ * (CPU, CPU)
+ * flush_domains = 0 (no previous write domain)
+ * invalidate_domains = 0 (no new read domains)
+ * 5. Read by GPU
+ * (CPU+RENDER, 0)
+ * flush_domains = CPU
+ * invalidate_domains = RENDER
+ * clflush (obj)
+ * MI_FLUSH
+ * drm_agp_chipset_flush
*/
static void
i915_gem_object_set_domain(struct drm_gem_object *obj,
@@ -790,6 +896,13 @@ i915_gem_object_set_domain(struct drm_gem_object *obj,
__func__, obj, read_domains, write_domain);
#endif
/*
+ * If the object isn't moving to a new write domain,
+ * let the object stay in multiple read domains
+ */
+ if (write_domain == 0)
+ read_domains |= obj->read_domains;
+
+ /*
* Flush the current write domain if
* the new read domains don't match. Invalidate
* any read domains which differ from the old