From 57b9a54eb668477407c8be54c041d7a9f92c1f51 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Tue, 4 Dec 2007 12:22:30 -0800 Subject: Allow relocation to be skipped when buffers don't move. One of the costs of superioctl has been the need to perform relocations inside the kernel. The cost of mapping the buffers to the CPU and writing data is fairly high, especially if those buffers have been mapped and read by the GPU. If we assume that buffers don't move around very often, we can have the client compute the relocations itself using the previous GPU address. When that object doesn't move, the kernel can skip computing and writing the updated data. Here's a patch which adds a new field to struct drm_bo_info_req called 'presumed_offset', and a new DRM_BO_HINT_PRESUMED_OFFSET that is set when this field has been filled in by the client. There are two separate optimizations performed when the presumed_offset is correct: 1. i915_exec_reloc checks to see if all previous buffer offsets were guessed correctly. If so, there's no need for it to look at *any* of the relocations for a buffer. When this happens, it skips the whole relocation process, simply returning success. 2. i915_apply_reloc checks to see if the target buffer offset was guessed correctly. If so, it skips mapping the relocatee, computing the relocation and writing the value. If no relocations are needed, the relocatee should never be mapped to the CPU, and so the kernel shouldn't need to wait for any fences to pass. --- shared-core/drm.h | 2 ++ shared-core/i915_dma.c | 62 ++++++++++++++++++++++++++++++++++++++------------ 2 files changed, 50 insertions(+), 14 deletions(-) (limited to 'shared-core') diff --git a/shared-core/drm.h b/shared-core/drm.h index ec07b895..70a25b24 100644 --- a/shared-core/drm.h +++ b/shared-core/drm.h @@ -753,6 +753,7 @@ struct drm_fence_arg { /* Don't place this buffer on the unfenced list.*/ #define DRM_BO_HINT_DONT_FENCE 0x00000004 #define DRM_BO_HINT_WAIT_LAZY 0x00000008 +#define DRM_BO_HINT_PRESUMED_OFFSET 0x00000010 #define DRM_BO_INIT_MAGIC 0xfe769812 #define DRM_BO_INIT_MAJOR 1 @@ -769,6 +770,7 @@ struct drm_bo_info_req { unsigned int desired_tile_stride; unsigned int tile_info; unsigned int pad64; + uint64_t presumed_offset; }; struct drm_bo_create_req { diff --git a/shared-core/i915_dma.c b/shared-core/i915_dma.c index 42114beb..090ac80a 100644 --- a/shared-core/i915_dma.c +++ b/shared-core/i915_dma.c @@ -712,15 +712,20 @@ struct i915_relocatee_info { int is_iomem; }; -static void i915_dereference_buffers_locked(struct drm_buffer_object **buffers, +struct drm_i915_validate_buffer { + struct drm_buffer_object *buffer; + int presumed_offset_correct; +}; + +static void i915_dereference_buffers_locked(struct drm_i915_validate_buffer *buffers, unsigned num_buffers) { while (num_buffers--) - drm_bo_usage_deref_locked(&buffers[num_buffers]); + drm_bo_usage_deref_locked(&buffers[num_buffers].buffer); } int i915_apply_reloc(struct drm_file *file_priv, int num_buffers, - struct drm_buffer_object **buffers, + struct drm_i915_validate_buffer *buffers, struct i915_relocatee_info *relocatee, uint32_t *reloc) { @@ -734,6 +739,13 @@ int i915_apply_reloc(struct drm_file *file_priv, int num_buffers, return -EINVAL; } + /* + * Short-circuit relocations that were correctly + * guessed by the client + */ + if (buffers[reloc[2]].presumed_offset_correct) + return 0; + new_cmd_offset = reloc[0]; if (!relocatee->data_page || !drm_bo_same_page(relocatee->offset, new_cmd_offset)) { @@ -751,7 +763,7 @@ int i915_apply_reloc(struct drm_file *file_priv, int num_buffers, relocatee->page_offset = (relocatee->offset & PAGE_MASK); } - val = buffers[reloc[2]]->offset; + val = buffers[reloc[2]].buffer->offset; index = (reloc[0] - relocatee->page_offset) >> 2; /* add in validate */ @@ -765,7 +777,7 @@ int i915_process_relocs(struct drm_file *file_priv, uint32_t buf_handle, uint32_t *reloc_buf_handle, struct i915_relocatee_info *relocatee, - struct drm_buffer_object **buffers, + struct drm_i915_validate_buffer *buffers, uint32_t num_buffers) { struct drm_device *dev = file_priv->head->dev; @@ -851,12 +863,25 @@ out: static int i915_exec_reloc(struct drm_file *file_priv, drm_handle_t buf_handle, drm_handle_t buf_reloc_handle, - struct drm_buffer_object **buffers, + struct drm_i915_validate_buffer *buffers, uint32_t buf_count) { struct drm_device *dev = file_priv->head->dev; struct i915_relocatee_info relocatee; int ret = 0; + int b; + + /* + * Short circuit relocations when all previous + * buffers offsets were correctly guessed by + * the client + */ + for (b = 0; b < buf_count; b++) + if (!buffers[b].presumed_offset_correct) + break; + + if (b == buf_count) + return 0; memset(&relocatee, 0, sizeof(relocatee)); @@ -890,7 +915,7 @@ out_err: */ int i915_validate_buffer_list(struct drm_file *file_priv, unsigned int fence_class, uint64_t data, - struct drm_buffer_object **buffers, + struct drm_i915_validate_buffer *buffers, uint32_t *num_buffers) { struct drm_i915_op_arg arg; @@ -910,7 +935,8 @@ int i915_validate_buffer_list(struct drm_file *file_priv, goto out_err; } - buffers[buf_count] = NULL; + buffers[buf_count].buffer = NULL; + buffers[buf_count].presumed_offset_correct = 0; if (copy_from_user(&arg, (void __user *)(unsigned long)data, sizeof(arg))) { ret = -EFAULT; @@ -920,7 +946,7 @@ int i915_validate_buffer_list(struct drm_file *file_priv, if (arg.handled) { data = arg.next; mutex_lock(&dev->struct_mutex); - buffers[buf_count] = drm_lookup_buffer_object(file_priv, req->arg_handle, 1); + buffers[buf_count].buffer = drm_lookup_buffer_object(file_priv, req->arg_handle, 1); mutex_unlock(&dev->struct_mutex); buf_count++; continue; @@ -951,13 +977,21 @@ int i915_validate_buffer_list(struct drm_file *file_priv, req->bo_req.hint, 0, &rep.bo_info, - &buffers[buf_count]); + &buffers[buf_count].buffer); if (rep.ret) { DRM_ERROR("error on handle validate %d\n", rep.ret); goto out_err; } - + /* + * If the user provided a presumed offset hint, check whether + * the buffer is in the same place, if so, relocations relative to + * this buffer need not be performed + */ + if ((req->bo_req.hint & DRM_BO_HINT_PRESUMED_OFFSET) && + buffers[buf_count].buffer->offset == req->bo_req.presumed_offset) { + buffers[buf_count].presumed_offset_correct = 1; + } next = arg.next; arg.handled = 1; @@ -991,7 +1025,7 @@ static int i915_execbuffer(struct drm_device *dev, void *data, struct drm_fence_arg *fence_arg = &exec_buf->fence_arg; int num_buffers; int ret; - struct drm_buffer_object **buffers; + struct drm_i915_validate_buffer *buffers; struct drm_fence_object *fence; if (!dev_priv->allow_batchbuffer) { @@ -1026,7 +1060,7 @@ static int i915_execbuffer(struct drm_device *dev, void *data, num_buffers = exec_buf->num_buffers; - buffers = drm_calloc(num_buffers, sizeof(struct drm_buffer_object *), DRM_MEM_DRIVER); + buffers = drm_calloc(num_buffers, sizeof(struct drm_i915_validate_buffer), DRM_MEM_DRIVER); if (!buffers) { drm_bo_read_unlock(&dev->bm.bm_lock); mutex_unlock(&dev_priv->cmdbuf_mutex); @@ -1044,7 +1078,7 @@ static int i915_execbuffer(struct drm_device *dev, void *data, drm_agp_chipset_flush(dev); /* submit buffer */ - batch->start = buffers[num_buffers-1]->offset; + batch->start = buffers[num_buffers-1].buffer->offset; DRM_DEBUG("i915 exec batchbuffer, start %x used %d cliprects %d\n", batch->start, batch->used, batch->num_cliprects); -- cgit v1.2.3 From 9ee511d786b1a87944f043c1a16057e8dfc48668 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Tue, 4 Dec 2007 20:54:53 -0800 Subject: Bump driver minor for relocation optimzations --- shared-core/i915_drv.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'shared-core') diff --git a/shared-core/i915_drv.h b/shared-core/i915_drv.h index cb336659..bdc78410 100644 --- a/shared-core/i915_drv.h +++ b/shared-core/i915_drv.h @@ -57,10 +57,11 @@ * 1.9: Usable page flipping and triple buffering * 1.10: Plane/pipe disentangling * 1.11: TTM superioctl + * 1.12: TTM relocation optimization */ #define DRIVER_MAJOR 1 #if defined(I915_HAVE_FENCE) && defined(I915_HAVE_BUFFER) -#define DRIVER_MINOR 11 +#define DRIVER_MINOR 12 #else #define DRIVER_MINOR 6 #endif -- cgit v1.2.3 From 4ec8f58d042d7fe0dab570fed35a438759645ca8 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Thu, 6 Dec 2007 15:12:21 -0800 Subject: i915: wait for buffer idle before writing relocations When writing a relocation entry, make sure the target buffer is idle, otherwise the GPU may see inconsistent data. --- shared-core/i915_dma.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'shared-core') diff --git a/shared-core/i915_dma.c b/shared-core/i915_dma.c index 090ac80a..18d2482e 100644 --- a/shared-core/i915_dma.c +++ b/shared-core/i915_dma.c @@ -751,6 +751,13 @@ int i915_apply_reloc(struct drm_file *file_priv, int num_buffers, !drm_bo_same_page(relocatee->offset, new_cmd_offset)) { drm_bo_kunmap(&relocatee->kmap); relocatee->offset = new_cmd_offset; + mutex_lock (&relocatee->buf->mutex); + ret = drm_bo_wait (relocatee->buf, 0, 0, FALSE); + mutex_unlock (&relocatee->buf->mutex); + if (ret) { + DRM_ERROR("Could not wait for buffer to apply relocs\n %08lx", new_cmd_offset); + return ret; + } ret = drm_bo_kmap(relocatee->buf, new_cmd_offset >> PAGE_SHIFT, 1, &relocatee->kmap); if (ret) { -- cgit v1.2.3 From 7dcaf0cdbb57dcf85aa8798736948c280d3966b2 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Tue, 11 Dec 2007 20:21:23 -0800 Subject: Make relocation validate client computed values when debugging --- shared-core/i915_dma.c | 30 +++++++++++++++++++++++------- 1 file changed, 23 insertions(+), 7 deletions(-) (limited to 'shared-core') diff --git a/shared-core/i915_dma.c b/shared-core/i915_dma.c index 18d2482e..80416726 100644 --- a/shared-core/i915_dma.c +++ b/shared-core/i915_dma.c @@ -702,7 +702,14 @@ static int i915_cmdbuffer(struct drm_device *dev, void *data, return 0; } +#if DRM_DEBUG_CODE +#define DRM_DEBUG_RELOCATION (drm_debug != 0) +#else +#define DRM_DEBUG_RELOCATION 0 +#endif + #ifdef I915_HAVE_BUFFER + struct i915_relocatee_info { struct drm_buffer_object *buf; unsigned long offset; @@ -743,7 +750,7 @@ int i915_apply_reloc(struct drm_file *file_priv, int num_buffers, * Short-circuit relocations that were correctly * guessed by the client */ - if (buffers[reloc[2]].presumed_offset_correct) + if (buffers[reloc[2]].presumed_offset_correct && !DRM_DEBUG_RELOCATION) return 0; new_cmd_offset = reloc[0]; @@ -776,6 +783,13 @@ int i915_apply_reloc(struct drm_file *file_priv, int num_buffers, /* add in validate */ val = val + reloc[1]; + if (DRM_DEBUG_RELOCATION) { + if (buffers[reloc[2]].presumed_offset_correct && + relocatee->data_page[index] != val) { + DRM_DEBUG ("Relocation mismatch source %d target %d buffer %d user %08x kernel %08x\n", + reloc[0], reloc[1], reloc[2], relocatee->data_page[index], val); + } + } relocatee->data_page[index] = val; return 0; } @@ -883,12 +897,14 @@ static int i915_exec_reloc(struct drm_file *file_priv, drm_handle_t buf_handle, * buffers offsets were correctly guessed by * the client */ - for (b = 0; b < buf_count; b++) - if (!buffers[b].presumed_offset_correct) - break; - - if (b == buf_count) - return 0; + if (!DRM_DEBUG_RELOCATION) { + for (b = 0; b < buf_count; b++) + if (!buffers[b].presumed_offset_correct) + break; + + if (b == buf_count) + return 0; + } memset(&relocatee, 0, sizeof(relocatee)); -- cgit v1.2.3