From af8e087157ef5034fa12d93202037f87da61355d Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Tue, 20 May 2008 14:03:27 -0700 Subject: [gem] Use a separate sequence number field from classic/ttm This lets us get some qualities we desire, such as using the full 32-bit range (except zero), avoiding DRM_WAIT_ON, and a 1:1 mapping of active sequence numbers to request structs, which will be used soon for throttling and interrupt-driven list cleanup. --- linux-core/i915_gem.c | 123 +++++++++++++++++++++++++++++++++++++++++++++++-- shared-core/i915_dma.c | 4 +- shared-core/i915_drv.h | 45 ++++++++++++++++++ 3 files changed, 168 insertions(+), 4 deletions(-) diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c index ee8f1023..3535fae1 100644 --- a/linux-core/i915_gem.c +++ b/linux-core/i915_gem.c @@ -115,6 +115,123 @@ i915_gem_object_move_to_inactive(struct drm_gem_object *obj) } } +/** + * Creates a new sequence number, emitting a write of it to the status page + * plus an interrupt, which will trigger i915_user_interrupt_handler. + * + * Must be called with struct_lock held. + * + * Returned sequence numbers are nonzero on success. + */ +static uint32_t +i915_add_request(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_i915_gem_request *request; + uint32_t seqno; + RING_LOCALS; + + request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER); + if (request == NULL) + return 0; + + /* Grab the seqno we're going to make this request be, and bump the + * next (skipping 0 so it can be the reserved no-seqno value). + */ + seqno = dev_priv->mm.next_gem_seqno; + dev_priv->mm.next_gem_seqno++; + if (dev_priv->mm.next_gem_seqno == 0) + dev_priv->mm.next_gem_seqno++; + + BEGIN_LP_RING(4); + OUT_RING(CMD_STORE_DWORD_IDX); + OUT_RING(I915_GEM_HWS_INDEX << STORE_DWORD_INDEX_SHIFT); + OUT_RING(seqno); + + OUT_RING(GFX_OP_USER_INTERRUPT); + ADVANCE_LP_RING(); + + DRM_DEBUG("%d\n", seqno); + + request->seqno = seqno; + list_add_tail(&request->list, &dev_priv->mm.request_list); + + return seqno; +} + +/** + * Returns true if seq1 is later than seq2. + */ +static int +i915_seqno_passed(uint32_t seq1, uint32_t seq2) +{ + return (int32_t)(seq1 - seq2) >= 0; +} + +static uint32_t +i915_get_gem_seqno(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + + return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX); +} + +/** + * This function clears the request list as sequence numbers are passed. + */ +void +i915_gem_retire_requests(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + uint32_t seqno; + + seqno = i915_get_gem_seqno(dev); + + while (!list_empty(&dev_priv->mm.request_list)) { + struct drm_i915_gem_request *request; + uint32_t retiring_seqno; + + request = list_first_entry(&dev_priv->mm.request_list, + struct drm_i915_gem_request, + list); + retiring_seqno = request->seqno; + + if (i915_seqno_passed(seqno, retiring_seqno)) { + list_del(&request->list); + drm_free(request, sizeof(*request), DRM_MEM_DRIVER); + } else + break; + } +} + +/** + * Waits for a sequence number to be signaled, and cleans up the + * request and object lists appropriately for that event. + */ +int +i915_wait_request(struct drm_device *dev, uint32_t seqno) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + int ret = 0; + + BUG_ON(seqno == 0); + + i915_user_irq_on(dev_priv); + ret = wait_event_interruptible(dev_priv->irq_queue, + i915_seqno_passed(i915_get_gem_seqno(dev), + seqno)); + i915_user_irq_off(dev_priv); + + /* Directly dispatch request retiring. While we have the work queue + * to handle this, the waiter on a request often wants an associated + * buffer to have made it to the inactive list, and we would need + * a separate wait queue to handle that. + */ + if (ret == 0) + i915_gem_retire_requests(dev); + + return ret; +} static void i915_gem_flush(struct drm_device *dev, @@ -207,7 +324,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj) obj->write_domain = 0; i915_gem_object_move_to_active(obj); - obj_priv->last_rendering_seqno = i915_emit_irq(dev); + obj_priv->last_rendering_seqno = i915_add_request(dev); BUG_ON(obj_priv->last_rendering_seqno == 0); #if WATCH_LRU DRM_INFO("%s: flush moves to exec list %p\n", __func__, obj); @@ -221,7 +338,7 @@ i915_gem_object_wait_rendering(struct drm_gem_object *obj) DRM_INFO("%s: object %p wait for seqno %08x\n", __func__, obj, obj_priv->last_rendering_seqno); #endif - ret = i915_wait_irq(dev, obj_priv->last_rendering_seqno); + ret = i915_wait_request(dev, obj_priv->last_rendering_seqno); if (ret != 0) return ret; @@ -995,7 +1112,7 @@ i915_gem_execbuffer(struct drm_device *dev, void *data, * *some* interrupts representing completion of buffers that we can * wait on when trying to clear up gtt space). */ - seqno = i915_emit_irq(dev); + seqno = i915_add_request(dev); BUG_ON(seqno == 0); for (i = 0; i < args->buffer_count; i++) { struct drm_gem_object *obj = object_list[i]; diff --git a/shared-core/i915_dma.c b/shared-core/i915_dma.c index 0e832057..30ba8c65 100644 --- a/shared-core/i915_dma.c +++ b/shared-core/i915_dma.c @@ -524,7 +524,7 @@ void i915_emit_breadcrumb(struct drm_device *dev) BEGIN_LP_RING(4); OUT_RING(CMD_STORE_DWORD_IDX); - OUT_RING(20); + OUT_RING(5 << STORE_DWORD_INDEX_SHIFT); OUT_RING(dev_priv->counter); OUT_RING(0); ADVANCE_LP_RING(); @@ -1053,6 +1053,8 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags) INIT_LIST_HEAD(&dev_priv->mm.active_list); INIT_LIST_HEAD(&dev_priv->mm.inactive_list); + INIT_LIST_HEAD(&dev_priv->mm.request_list); + dev_priv->mm.next_gem_seqno = 1; #ifdef __linux__ #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25) diff --git a/shared-core/i915_drv.h b/shared-core/i915_drv.h index 79c607bc..ef41b433 100644 --- a/shared-core/i915_drv.h +++ b/shared-core/i915_drv.h @@ -260,6 +260,14 @@ typedef struct drm_i915_private { * freed, and we'll pull it off the list in the free path. */ struct list_head inactive_list; + + /** + * List of breadcrumbs associated with GPU requests currently + * outstanding. + */ + struct list_head request_list; + + uint32_t next_gem_seqno; } mm; } drm_i915_private_t; @@ -309,6 +317,23 @@ struct drm_i915_gem_object { uint32_t last_rendering_seqno; }; +/** + * Request queue structure. + * + * The request queue allows us to note sequence numbers that have been emitted + * and may be associated with active buffers to be retired. + * + * By keeping this list, we can avoid having to do questionable + * sequence-number comparisons on buffer last_rendering_seqnos, and associate + * an emission time with seqnos for tracking how far ahead of the GPU we are. + */ +struct drm_i915_gem_request { + /** GEM sequence number associated with this request. */ + uint32_t seqno; + + struct list_head list; +}; + extern struct drm_ioctl_desc i915_ioctls[]; extern int i915_max_ioctl; @@ -506,7 +531,12 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); #define GFX_OP_BREAKPOINT_INTERRUPT ((0<<29)|(1<<23)) #define CMD_REPORT_HEAD (7<<23) #define CMD_STORE_DWORD_IMM ((0x20<<23) | (0x1 << 22) | 0x1) +/** + * Stores a 32-bit integer to the status page at the dword index given. + */ #define CMD_STORE_DWORD_IDX ((0x21<<23) | 0x1) +# define STORE_DWORD_INDEX_SHIFT 2 + #define CMD_OP_BATCH_BUFFER ((0x0<<29)|(0x30<<23)|0x1) #define CMD_MI_FLUSH (0x04 << 23) @@ -855,7 +885,22 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller); #define BREADCRUMB_MASK ((1U << BREADCRUMB_BITS) - 1) #define READ_BREADCRUMB(dev_priv) (((volatile u32*)(dev_priv->hw_status_page))[5]) + +/** + * Reads a dword out of the status page, which is written to from the command + * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or + * MI_STORE_DATA_IMM. + * + * The following dwords have a reserved meaning: + * 0: ISR copy, updated when an ISR bit not set in the HWSTAM changes. + * 4: ring 0 head pointer + * 5: ring 1 head pointer (915-class) + * 6: ring 2 head pointer (915-class) + * + * The area from dword 0x10 to 0x3ff is available for driver usage. + */ #define READ_HWSP(dev_priv, reg) (((volatile u32*)(dev_priv->hw_status_page))[reg]) +#define I915_GEM_HWS_INDEX 0x10 #define BLC_PWM_CTL 0x61254 #define BACKLIGHT_MODULATION_FREQ_SHIFT (17) -- cgit v1.2.3