From 15995234b4d6cb848d919b0342b5697fffe80c89 Mon Sep 17 00:00:00 2001 From: Felix Kuehling Date: Sun, 6 Mar 2005 01:41:06 +0000 Subject: Added support for command DMA on Savage4-based hardware. Unfortunately command and vertex DMA don't work at the same time. Command DMA performance is superior and works with all vertex formats. Bumped minor version and driver date. --- shared-core/savage_bci.c | 315 +++++++++++++++++++++++++++++++++++++++++++-- shared-core/savage_drv.h | 87 ++++++++++++- shared-core/savage_state.c | 184 ++++++++++++++++---------- 3 files changed, 506 insertions(+), 80 deletions(-) (limited to 'shared-core') diff --git a/shared-core/savage_bci.c b/shared-core/savage_bci.c index 3ddcccb6..8c58873a 100644 --- a/shared-core/savage_bci.c +++ b/shared-core/savage_bci.c @@ -47,6 +47,7 @@ savage_bci_wait_fifo_shadow(drm_savage_private_t *dev_priv, unsigned int n) #endif for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) { + DRM_MEMORYBARRIER(); status = dev_priv->status_ptr[0]; if ((status & mask) < threshold) return 0; @@ -120,6 +121,7 @@ savage_bci_wait_event_shadow(drm_savage_private_t *dev_priv, uint16_t e) int i; for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) { + DRM_MEMORYBARRIER(); status = dev_priv->status_ptr[1]; if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff || (status & 0xffff) == 0) @@ -247,7 +249,7 @@ static drm_buf_t *savage_freelist_get(drm_device_t *dev) event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; wrap = dev_priv->event_wrap; if (event > dev_priv->event_counter) - wrap--; /* hardware hasn't passed the last wrap yet */ + wrap--; /* hardware hasn't passed the last wrap yet */ DRM_DEBUG(" tail=0x%04x %d\n", tail->age.event, tail->age.wrap); DRM_DEBUG(" head=0x%04x %d\n", event, wrap); @@ -285,6 +287,225 @@ void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf) entry->next = next; } +/* + * Command DMA + */ +static int savage_dma_init(drm_savage_private_t *dev_priv) +{ + unsigned int i; + + dev_priv->nr_dma_pages = 
dev_priv->cmd_dma->size / + (SAVAGE_DMA_PAGE_SIZE*4); + dev_priv->dma_pages = drm_alloc(sizeof(drm_savage_dma_page_t) * + dev_priv->nr_dma_pages, + DRM_MEM_DRIVER); + if (dev_priv->dma_pages == NULL) + return DRM_ERR(ENOMEM); + + for (i = 0; i < dev_priv->nr_dma_pages; ++i) { + dev_priv->dma_pages[i].age.event = 0; + dev_priv->dma_pages[i].age.wrap = 0; + dev_priv->dma_pages[i].used = 0; + } + + dev_priv->first_dma_page = 0; + dev_priv->current_dma_page = 0; + + return 0; +} + +void savage_dma_reset(drm_savage_private_t *dev_priv) +{ + uint16_t event; + unsigned int wrap, i; + event = savage_bci_emit_event(dev_priv, 0); + wrap = dev_priv->event_wrap; + for (i = 0; i < dev_priv->nr_dma_pages; ++i) { + SET_AGE(&dev_priv->dma_pages[i].age, event, wrap); + dev_priv->dma_pages[i].used = 0; + } + dev_priv->first_dma_page = dev_priv->current_dma_page = 0; +} + +void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page) +{ + uint16_t event; + unsigned int wrap; + + /* Faked DMA buffer pages don't age. 
*/ + if (dev_priv->cmd_dma == &dev_priv->fake_dma) + return; + + UPDATE_EVENT_COUNTER(); + if (dev_priv->status_ptr) + event = dev_priv->status_ptr[1] & 0xffff; + else + event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; + wrap = dev_priv->event_wrap; + if (event > dev_priv->event_counter) + wrap--; /* hardware hasn't passed the last wrap yet */ + /* A page is still in flight when its (wrap, event) age is newer than the hardware position; wrap is the high-order part, so a later wrap always means "wait". */ + if (dev_priv->dma_pages[page].age.wrap > wrap || + (dev_priv->dma_pages[page].age.wrap == wrap && dev_priv->dma_pages[page].age.event > event)) { + if (dev_priv->wait_evnt(dev_priv, + dev_priv->dma_pages[page].age.event) + < 0) + DRM_ERROR("wait_evnt failed!\n"); + } +} + +uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, unsigned int n) +{ + unsigned int cur = dev_priv->current_dma_page; + unsigned int rest = SAVAGE_DMA_PAGE_SIZE - + dev_priv->dma_pages[cur].used; + unsigned int nr_pages = (n - rest + SAVAGE_DMA_PAGE_SIZE-1) / + SAVAGE_DMA_PAGE_SIZE; + uint32_t *dma_ptr; + unsigned int i; + + DRM_DEBUG("cur=%u, cur->used=%u, n=%u, rest=%u, nr_pages=%u\n", + cur, dev_priv->dma_pages[cur].used, n, rest, nr_pages); + + if (cur + nr_pages < dev_priv->nr_dma_pages) { + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + cur*SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used; + if (n < rest) + rest = n; + dev_priv->dma_pages[cur].used += rest; + n -= rest; + cur++; + } else { + dev_priv->dma_flush(dev_priv); + nr_pages = (n + SAVAGE_DMA_PAGE_SIZE-1) / SAVAGE_DMA_PAGE_SIZE; + for (i = cur+1; i < dev_priv->nr_dma_pages; ++i) { + dev_priv->dma_pages[i].age = + dev_priv->dma_pages[cur].age; + dev_priv->dma_pages[i].used = 0; + } + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle; + dev_priv->first_dma_page = cur = 0; + } + for (i = cur; nr_pages > 0; ++i, --nr_pages) { +#if SAVAGE_DMA_DEBUG + if (dev_priv->dma_pages[i].used) { + DRM_ERROR("unflushed page %u: used=%u\n", + i, dev_priv->dma_pages[i].used); + } +#endif + if (n > SAVAGE_DMA_PAGE_SIZE) + dev_priv->dma_pages[i].used = SAVAGE_DMA_PAGE_SIZE; + else + dev_priv->dma_pages[i].used = n; + n -= 
SAVAGE_DMA_PAGE_SIZE; + } + dev_priv->current_dma_page = --i; + + DRM_DEBUG("cur=%u, cur->used=%u, n=%u\n", + i, dev_priv->dma_pages[i].used, n); + + savage_dma_wait(dev_priv, dev_priv->current_dma_page); + + return dma_ptr; +} + +static void savage_dma_flush(drm_savage_private_t *dev_priv) +{ + BCI_LOCALS; + unsigned int cur = dev_priv->current_dma_page; + uint16_t event; + unsigned int wrap, pad, len, i; + unsigned long phys_addr; + + if (dev_priv->first_dma_page == dev_priv->current_dma_page && + dev_priv->dma_pages[dev_priv->current_dma_page].used == 0) + return; + + /* pad to multiples of 8 entries (really needed? 2 should do it) */ + pad = -dev_priv->dma_pages[cur].used & 7; + DRM_DEBUG("used=%d, pad=%u\n", dev_priv->dma_pages[cur].used, pad); + + if (pad) { + uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + cur * SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used; + dev_priv->dma_pages[cur].used += pad; + while(pad != 0) { + *dma_ptr++ = BCI_CMD_WAIT; + pad--; + } + } + + DRM_MEMORYBARRIER(); + + /* do flush ... 
*/ + phys_addr = dev_priv->cmd_dma->offset + + dev_priv->first_dma_page * SAVAGE_DMA_PAGE_SIZE*4; + len = (cur - dev_priv->first_dma_page) * SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used; + + DRM_DEBUG("phys_addr=%lx, len=%u\n", + phys_addr | dev_priv->dma_type, len); + + BEGIN_BCI(3); + BCI_SET_REGISTERS(SAVAGE_DMABUFADDR, 1); + BCI_WRITE(phys_addr | dev_priv->dma_type); + BCI_DMA(len); + + /* age DMA pages */ + event = savage_bci_emit_event(dev_priv, 0); + wrap = dev_priv->event_wrap; + for (i = dev_priv->first_dma_page; + i <= dev_priv->current_dma_page; ++i) { + SET_AGE(&dev_priv->dma_pages[i].age, event, wrap); + dev_priv->dma_pages[i].used = 0; + } + + /* advance to next page */ + if (i == dev_priv->nr_dma_pages) + i = 0; + dev_priv->first_dma_page = dev_priv->current_dma_page = i; +} + +static void savage_fake_dma_flush(drm_savage_private_t *dev_priv) +{ + BCI_LOCALS; + unsigned int i, j; + if (dev_priv->first_dma_page == dev_priv->current_dma_page && + dev_priv->dma_pages[dev_priv->current_dma_page].used == 0) + return; + + DRM_DEBUG("first=%u, cur=%u, cur->used=%u\n", + dev_priv->first_dma_page, dev_priv->current_dma_page, + dev_priv->dma_pages[dev_priv->current_dma_page].used); + + for (i = dev_priv->first_dma_page; + i <= dev_priv->current_dma_page && dev_priv->dma_pages[i].used; + ++i) { + uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + i * SAVAGE_DMA_PAGE_SIZE; +#if SAVAGE_DMA_DEBUG + /* Sanity check: all pages except the last one must be full. 
*/ + if (i < dev_priv->current_dma_page && + dev_priv->dma_pages[i].used != SAVAGE_DMA_PAGE_SIZE) { + DRM_ERROR("partial DMA page %u: used=%u\n", + i, dev_priv->dma_pages[i].used); + } +#endif + BEGIN_BCI(dev_priv->dma_pages[i].used); + for (j = 0; j < dev_priv->dma_pages[i].used; ++j) { + BCI_WRITE(dma_ptr[j]); + } + dev_priv->dma_pages[i].used = 0; + } + + /* advance to next page */ + if (i == dev_priv->nr_dma_pages) + i = 0; + dev_priv->first_dma_page = dev_priv->current_dma_page = i; +} + /* * Initalize permanent mappings. On Savage4 and SavageIX the alignment * and size of the aperture is not suitable for automatic MTRR setup @@ -464,14 +685,20 @@ static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init) } else { dev_priv->status = NULL; } - if (dev_priv->dma_type == SAVAGE_DMA_AGP) { + if (dev_priv->dma_type == SAVAGE_DMA_AGP && init->buffers_offset) { dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); if (!dev->agp_buffer_map) { - DRM_ERROR("could not find dma buffer region!\n"); + DRM_ERROR("could not find DMA buffer region!\n"); savage_do_cleanup_bci(dev); return DRM_ERR(EINVAL); } + drm_core_ioremap(dev->agp_buffer_map, dev); + if (!dev->agp_buffer_map->handle) { /* map pointer was checked above; test the mapping result, as the cmd_dma path does */ + DRM_ERROR("failed to ioremap DMA buffer region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } } if (init->agp_textures_offset) { dev_priv->agp_textures = @@ -484,25 +711,65 @@ static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init) } else { dev_priv->agp_textures = NULL; } - if (0 && !S3_SAVAGE3D_SERIES(dev_priv->chipset)) { - /* command DMA not implemented yet */ + + if (init->cmd_dma_offset) { + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + DRM_ERROR("command DMA not supported on " + "Savage3D/MX/IX.\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + if (dev->dma && dev->dma->buflist) { + DRM_ERROR("command and vertex DMA not supported " + "at the same time.\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); 
+ } dev_priv->cmd_dma = drm_core_findmap(dev, init->cmd_dma_offset); if (!dev_priv->cmd_dma) { DRM_ERROR("could not find command DMA region!\n"); savage_do_cleanup_bci(dev); return DRM_ERR(EINVAL); } + if (dev_priv->dma_type == SAVAGE_DMA_AGP) { + if (dev_priv->cmd_dma->type != _DRM_AGP) { + DRM_ERROR("AGP command DMA region is not a " + "_DRM_AGP map!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + drm_core_ioremap(dev_priv->cmd_dma, dev); + if (!dev_priv->cmd_dma->handle) { + DRM_ERROR("failed to ioremap command " + "DMA region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + } else if (dev_priv->cmd_dma->type != _DRM_CONSISTENT) { + DRM_ERROR("PCI command DMA region is not a " + "_DRM_CONSISTENT map!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } } else { dev_priv->cmd_dma = NULL; } - if (dev_priv->cmd_dma && dev_priv->dma_type == SAVAGE_DMA_AGP) { - drm_core_ioremap(dev_priv->cmd_dma, dev); - if (!dev_priv->cmd_dma->handle) { - DRM_ERROR("failed to ioremap command DMA region!\n"); + dev_priv->dma_flush = savage_dma_flush; + if (!dev_priv->cmd_dma) { + DRM_DEBUG("falling back to faked command DMA.\n"); + dev_priv->fake_dma.offset = 0; + dev_priv->fake_dma.size = SAVAGE_FAKE_DMA_SIZE; + dev_priv->fake_dma.type = _DRM_SHM; + dev_priv->fake_dma.handle = drm_alloc(SAVAGE_FAKE_DMA_SIZE, + DRM_MEM_DRIVER); + if (!dev_priv->fake_dma.handle) { + DRM_ERROR("could not allocate faked DMA buffer!\n"); savage_do_cleanup_bci(dev); return DRM_ERR(ENOMEM); } + dev_priv->cmd_dma = &dev_priv->fake_dma; + dev_priv->dma_flush = savage_fake_dma_flush; } dev_priv->sarea_priv = @@ -578,6 +845,12 @@ static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init) return DRM_ERR(ENOMEM); } + if (savage_dma_init(dev_priv) < 0) { + DRM_ERROR("could not initialize command DMA\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + return 0; } @@ -585,9 +858,29 @@ int savage_do_cleanup_bci(drm_device_t *dev) { 
drm_savage_private_t *dev_priv = dev->dev_private; - if (dev_priv->cmd_dma && dev_priv->dma_type == SAVAGE_DMA_AGP) + if (dev_priv->cmd_dma == &dev_priv->fake_dma) { + if (dev_priv->fake_dma.handle) + drm_free(dev_priv->fake_dma.handle, + SAVAGE_FAKE_DMA_SIZE, DRM_MEM_DRIVER); + } else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle && + dev_priv->cmd_dma->type == _DRM_AGP && + dev_priv->dma_type == SAVAGE_DMA_AGP) drm_core_ioremapfree(dev_priv->cmd_dma, dev); + if (dev_priv->dma_type == SAVAGE_DMA_AGP && + dev->agp_buffer_map && dev->agp_buffer_map->handle) { + drm_core_ioremapfree(dev->agp_buffer_map, dev); + /* make sure the next instance (which may be running + * in PCI mode) doesn't try to use an old + * agp_buffer_map. */ + dev->agp_buffer_map = NULL; + } + + if (dev_priv->dma_pages) + drm_free(dev_priv->dma_pages, + sizeof(drm_savage_dma_page_t)*dev_priv->nr_dma_pages, + DRM_MEM_DRIVER); + return 0; } @@ -651,7 +944,7 @@ int savage_bci_event_wait(DRM_IOCTL_ARGS) hw_e = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; hw_w = dev_priv->event_wrap; if (hw_e > dev_priv->event_counter) - hw_w--; /* hardware hasn't passed the last wrap yet */ + hw_w--; /* hardware hasn't passed the last wrap yet */ event_e = event.count & 0xffff; event_w = event.count >> 16; diff --git a/shared-core/savage_drv.h b/shared-core/savage_drv.h index 2b44e529..ae37a48f 100644 --- a/shared-core/savage_drv.h +++ b/shared-core/savage_drv.h @@ -30,10 +30,10 @@ #define DRIVER_NAME "savage" #define DRIVER_DESC "Savage3D/MX/IX, Savage4, SuperSavage, Twister, ProSavage[DDR]" -#define DRIVER_DATE "20050222" +#define DRIVER_DATE "20050305" #define DRIVER_MAJOR 2 -#define DRIVER_MINOR 3 +#define DRIVER_MINOR 4 #define DRIVER_PATCHLEVEL 0 /* Interface history: * @@ -45,6 +45,8 @@ * 2.3 Event counters used by BCI_EVENT_EMIT/WAIT ioctls are now 32 bits * wide and thus very long lived (unlikely to ever wrap). 
The size * in the struct was 32 bits before, but only 16 bits were used + * 2.4 Implemented command DMA. Now drm_savage_init_t.cmd_dma_offset is + * actually used */ typedef struct drm_savage_age { @@ -59,6 +61,16 @@ typedef struct drm_savage_buf_priv { drm_buf_t *buf; } drm_savage_buf_priv_t; +typedef struct drm_savage_dma_page { + drm_savage_age_t age; + unsigned int used; +} drm_savage_dma_page_t; +#define SAVAGE_DMA_PAGE_SIZE 1024 /* in dwords */ +/* Fake DMA buffer size in bytes. 4 pages. Allows a maximum command + * size of 16kbytes or 4k entries. Minimum requirement would be + * 10kbytes for 255 40-byte vertices in one drawing command. */ +#define SAVAGE_FAKE_DMA_SIZE (SAVAGE_DMA_PAGE_SIZE*4*4) + /* interesting bits of hardware state that are saved in dev_priv */ typedef union { struct drm_savage_common_state { @@ -143,6 +155,7 @@ typedef struct drm_savage_private { drm_local_map_t *status; drm_local_map_t *agp_textures; drm_local_map_t *cmd_dma; + drm_local_map_t fake_dma; struct { int handle; @@ -155,6 +168,10 @@ typedef struct drm_savage_private { uint16_t event_counter; unsigned int event_wrap; + /* Savage4 command DMA */ + drm_savage_dma_page_t *dma_pages; + unsigned int nr_dma_pages, first_dma_page, current_dma_page; + /* saved hw state for global/local check on S3D */ uint32_t hw_draw_ctrl, hw_zbuf_ctrl; /* and for scissors (global, so don't emit if not changed) */ @@ -172,6 +189,7 @@ typedef struct drm_savage_private { * Avoid unwanted macro expansion. 
*/ void (*emit_clip_rect)(struct drm_savage_private *dev_priv, drm_clip_rect_t *pbox); + void (*dma_flush)(struct drm_savage_private *dev_priv); } drm_savage_private_t; /* ioctls */ @@ -185,6 +203,10 @@ extern int savage_bci_buffers(DRM_IOCTL_ARGS); extern uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv, unsigned int flags); extern void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf); +extern void savage_dma_reset(drm_savage_private_t *dev_priv); +extern void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page); +extern uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, + unsigned int n); extern int savage_preinit(drm_device_t *dev, unsigned long chipset); extern int savage_postcleanup(drm_device_t *dev); extern int savage_do_cleanup_bci(drm_device_t *dev); @@ -290,6 +312,7 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, /* common stuff */ #define SAVAGE_VERTBUFADDR 0x3e #define SAVAGE_BITPLANEWTMASK 0xd7 +#define SAVAGE_DMABUFADDR 0x51 /* texture enable bits (needed for tex addr checking) */ #define SAVAGE_TEXCTRL_TEXEN_MASK 0x00010000 /* S3D */ @@ -408,6 +431,8 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, #define BCI_CMD_DRAW_NO_V1 0x00000080 #define BCI_CMD_DRAW_NO_UV1 0x000000c0 +#define BCI_CMD_DMA 0xa8000000 + #define BCI_W_H(w, h) ((((h) << 16) | (w)) & 0x0FFF0FFF) #define BCI_X_Y(x, y) ((((y) << 16) | (x)) & 0x0FFF0FFF) #define BCI_X_W(x, y) ((((w) << 16) | (x)) & 0x0FFF0FFF) @@ -431,10 +456,17 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, BCI_WRITE(BCI_CMD_SET_REGISTER | \ ((uint32_t)(n) & 0xff) << 16 | \ ((uint32_t)(first) & 0xffff)) +#define DMA_SET_REGISTERS( first, n ) \ + DMA_WRITE(BCI_CMD_SET_REGISTER | \ + ((uint32_t)(n) & 0xff) << 16 | \ + ((uint32_t)(first) & 0xffff)) #define BCI_DRAW_PRIMITIVE(n, type, skip) \ BCI_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \ ((n) << 16)) +#define DMA_DRAW_PRIMITIVE(n, type, skip) \ + 
DMA_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \ + ((n) << 16)) #define BCI_DRAW_INDICES_S3D(n, type, i0) \ BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) | \ @@ -444,6 +476,9 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) | \ (skip) | ((n) << 16)) +#define BCI_DMA(n) \ + BCI_WRITE(BCI_CMD_DMA | (((n) >> 1) - 1)) + /* * access to MMIO */ @@ -473,6 +508,54 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, } \ } while(0) +/* + * command DMA support + */ +#define SAVAGE_DMA_DEBUG 1 + +#define DMA_LOCALS uint32_t *dma_ptr; + +#define BEGIN_DMA( n ) do { \ + unsigned int cur = dev_priv->current_dma_page; \ + unsigned int rest = SAVAGE_DMA_PAGE_SIZE - \ + dev_priv->dma_pages[cur].used; \ + if ((n) > rest) { \ + dma_ptr = savage_dma_alloc(dev_priv, (n)); \ + } else { /* fast path for small allocations */ \ + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + \ + cur * SAVAGE_DMA_PAGE_SIZE + \ + dev_priv->dma_pages[cur].used; \ + if (dev_priv->dma_pages[cur].used == 0) \ + savage_dma_wait(dev_priv, cur); \ + dev_priv->dma_pages[cur].used += (n); \ + } \ +} while(0) + +#define DMA_WRITE( val ) *dma_ptr++ = (uint32_t)(val) + +#define DMA_COPY_FROM_USER(src,n) do { \ + DRM_COPY_FROM_USER_UNCHECKED(dma_ptr, (src), (n)*4); \ + dma_ptr += n; \ +} while(0) + +#if SAVAGE_DMA_DEBUG +#define DMA_COMMIT() do { \ + unsigned int cur = dev_priv->current_dma_page; \ + uint32_t *expected = (uint32_t *)dev_priv->cmd_dma->handle + \ + cur * SAVAGE_DMA_PAGE_SIZE + \ + dev_priv->dma_pages[cur].used; \ + if (dma_ptr != expected) { \ + DRM_ERROR("DMA allocation and use don't match: " \ + "%p != %p\n", expected, dma_ptr); \ + savage_dma_reset(dev_priv); \ + } \ +} while(0) +#else +#define DMA_COMMIT() do {/* nothing */} while(0) +#endif + +#define DMA_FLUSH() dev_priv->dma_flush(dev_priv) + /* Buffer aging via event tag */ diff --git a/shared-core/savage_state.c b/shared-core/savage_state.c index 
f1e424a7..cc386527 100644 --- a/shared-core/savage_state.c +++ b/shared-core/savage_state.c @@ -39,15 +39,16 @@ void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv, ((((uint32_t)pbox->y2-1) << 16) & 0x07ff0000); if (scstart != dev_priv->state.s3d.scstart || scend != dev_priv->state.s3d.scend) { - BCI_LOCALS; - BEGIN_BCI(4); - BCI_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); - BCI_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2); - BCI_WRITE(scstart); - BCI_WRITE(scend); + DMA_LOCALS; + BEGIN_DMA(4); + DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); + DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2); + DMA_WRITE(scstart); + DMA_WRITE(scend); dev_priv->state.s3d.scstart = scstart; dev_priv->state.s3d.scend = scend; dev_priv->waiting = 1; + DMA_COMMIT(); } } @@ -64,15 +65,16 @@ void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, ((((uint32_t)pbox->y2-1) << 12) & 0x00fff000); if (drawctrl0 != dev_priv->state.s4.drawctrl0 || drawctrl1 != dev_priv->state.s4.drawctrl1) { - BCI_LOCALS; - BEGIN_BCI(4); - BCI_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); - BCI_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2); - BCI_WRITE(drawctrl0); - BCI_WRITE(drawctrl1); + DMA_LOCALS; + BEGIN_DMA(4); + DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); + DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2); + DMA_WRITE(drawctrl0); + DMA_WRITE(drawctrl1); dev_priv->state.s4.drawctrl0 = drawctrl0; dev_priv->state.s4.drawctrl1 = drawctrl1; dev_priv->waiting = 1; + DMA_COMMIT(); } } @@ -192,7 +194,7 @@ static int savage_dispatch_state(drm_savage_private_t *dev_priv, const drm_savage_cmd_header_t *cmd_header, const uint32_t __user *regs) { - BCI_LOCALS; + DMA_LOCALS; unsigned int count = cmd_header->state.count; unsigned int start = cmd_header->state.start; unsigned int count2 = 0; @@ -244,18 +246,18 @@ static int savage_dispatch_state(drm_savage_private_t *dev_priv, bci_size = count + (count+254)/255 + count2 + (count2+254)/255; if (cmd_header->state.global) { - BEGIN_BCI(bci_size+1); - BCI_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); + 
BEGIN_DMA(bci_size+1); + DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); dev_priv->waiting = 1; } else { - BEGIN_BCI(bci_size); + BEGIN_DMA(bci_size); } do { while (count > 0) { unsigned int n = count < 255 ? count : 255; - BCI_SET_REGISTERS(start, n); - BCI_COPY_FROM_USER(regs, n); + DMA_SET_REGISTERS(start, n); + DMA_COPY_FROM_USER(regs, n); count -= n; start += n; regs += n; @@ -266,6 +268,8 @@ static int savage_dispatch_state(drm_savage_private_t *dev_priv, count2 = 0; } while (count); + DMA_COMMIT(); + return 0; } @@ -281,6 +285,11 @@ static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv, unsigned int start = cmd_header->prim.start; unsigned int i; + if (!dmabuf) { + DRM_ERROR("called without dma buffers!\n"); + return DRM_ERR(EINVAL); + } + if (!n) return 0; @@ -335,6 +344,11 @@ static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv, return DRM_ERR(EINVAL); } + /* Vertex DMA doesn't work with command DMA at the same time, + * so we use BCI_... to submit commands here. Flush buffered + * faked DMA first. 
*/ + DMA_FLUSH(); + if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { BEGIN_BCI(2); BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); @@ -405,7 +419,7 @@ static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv, unsigned int vb_size, unsigned int vb_stride) { - BCI_LOCALS; + DMA_LOCALS; unsigned char reorder = 0; unsigned int prim = cmd_header->prim.prim; unsigned int skip = cmd_header->prim.skip; @@ -482,28 +496,32 @@ static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv, int reorder[3] = {-1, -1, -1}; reorder[start%3] = 2; - BEGIN_BCI(count*vtx_size+1); - BCI_DRAW_PRIMITIVE(count, prim, skip); + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); for (i = start; i < start+count; ++i) { unsigned int j = i + reorder[i % 3]; - BCI_COPY_FROM_USER(&vtxbuf[vb_stride*j], + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j], vtx_size); } + + DMA_COMMIT(); } else { - BEGIN_BCI(count*vtx_size+1); - BCI_DRAW_PRIMITIVE(count, prim, skip); + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); if (vb_stride == vtx_size) { - BCI_COPY_FROM_USER(&vtxbuf[vb_stride*start], + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*start], vtx_size*count); } else { for (i = start; i < start+count; ++i) { - BCI_COPY_FROM_USER( + DMA_COPY_FROM_USER( &vtxbuf[vb_stride*i], vtx_size); } } + + DMA_COMMIT(); } start += count; @@ -527,6 +545,11 @@ static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv, unsigned int n = cmd_header->idx.count; unsigned int i; + if (!dmabuf) { + DRM_ERROR("called without dma buffers!\n"); + return DRM_ERR(EINVAL); + } + if (!n) return 0; @@ -575,6 +598,11 @@ static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv, } } + /* Vertex DMA doesn't work with command DMA at the same time, + * so we use BCI_... to submit commands here. Flush buffered + * faked DMA first. 
*/ + DMA_FLUSH(); + if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { BEGIN_BCI(2); BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); @@ -658,7 +686,7 @@ static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv, unsigned int vb_size, unsigned int vb_stride) { - BCI_LOCALS; + DMA_LOCALS; unsigned char reorder = 0; unsigned int prim = cmd_header->idx.prim; unsigned int skip = cmd_header->idx.skip; @@ -740,23 +768,27 @@ static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv, * for correct culling. Only on Savage3D. */ int reorder[3] = {2, -1, -1}; - BEGIN_BCI(count*vtx_size+1); - BCI_DRAW_PRIMITIVE(count, prim, skip); + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); for (i = 0; i < count; ++i) { unsigned int j = idx[i + reorder[i % 3]]; - BCI_COPY_FROM_USER(&vtxbuf[vb_stride*j], + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j], vtx_size); } + + DMA_COMMIT(); } else { - BEGIN_BCI(count*vtx_size+1); - BCI_DRAW_PRIMITIVE(count, prim, skip); + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); for (i = 0; i < count; ++i) { unsigned int j = idx[i]; - BCI_COPY_FROM_USER(&vtxbuf[vb_stride*j], + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j], vtx_size); } + + DMA_COMMIT(); } usr_idx += count; @@ -774,7 +806,7 @@ static int savage_dispatch_clear(drm_savage_private_t *dev_priv, unsigned int nbox, const drm_clip_rect_t __user *usr_boxes) { - BCI_LOCALS; + DMA_LOCALS; unsigned int flags = cmd_header->clear0.flags, mask, value; unsigned int clear_cmd; unsigned int i, nbufs; @@ -799,9 +831,10 @@ static int savage_dispatch_clear(drm_savage_private_t *dev_priv, if (mask != 0xffffffff) { /* set mask */ - BEGIN_BCI(2); - BCI_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); - BCI_WRITE(mask); + BEGIN_DMA(2); + DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); + DMA_WRITE(mask); + DMA_COMMIT(); } for (i = 0; i < nbox; ++i) { drm_clip_rect_t box; @@ -811,35 +844,37 @@ static int savage_dispatch_clear(drm_savage_private_t *dev_priv, x = box.x1, y = 
box.y1; w = box.x2 - box.x1; h = box.y2 - box.y1; - BEGIN_BCI(nbufs*6); + BEGIN_DMA(nbufs*6); for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) { if (!(flags & buf)) continue; - BCI_WRITE(clear_cmd); + DMA_WRITE(clear_cmd); switch(buf) { case SAVAGE_FRONT: - BCI_WRITE(dev_priv->front_offset); - BCI_WRITE(dev_priv->front_bd); + DMA_WRITE(dev_priv->front_offset); + DMA_WRITE(dev_priv->front_bd); break; case SAVAGE_BACK: - BCI_WRITE(dev_priv->back_offset); - BCI_WRITE(dev_priv->back_bd); + DMA_WRITE(dev_priv->back_offset); + DMA_WRITE(dev_priv->back_bd); break; case SAVAGE_DEPTH: - BCI_WRITE(dev_priv->depth_offset); - BCI_WRITE(dev_priv->depth_bd); + DMA_WRITE(dev_priv->depth_offset); + DMA_WRITE(dev_priv->depth_bd); break; } - BCI_WRITE(value); - BCI_WRITE(BCI_X_Y(x, y)); - BCI_WRITE(BCI_W_H(w, h)); + DMA_WRITE(value); + DMA_WRITE(BCI_X_Y(x, y)); + DMA_WRITE(BCI_W_H(w, h)); } + DMA_COMMIT(); } if (mask != 0xffffffff) { /* reset mask */ - BEGIN_BCI(2); - BCI_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); - BCI_WRITE(0xffffffff); + BEGIN_DMA(2); + DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); + DMA_WRITE(0xffffffff); + DMA_COMMIT(); } return 0; @@ -849,7 +884,7 @@ static int savage_dispatch_swap(drm_savage_private_t *dev_priv, unsigned int nbox, const drm_clip_rect_t __user *usr_boxes) { - BCI_LOCALS; + DMA_LOCALS; unsigned int swap_cmd; unsigned int i; @@ -864,13 +899,14 @@ static int savage_dispatch_swap(drm_savage_private_t *dev_priv, drm_clip_rect_t box; DRM_COPY_FROM_USER_UNCHECKED(&box, &usr_boxes[i], sizeof(box)); - BEGIN_BCI(6); - BCI_WRITE(swap_cmd); - BCI_WRITE(dev_priv->back_offset); - BCI_WRITE(dev_priv->back_bd); - BCI_WRITE(BCI_X_Y(box.x1, box.y1)); - BCI_WRITE(BCI_X_Y(box.x1, box.y1)); - BCI_WRITE(BCI_W_H(box.x2-box.x1, box.y2-box.y1)); + BEGIN_DMA(6); + DMA_WRITE(swap_cmd); + DMA_WRITE(dev_priv->back_offset); + DMA_WRITE(dev_priv->back_bd); + DMA_WRITE(BCI_X_Y(box.x1, box.y1)); + DMA_WRITE(BCI_X_Y(box.x1, box.y1)); + DMA_WRITE(BCI_W_H(box.x2-box.x1, 
box.y2-box.y1)); + DMA_COMMIT(); } return 0; @@ -967,12 +1003,16 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) DRM_COPY_FROM_USER_IOCTL(cmdbuf, (drm_savage_cmdbuf_t __user *)data, sizeof(cmdbuf)); - if (cmdbuf.dma_idx > dma->buf_count) { - DRM_ERROR("vertex buffer index %u out of range (0-%u)\n", - cmdbuf.dma_idx, dma->buf_count-1); - return DRM_ERR(EINVAL); + if (dma && dma->buflist) { + if (cmdbuf.dma_idx >= dma->buf_count) { /* valid indices are 0..buf_count-1, as the message below states */ + DRM_ERROR("vertex buffer index %u out of range (0-%u)\n", + cmdbuf.dma_idx, dma->buf_count-1); + return DRM_ERR(EINVAL); + } + dmabuf = dma->buflist[cmdbuf.dma_idx]; + } else { + dmabuf = NULL; } - dmabuf = dma->buflist[cmdbuf.dma_idx]; usr_cmdbuf = (drm_savage_cmd_header_t __user *)cmdbuf.cmd_addr; usr_vtxbuf = (unsigned int __user *)cmdbuf.vb_addr; @@ -1011,6 +1051,7 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) if (i + j > cmdbuf.size) { DRM_ERROR("indexed drawing command extends " "beyond end of command buffer\n"); + DMA_FLUSH(); return DRM_ERR(EINVAL); } /* fall through */ @@ -1042,6 +1083,7 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) if (i + j > cmdbuf.size) { DRM_ERROR("command SAVAGE_CMD_STATE extends " "beyond end of command buffer\n"); + DMA_FLUSH(); return DRM_ERR(EINVAL); } ret = savage_dispatch_state( @@ -1054,6 +1096,7 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) if (i + 1 > cmdbuf.size) { DRM_ERROR("command SAVAGE_CMD_CLEAR extends " "beyond end of command buffer\n"); + DMA_FLUSH(); return DRM_ERR(EINVAL); } ret = savage_dispatch_clear(dev_priv, &cmd_header, @@ -1068,11 +1111,14 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) break; default: DRM_ERROR("invalid command 0x%x\n", cmd_header.cmd.cmd); + DMA_FLUSH(); return DRM_ERR(EINVAL); } - if (ret != 0) + if (ret != 0) { + DMA_FLUSH(); return ret; + } } if (first_draw_cmd) { @@ -1080,11 +1126,15 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) dev_priv, first_draw_cmd, usr_cmdbuf, dmabuf, usr_vtxbuf, cmdbuf.vb_size, cmdbuf.vb_stride, cmdbuf.nbox, usr_boxes); - if (ret != 0) + if (ret != 0) { + DMA_FLUSH(); 
return ret; + } } - if (cmdbuf.discard) { + DMA_FLUSH(); + + if (dmabuf && cmdbuf.discard) { drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private; uint16_t event; event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D); -- cgit v1.2.3