diff options
author | Felix Kuehling <fxkuehl@gmx.de> | 2005-03-06 01:41:06 +0000 |
---|---|---|
committer | Felix Kuehling <fxkuehl@gmx.de> | 2005-03-06 01:41:06 +0000 |
commit | 15995234b4d6cb848d919b0342b5697fffe80c89 (patch) | |
tree | 04c8f9856711f58b458068ec123a9e386b67cc05 /shared-core | |
parent | 26f04a16645edb366fce16060f6d5d01f2ff54b3 (diff) |
Added support for command DMA on Savage4-based hardware. Unfortunately,
command and vertex DMA don't work at the same time. Command DMA
performs better and works with all vertex formats. Bumped the minor
version and driver date.
Diffstat (limited to 'shared-core')
-rw-r--r-- | shared-core/savage_bci.c | 315 | ||||
-rw-r--r-- | shared-core/savage_drv.h | 87 | ||||
-rw-r--r-- | shared-core/savage_state.c | 184 |
3 files changed, 506 insertions, 80 deletions
diff --git a/shared-core/savage_bci.c b/shared-core/savage_bci.c index 3ddcccb6..8c58873a 100644 --- a/shared-core/savage_bci.c +++ b/shared-core/savage_bci.c @@ -47,6 +47,7 @@ savage_bci_wait_fifo_shadow(drm_savage_private_t *dev_priv, unsigned int n) #endif for (i = 0; i < SAVAGE_DEFAULT_USEC_TIMEOUT; i++) { + DRM_MEMORYBARRIER(); status = dev_priv->status_ptr[0]; if ((status & mask) < threshold) return 0; @@ -120,6 +121,7 @@ savage_bci_wait_event_shadow(drm_savage_private_t *dev_priv, uint16_t e) int i; for (i = 0; i < SAVAGE_EVENT_USEC_TIMEOUT; i++) { + DRM_MEMORYBARRIER(); status = dev_priv->status_ptr[1]; if ((((status & 0xffff) - e) & 0xffff) <= 0x7fff || (status & 0xffff) == 0) @@ -247,7 +249,7 @@ static drm_buf_t *savage_freelist_get(drm_device_t *dev) event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; wrap = dev_priv->event_wrap; if (event > dev_priv->event_counter) - wrap--; /* hardware hasn't passed the last wrap yet */ + wrap--; /* hardware hasn't passed the last wrap yet */ DRM_DEBUG(" tail=0x%04x %d\n", tail->age.event, tail->age.wrap); DRM_DEBUG(" head=0x%04x %d\n", event, wrap); @@ -286,6 +288,225 @@ void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf) } /* + * Command DMA + */ +static int savage_dma_init(drm_savage_private_t *dev_priv) +{ + unsigned int i; + + dev_priv->nr_dma_pages = dev_priv->cmd_dma->size / + (SAVAGE_DMA_PAGE_SIZE*4); + dev_priv->dma_pages = drm_alloc(sizeof(drm_savage_dma_page_t) * + dev_priv->nr_dma_pages, + DRM_MEM_DRIVER); + if (dev_priv->dma_pages == NULL) + return DRM_ERR(ENOMEM); + + for (i = 0; i < dev_priv->nr_dma_pages; ++i) { + dev_priv->dma_pages[i].age.event = 0; + dev_priv->dma_pages[i].age.wrap = 0; + dev_priv->dma_pages[i].used = 0; + } + + dev_priv->first_dma_page = 0; + dev_priv->current_dma_page = 0; + + return 0; +} + +void savage_dma_reset(drm_savage_private_t *dev_priv) +{ + uint16_t event; + unsigned int wrap, i; + event = savage_bci_emit_event(dev_priv, 0); + wrap = dev_priv->event_wrap; + for (i 
= 0; i < dev_priv->nr_dma_pages; ++i) { + SET_AGE(&dev_priv->dma_pages[i].age, event, wrap); + dev_priv->dma_pages[i].used = 0; + } + dev_priv->first_dma_page = dev_priv->current_dma_page = 0; +} + +void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page) +{ + uint16_t event; + unsigned int wrap; + + /* Faked DMA buffer pages don't age. */ + if (dev_priv->cmd_dma == &dev_priv->fake_dma) + return; + + UPDATE_EVENT_COUNTER(); + if (dev_priv->status_ptr) + event = dev_priv->status_ptr[1] & 0xffff; + else + event = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; + wrap = dev_priv->event_wrap; + if (event > dev_priv->event_counter) + wrap--; /* hardware hasn't passed the last wrap yet */ + + if (dev_priv->dma_pages[page].age.wrap >= wrap && + dev_priv->dma_pages[page].age.event > event) { + if (dev_priv->wait_evnt(dev_priv, + dev_priv->dma_pages[page].age.event) + < 0) + DRM_ERROR("wait_evnt failed!\n"); + } +} + +uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, unsigned int n) +{ + unsigned int cur = dev_priv->current_dma_page; + unsigned int rest = SAVAGE_DMA_PAGE_SIZE - + dev_priv->dma_pages[cur].used; + unsigned int nr_pages = (n - rest + SAVAGE_DMA_PAGE_SIZE-1) / + SAVAGE_DMA_PAGE_SIZE; + uint32_t *dma_ptr; + unsigned int i; + + DRM_DEBUG("cur=%u, cur->used=%u, n=%u, rest=%u, nr_pages=%u\n", + cur, dev_priv->dma_pages[cur].used, n, rest, nr_pages); + + if (cur + nr_pages < dev_priv->nr_dma_pages) { + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + cur*SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used; + if (n < rest) + rest = n; + dev_priv->dma_pages[cur].used += rest; + n -= rest; + cur++; + } else { + dev_priv->dma_flush(dev_priv); + nr_pages = (n + SAVAGE_DMA_PAGE_SIZE-1) / SAVAGE_DMA_PAGE_SIZE; + for (i = cur+1; i < dev_priv->nr_dma_pages; ++i) { + dev_priv->dma_pages[i].age = + dev_priv->dma_pages[cur].age; + dev_priv->dma_pages[i].used = 0; + } + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle; + dev_priv->first_dma_page = cur = 
0; + } + for (i = cur; nr_pages > 0; ++i, --nr_pages) { +#if SAVAGE_DMA_DEBUG + if (dev_priv->dma_pages[i].used) { + DRM_ERROR("unflushed page %u: used=%u\n", + i, dev_priv->dma_pages[i].used); + } +#endif + if (n > SAVAGE_DMA_PAGE_SIZE) + dev_priv->dma_pages[i].used = SAVAGE_DMA_PAGE_SIZE; + else + dev_priv->dma_pages[i].used = n; + n -= SAVAGE_DMA_PAGE_SIZE; + } + dev_priv->current_dma_page = --i; + + DRM_DEBUG("cur=%u, cur->used=%u, n=%u\n", + i, dev_priv->dma_pages[i].used, n); + + savage_dma_wait(dev_priv, dev_priv->current_dma_page); + + return dma_ptr; +} + +static void savage_dma_flush(drm_savage_private_t *dev_priv) +{ + BCI_LOCALS; + unsigned int cur = dev_priv->current_dma_page; + uint16_t event; + unsigned int wrap, pad, len, i; + unsigned long phys_addr; + + if (dev_priv->first_dma_page == dev_priv->current_dma_page && + dev_priv->dma_pages[dev_priv->current_dma_page].used == 0) + return; + + /* pad to multiples of 8 entries (really needed? 2 should do it) */ + pad = -dev_priv->dma_pages[cur].used & 7; + DRM_DEBUG("used=%d, pad=%u\n", dev_priv->dma_pages[cur].used, pad); + + if (pad) { + uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + cur * SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used; + dev_priv->dma_pages[cur].used += pad; + while(pad != 0) { + *dma_ptr++ = BCI_CMD_WAIT; + pad--; + } + } + + DRM_MEMORYBARRIER(); + + /* do flush ... 
*/ + phys_addr = dev_priv->cmd_dma->offset + + dev_priv->first_dma_page * SAVAGE_DMA_PAGE_SIZE*4; + len = (cur - dev_priv->first_dma_page) * SAVAGE_DMA_PAGE_SIZE + + dev_priv->dma_pages[cur].used; + + DRM_DEBUG("phys_addr=%lx, len=%u\n", + phys_addr | dev_priv->dma_type, len); + + BEGIN_BCI(3); + BCI_SET_REGISTERS(SAVAGE_DMABUFADDR, 1); + BCI_WRITE(phys_addr | dev_priv->dma_type); + BCI_DMA(len); + + /* age DMA pages */ + event = savage_bci_emit_event(dev_priv, 0); + wrap = dev_priv->event_wrap; + for (i = dev_priv->first_dma_page; + i <= dev_priv->current_dma_page; ++i) { + SET_AGE(&dev_priv->dma_pages[i].age, event, wrap); + dev_priv->dma_pages[i].used = 0; + } + + /* advance to next page */ + if (i == dev_priv->nr_dma_pages) + i = 0; + dev_priv->first_dma_page = dev_priv->current_dma_page = i; +} + +static void savage_fake_dma_flush(drm_savage_private_t *dev_priv) +{ + BCI_LOCALS; + unsigned int i, j; + if (dev_priv->first_dma_page == dev_priv->current_dma_page && + dev_priv->dma_pages[dev_priv->current_dma_page].used == 0) + return; + + DRM_DEBUG("first=%u, cur=%u, cur->used=%u\n", + dev_priv->first_dma_page, dev_priv->current_dma_page, + dev_priv->dma_pages[dev_priv->current_dma_page].used); + + for (i = dev_priv->first_dma_page; + i <= dev_priv->current_dma_page && dev_priv->dma_pages[i].used; + ++i) { + uint32_t *dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + + i * SAVAGE_DMA_PAGE_SIZE; +#if SAVAGE_DMA_DEBUG + /* Sanity check: all pages except the last one must be full. 
*/ + if (i < dev_priv->current_dma_page && + dev_priv->dma_pages[i].used != SAVAGE_DMA_PAGE_SIZE) { + DRM_ERROR("partial DMA page %u: used=%u", + i, dev_priv->dma_pages[i].used); + } +#endif + BEGIN_BCI(dev_priv->dma_pages[i].used); + for (j = 0; j < dev_priv->dma_pages[i].used; ++j) { + BCI_WRITE(dma_ptr[j]); + } + dev_priv->dma_pages[i].used = 0; + } + + /* advance to next page */ + if (i == dev_priv->nr_dma_pages) + i = 0; + dev_priv->first_dma_page = dev_priv->current_dma_page = i; +} + +/* * Initalize permanent mappings. On Savage4 and SavageIX the alignment * and size of the aperture is not suitable for automatic MTRR setup * in drm_initmap. Therefore we do it manually before the maps are @@ -464,14 +685,20 @@ static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init) } else { dev_priv->status = NULL; } - if (dev_priv->dma_type == SAVAGE_DMA_AGP) { + if (dev_priv->dma_type == SAVAGE_DMA_AGP && init->buffers_offset) { dev->agp_buffer_map = drm_core_findmap(dev, init->buffers_offset); if (!dev->agp_buffer_map) { - DRM_ERROR("could not find dma buffer region!\n"); + DRM_ERROR("could not find DMA buffer region!\n"); savage_do_cleanup_bci(dev); return DRM_ERR(EINVAL); } + drm_core_ioremap(dev->agp_buffer_map, dev); + if (!dev->agp_buffer_map) { + DRM_ERROR("failed to ioremap DMA buffer region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } } if (init->agp_textures_offset) { dev_priv->agp_textures = @@ -484,25 +711,65 @@ static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init) } else { dev_priv->agp_textures = NULL; } - if (0 && !S3_SAVAGE3D_SERIES(dev_priv->chipset)) { - /* command DMA not implemented yet */ + + if (init->cmd_dma_offset) { + if (S3_SAVAGE3D_SERIES(dev_priv->chipset)) { + DRM_ERROR("command DMA not supported on " + "Savage3D/MX/IX.\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + if (dev->dma && dev->dma->buflist) { + DRM_ERROR("command and vertex DMA not supported " + "at the same 
time.\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } dev_priv->cmd_dma = drm_core_findmap(dev, init->cmd_dma_offset); if (!dev_priv->cmd_dma) { DRM_ERROR("could not find command DMA region!\n"); savage_do_cleanup_bci(dev); return DRM_ERR(EINVAL); } + if (dev_priv->dma_type == SAVAGE_DMA_AGP) { + if (dev_priv->cmd_dma->type != _DRM_AGP) { + DRM_ERROR("AGP command DMA region is not a " + "_DRM_AGP map!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } + drm_core_ioremap(dev_priv->cmd_dma, dev); + if (!dev_priv->cmd_dma->handle) { + DRM_ERROR("failed to ioremap command " + "DMA region!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + } else if (dev_priv->cmd_dma->type != _DRM_CONSISTENT) { + DRM_ERROR("PCI command DMA region is not a " + "_DRM_CONSISTENT map!\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(EINVAL); + } } else { dev_priv->cmd_dma = NULL; } - if (dev_priv->cmd_dma && dev_priv->dma_type == SAVAGE_DMA_AGP) { - drm_core_ioremap(dev_priv->cmd_dma, dev); - if (!dev_priv->cmd_dma->handle) { - DRM_ERROR("failed to ioremap command DMA region!\n"); + dev_priv->dma_flush = savage_dma_flush; + if (!dev_priv->cmd_dma) { + DRM_DEBUG("falling back to faked command DMA.\n"); + dev_priv->fake_dma.offset = 0; + dev_priv->fake_dma.size = SAVAGE_FAKE_DMA_SIZE; + dev_priv->fake_dma.type = _DRM_SHM; + dev_priv->fake_dma.handle = drm_alloc(SAVAGE_FAKE_DMA_SIZE, + DRM_MEM_DRIVER); + if (!dev_priv->fake_dma.handle) { + DRM_ERROR("could not allocate faked DMA buffer!\n"); savage_do_cleanup_bci(dev); return DRM_ERR(ENOMEM); } + dev_priv->cmd_dma = &dev_priv->fake_dma; + dev_priv->dma_flush = savage_fake_dma_flush; } dev_priv->sarea_priv = @@ -578,6 +845,12 @@ static int savage_do_init_bci(drm_device_t *dev, drm_savage_init_t *init) return DRM_ERR(ENOMEM); } + if (savage_dma_init(dev_priv) < 0) { + DRM_ERROR("could not initialize command DMA\n"); + savage_do_cleanup_bci(dev); + return DRM_ERR(ENOMEM); + } + return 0; } @@ 
-585,9 +858,29 @@ int savage_do_cleanup_bci(drm_device_t *dev) { drm_savage_private_t *dev_priv = dev->dev_private; - if (dev_priv->cmd_dma && dev_priv->dma_type == SAVAGE_DMA_AGP) + if (dev_priv->cmd_dma == &dev_priv->fake_dma) { + if (dev_priv->fake_dma.handle) + drm_free(dev_priv->fake_dma.handle, + SAVAGE_FAKE_DMA_SIZE, DRM_MEM_DRIVER); + } else if (dev_priv->cmd_dma && dev_priv->cmd_dma->handle && + dev_priv->cmd_dma->type == _DRM_AGP && + dev_priv->dma_type == SAVAGE_DMA_AGP) drm_core_ioremapfree(dev_priv->cmd_dma, dev); + if (dev_priv->dma_type == SAVAGE_DMA_AGP && + dev->agp_buffer_map && dev->agp_buffer_map->handle) { + drm_core_ioremapfree(dev->agp_buffer_map, dev); + /* make sure the next instance (which may be running + * in PCI mode) doesn't try to use an old + * agp_buffer_map. */ + dev->agp_buffer_map = NULL; + } + + if (dev_priv->dma_pages) + drm_free(dev_priv->dma_pages, + sizeof(drm_savage_dma_page_t)*dev_priv->nr_dma_pages, + DRM_MEM_DRIVER); + return 0; } @@ -651,7 +944,7 @@ int savage_bci_event_wait(DRM_IOCTL_ARGS) hw_e = SAVAGE_READ(SAVAGE_STATUS_WORD1) & 0xffff; hw_w = dev_priv->event_wrap; if (hw_e > dev_priv->event_counter) - hw_w--; /* hardware hasn't passed the last wrap yet */ + hw_w--; /* hardware hasn't passed the last wrap yet */ event_e = event.count & 0xffff; event_w = event.count >> 16; diff --git a/shared-core/savage_drv.h b/shared-core/savage_drv.h index 2b44e529..ae37a48f 100644 --- a/shared-core/savage_drv.h +++ b/shared-core/savage_drv.h @@ -30,10 +30,10 @@ #define DRIVER_NAME "savage" #define DRIVER_DESC "Savage3D/MX/IX, Savage4, SuperSavage, Twister, ProSavage[DDR]" -#define DRIVER_DATE "20050222" +#define DRIVER_DATE "20050305" #define DRIVER_MAJOR 2 -#define DRIVER_MINOR 3 +#define DRIVER_MINOR 4 #define DRIVER_PATCHLEVEL 0 /* Interface history: * @@ -45,6 +45,8 @@ * 2.3 Event counters used by BCI_EVENT_EMIT/WAIT ioctls are now 32 bits * wide and thus very long lived (unlikely to ever wrap). 
The size * in the struct was 32 bits before, but only 16 bits were used + * 2.4 Implemented command DMA. Now drm_savage_init_t.cmd_dma_offset is + * actually used */ typedef struct drm_savage_age { @@ -59,6 +61,16 @@ typedef struct drm_savage_buf_priv { drm_buf_t *buf; } drm_savage_buf_priv_t; +typedef struct drm_savage_dma_page { + drm_savage_age_t age; + unsigned int used; +} drm_savage_dma_page_t; +#define SAVAGE_DMA_PAGE_SIZE 1024 /* in dwords */ +/* Fake DMA buffer size in bytes. 4 pages. Allows a maximum command + * size of 16kbytes or 4k entries. Minimum requirement would be + * 10kbytes for 255 40-byte vertices in one drawing command. */ +#define SAVAGE_FAKE_DMA_SIZE (SAVAGE_DMA_PAGE_SIZE*4*4) + /* interesting bits of hardware state that are saved in dev_priv */ typedef union { struct drm_savage_common_state { @@ -143,6 +155,7 @@ typedef struct drm_savage_private { drm_local_map_t *status; drm_local_map_t *agp_textures; drm_local_map_t *cmd_dma; + drm_local_map_t fake_dma; struct { int handle; @@ -155,6 +168,10 @@ typedef struct drm_savage_private { uint16_t event_counter; unsigned int event_wrap; + /* Savage4 command DMA */ + drm_savage_dma_page_t *dma_pages; + unsigned int nr_dma_pages, first_dma_page, current_dma_page; + /* saved hw state for global/local check on S3D */ uint32_t hw_draw_ctrl, hw_zbuf_ctrl; /* and for scissors (global, so don't emit if not changed) */ @@ -172,6 +189,7 @@ typedef struct drm_savage_private { * Avoid unwanted macro expansion. 
*/ void (*emit_clip_rect)(struct drm_savage_private *dev_priv, drm_clip_rect_t *pbox); + void (*dma_flush)(struct drm_savage_private *dev_priv); } drm_savage_private_t; /* ioctls */ @@ -185,6 +203,10 @@ extern int savage_bci_buffers(DRM_IOCTL_ARGS); extern uint16_t savage_bci_emit_event(drm_savage_private_t *dev_priv, unsigned int flags); extern void savage_freelist_put(drm_device_t *dev, drm_buf_t *buf); +extern void savage_dma_reset(drm_savage_private_t *dev_priv); +extern void savage_dma_wait(drm_savage_private_t *dev_priv, unsigned int page); +extern uint32_t *savage_dma_alloc(drm_savage_private_t *dev_priv, + unsigned int n); extern int savage_preinit(drm_device_t *dev, unsigned long chipset); extern int savage_postcleanup(drm_device_t *dev); extern int savage_do_cleanup_bci(drm_device_t *dev); @@ -290,6 +312,7 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, /* common stuff */ #define SAVAGE_VERTBUFADDR 0x3e #define SAVAGE_BITPLANEWTMASK 0xd7 +#define SAVAGE_DMABUFADDR 0x51 /* texture enable bits (needed for tex addr checking) */ #define SAVAGE_TEXCTRL_TEXEN_MASK 0x00010000 /* S3D */ @@ -408,6 +431,8 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, #define BCI_CMD_DRAW_NO_V1 0x00000080 #define BCI_CMD_DRAW_NO_UV1 0x000000c0 +#define BCI_CMD_DMA 0xa8000000 + #define BCI_W_H(w, h) ((((h) << 16) | (w)) & 0x0FFF0FFF) #define BCI_X_Y(x, y) ((((y) << 16) | (x)) & 0x0FFF0FFF) #define BCI_X_W(x, y) ((((w) << 16) | (x)) & 0x0FFF0FFF) @@ -431,10 +456,17 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, BCI_WRITE(BCI_CMD_SET_REGISTER | \ ((uint32_t)(n) & 0xff) << 16 | \ ((uint32_t)(first) & 0xffff)) +#define DMA_SET_REGISTERS( first, n ) \ + DMA_WRITE(BCI_CMD_SET_REGISTER | \ + ((uint32_t)(n) & 0xff) << 16 | \ + ((uint32_t)(first) & 0xffff)) #define BCI_DRAW_PRIMITIVE(n, type, skip) \ BCI_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \ ((n) << 16)) +#define DMA_DRAW_PRIMITIVE(n, type, skip) \ + 
DMA_WRITE(BCI_CMD_DRAW_PRIM | (type) | (skip) | \ + ((n) << 16)) #define BCI_DRAW_INDICES_S3D(n, type, i0) \ BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) | \ @@ -444,6 +476,9 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, BCI_WRITE(BCI_CMD_DRAW_INDEXED_PRIM | (type) | \ (skip) | ((n) << 16)) +#define BCI_DMA(n) \ + BCI_WRITE(BCI_CMD_DMA | (((n) >> 1) - 1)) + /* * access to MMIO */ @@ -473,6 +508,54 @@ extern void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, } \ } while(0) +/* + * command DMA support + */ +#define SAVAGE_DMA_DEBUG 1 + +#define DMA_LOCALS uint32_t *dma_ptr; + +#define BEGIN_DMA( n ) do { \ + unsigned int cur = dev_priv->current_dma_page; \ + unsigned int rest = SAVAGE_DMA_PAGE_SIZE - \ + dev_priv->dma_pages[cur].used; \ + if ((n) > rest) { \ + dma_ptr = savage_dma_alloc(dev_priv, (n)); \ + } else { /* fast path for small allocations */ \ + dma_ptr = (uint32_t *)dev_priv->cmd_dma->handle + \ + cur * SAVAGE_DMA_PAGE_SIZE + \ + dev_priv->dma_pages[cur].used; \ + if (dev_priv->dma_pages[cur].used == 0) \ + savage_dma_wait(dev_priv, cur); \ + dev_priv->dma_pages[cur].used += (n); \ + } \ +} while(0) + +#define DMA_WRITE( val ) *dma_ptr++ = (uint32_t)(val) + +#define DMA_COPY_FROM_USER(src,n) do { \ + DRM_COPY_FROM_USER_UNCHECKED(dma_ptr, (src), (n)*4); \ + dma_ptr += n; \ +} while(0) + +#if SAVAGE_DMA_DEBUG +#define DMA_COMMIT() do { \ + unsigned int cur = dev_priv->current_dma_page; \ + uint32_t *expected = (uint32_t *)dev_priv->cmd_dma->handle + \ + cur * SAVAGE_DMA_PAGE_SIZE + \ + dev_priv->dma_pages[cur].used; \ + if (dma_ptr != expected) { \ + DRM_ERROR("DMA allocation and use don't match: " \ + "%p != %p\n", expected, dma_ptr); \ + savage_dma_reset(dev_priv); \ + } \ +} while(0) +#else +#define DMA_COMMIT() do {/* nothing */} while(0) +#endif + +#define DMA_FLUSH() dev_priv->dma_flush(dev_priv) + /* Buffer aging via event tag */ diff --git a/shared-core/savage_state.c b/shared-core/savage_state.c index 
f1e424a7..cc386527 100644 --- a/shared-core/savage_state.c +++ b/shared-core/savage_state.c @@ -39,15 +39,16 @@ void savage_emit_clip_rect_s3d(drm_savage_private_t *dev_priv, ((((uint32_t)pbox->y2-1) << 16) & 0x07ff0000); if (scstart != dev_priv->state.s3d.scstart || scend != dev_priv->state.s3d.scend) { - BCI_LOCALS; - BEGIN_BCI(4); - BCI_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); - BCI_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2); - BCI_WRITE(scstart); - BCI_WRITE(scend); + DMA_LOCALS; + BEGIN_DMA(4); + DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); + DMA_SET_REGISTERS(SAVAGE_SCSTART_S3D, 2); + DMA_WRITE(scstart); + DMA_WRITE(scend); dev_priv->state.s3d.scstart = scstart; dev_priv->state.s3d.scend = scend; dev_priv->waiting = 1; + DMA_COMMIT(); } } @@ -64,15 +65,16 @@ void savage_emit_clip_rect_s4(drm_savage_private_t *dev_priv, ((((uint32_t)pbox->y2-1) << 12) & 0x00fff000); if (drawctrl0 != dev_priv->state.s4.drawctrl0 || drawctrl1 != dev_priv->state.s4.drawctrl1) { - BCI_LOCALS; - BEGIN_BCI(4); - BCI_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); - BCI_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2); - BCI_WRITE(drawctrl0); - BCI_WRITE(drawctrl1); + DMA_LOCALS; + BEGIN_DMA(4); + DMA_WRITE(BCI_CMD_WAIT|BCI_CMD_WAIT_3D); + DMA_SET_REGISTERS(SAVAGE_DRAWCTRL0_S4, 2); + DMA_WRITE(drawctrl0); + DMA_WRITE(drawctrl1); dev_priv->state.s4.drawctrl0 = drawctrl0; dev_priv->state.s4.drawctrl1 = drawctrl1; dev_priv->waiting = 1; + DMA_COMMIT(); } } @@ -192,7 +194,7 @@ static int savage_dispatch_state(drm_savage_private_t *dev_priv, const drm_savage_cmd_header_t *cmd_header, const uint32_t __user *regs) { - BCI_LOCALS; + DMA_LOCALS; unsigned int count = cmd_header->state.count; unsigned int start = cmd_header->state.start; unsigned int count2 = 0; @@ -244,18 +246,18 @@ static int savage_dispatch_state(drm_savage_private_t *dev_priv, bci_size = count + (count+254)/255 + count2 + (count2+254)/255; if (cmd_header->state.global) { - BEGIN_BCI(bci_size+1); - BCI_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); + 
BEGIN_DMA(bci_size+1); + DMA_WRITE(BCI_CMD_WAIT | BCI_CMD_WAIT_3D); dev_priv->waiting = 1; } else { - BEGIN_BCI(bci_size); + BEGIN_DMA(bci_size); } do { while (count > 0) { unsigned int n = count < 255 ? count : 255; - BCI_SET_REGISTERS(start, n); - BCI_COPY_FROM_USER(regs, n); + DMA_SET_REGISTERS(start, n); + DMA_COPY_FROM_USER(regs, n); count -= n; start += n; regs += n; @@ -266,6 +268,8 @@ static int savage_dispatch_state(drm_savage_private_t *dev_priv, count2 = 0; } while (count); + DMA_COMMIT(); + return 0; } @@ -281,6 +285,11 @@ static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv, unsigned int start = cmd_header->prim.start; unsigned int i; + if (!dmabuf) { + DRM_ERROR("called without dma buffers!\n"); + return DRM_ERR(EINVAL); + } + if (!n) return 0; @@ -335,6 +344,11 @@ static int savage_dispatch_dma_prim(drm_savage_private_t *dev_priv, return DRM_ERR(EINVAL); } + /* Vertex DMA doesn't work with command DMA at the same time, + * so we use BCI_... to submit commands here. Flush buffered + * faked DMA first. 
*/ + DMA_FLUSH(); + if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { BEGIN_BCI(2); BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); @@ -405,7 +419,7 @@ static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv, unsigned int vb_size, unsigned int vb_stride) { - BCI_LOCALS; + DMA_LOCALS; unsigned char reorder = 0; unsigned int prim = cmd_header->prim.prim; unsigned int skip = cmd_header->prim.skip; @@ -482,28 +496,32 @@ static int savage_dispatch_vb_prim(drm_savage_private_t *dev_priv, int reorder[3] = {-1, -1, -1}; reorder[start%3] = 2; - BEGIN_BCI(count*vtx_size+1); - BCI_DRAW_PRIMITIVE(count, prim, skip); + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); for (i = start; i < start+count; ++i) { unsigned int j = i + reorder[i % 3]; - BCI_COPY_FROM_USER(&vtxbuf[vb_stride*j], + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j], vtx_size); } + + DMA_COMMIT(); } else { - BEGIN_BCI(count*vtx_size+1); - BCI_DRAW_PRIMITIVE(count, prim, skip); + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); if (vb_stride == vtx_size) { - BCI_COPY_FROM_USER(&vtxbuf[vb_stride*start], + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*start], vtx_size*count); } else { for (i = start; i < start+count; ++i) { - BCI_COPY_FROM_USER( + DMA_COPY_FROM_USER( &vtxbuf[vb_stride*i], vtx_size); } } + + DMA_COMMIT(); } start += count; @@ -527,6 +545,11 @@ static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv, unsigned int n = cmd_header->idx.count; unsigned int i; + if (!dmabuf) { + DRM_ERROR("called without dma buffers!\n"); + return DRM_ERR(EINVAL); + } + if (!n) return 0; @@ -575,6 +598,11 @@ static int savage_dispatch_dma_idx(drm_savage_private_t *dev_priv, } } + /* Vertex DMA doesn't work with command DMA at the same time, + * so we use BCI_... to submit commands here. Flush buffered + * faked DMA first. 
*/ + DMA_FLUSH(); + if (dmabuf->bus_address != dev_priv->state.common.vbaddr) { BEGIN_BCI(2); BCI_SET_REGISTERS(SAVAGE_VERTBUFADDR, 1); @@ -658,7 +686,7 @@ static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv, unsigned int vb_size, unsigned int vb_stride) { - BCI_LOCALS; + DMA_LOCALS; unsigned char reorder = 0; unsigned int prim = cmd_header->idx.prim; unsigned int skip = cmd_header->idx.skip; @@ -740,23 +768,27 @@ static int savage_dispatch_vb_idx(drm_savage_private_t *dev_priv, * for correct culling. Only on Savage3D. */ int reorder[3] = {2, -1, -1}; - BEGIN_BCI(count*vtx_size+1); - BCI_DRAW_PRIMITIVE(count, prim, skip); + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); for (i = 0; i < count; ++i) { unsigned int j = idx[i + reorder[i % 3]]; - BCI_COPY_FROM_USER(&vtxbuf[vb_stride*j], + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j], vtx_size); } + + DMA_COMMIT(); } else { - BEGIN_BCI(count*vtx_size+1); - BCI_DRAW_PRIMITIVE(count, prim, skip); + BEGIN_DMA(count*vtx_size+1); + DMA_DRAW_PRIMITIVE(count, prim, skip); for (i = 0; i < count; ++i) { unsigned int j = idx[i]; - BCI_COPY_FROM_USER(&vtxbuf[vb_stride*j], + DMA_COPY_FROM_USER(&vtxbuf[vb_stride*j], vtx_size); } + + DMA_COMMIT(); } usr_idx += count; @@ -774,7 +806,7 @@ static int savage_dispatch_clear(drm_savage_private_t *dev_priv, unsigned int nbox, const drm_clip_rect_t __user *usr_boxes) { - BCI_LOCALS; + DMA_LOCALS; unsigned int flags = cmd_header->clear0.flags, mask, value; unsigned int clear_cmd; unsigned int i, nbufs; @@ -799,9 +831,10 @@ static int savage_dispatch_clear(drm_savage_private_t *dev_priv, if (mask != 0xffffffff) { /* set mask */ - BEGIN_BCI(2); - BCI_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); - BCI_WRITE(mask); + BEGIN_DMA(2); + DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); + DMA_WRITE(mask); + DMA_COMMIT(); } for (i = 0; i < nbox; ++i) { drm_clip_rect_t box; @@ -811,35 +844,37 @@ static int savage_dispatch_clear(drm_savage_private_t *dev_priv, x = box.x1, y = 
box.y1; w = box.x2 - box.x1; h = box.y2 - box.y1; - BEGIN_BCI(nbufs*6); + BEGIN_DMA(nbufs*6); for (buf = SAVAGE_FRONT; buf <= SAVAGE_DEPTH; buf <<= 1) { if (!(flags & buf)) continue; - BCI_WRITE(clear_cmd); + DMA_WRITE(clear_cmd); switch(buf) { case SAVAGE_FRONT: - BCI_WRITE(dev_priv->front_offset); - BCI_WRITE(dev_priv->front_bd); + DMA_WRITE(dev_priv->front_offset); + DMA_WRITE(dev_priv->front_bd); break; case SAVAGE_BACK: - BCI_WRITE(dev_priv->back_offset); - BCI_WRITE(dev_priv->back_bd); + DMA_WRITE(dev_priv->back_offset); + DMA_WRITE(dev_priv->back_bd); break; case SAVAGE_DEPTH: - BCI_WRITE(dev_priv->depth_offset); - BCI_WRITE(dev_priv->depth_bd); + DMA_WRITE(dev_priv->depth_offset); + DMA_WRITE(dev_priv->depth_bd); break; } - BCI_WRITE(value); - BCI_WRITE(BCI_X_Y(x, y)); - BCI_WRITE(BCI_W_H(w, h)); + DMA_WRITE(value); + DMA_WRITE(BCI_X_Y(x, y)); + DMA_WRITE(BCI_W_H(w, h)); } + DMA_COMMIT(); } if (mask != 0xffffffff) { /* reset mask */ - BEGIN_BCI(2); - BCI_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); - BCI_WRITE(0xffffffff); + BEGIN_DMA(2); + DMA_SET_REGISTERS(SAVAGE_BITPLANEWTMASK, 1); + DMA_WRITE(0xffffffff); + DMA_COMMIT(); } return 0; @@ -849,7 +884,7 @@ static int savage_dispatch_swap(drm_savage_private_t *dev_priv, unsigned int nbox, const drm_clip_rect_t __user *usr_boxes) { - BCI_LOCALS; + DMA_LOCALS; unsigned int swap_cmd; unsigned int i; @@ -864,13 +899,14 @@ static int savage_dispatch_swap(drm_savage_private_t *dev_priv, drm_clip_rect_t box; DRM_COPY_FROM_USER_UNCHECKED(&box, &usr_boxes[i], sizeof(box)); - BEGIN_BCI(6); - BCI_WRITE(swap_cmd); - BCI_WRITE(dev_priv->back_offset); - BCI_WRITE(dev_priv->back_bd); - BCI_WRITE(BCI_X_Y(box.x1, box.y1)); - BCI_WRITE(BCI_X_Y(box.x1, box.y1)); - BCI_WRITE(BCI_W_H(box.x2-box.x1, box.y2-box.y1)); + BEGIN_DMA(6); + DMA_WRITE(swap_cmd); + DMA_WRITE(dev_priv->back_offset); + DMA_WRITE(dev_priv->back_bd); + DMA_WRITE(BCI_X_Y(box.x1, box.y1)); + DMA_WRITE(BCI_X_Y(box.x1, box.y1)); + DMA_WRITE(BCI_W_H(box.x2-box.x1, 
box.y2-box.y1)); + DMA_COMMIT(); } return 0; @@ -967,12 +1003,16 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) DRM_COPY_FROM_USER_IOCTL(cmdbuf, (drm_savage_cmdbuf_t __user *)data, sizeof(cmdbuf)); - if (cmdbuf.dma_idx > dma->buf_count) { - DRM_ERROR("vertex buffer index %u out of range (0-%u)\n", - cmdbuf.dma_idx, dma->buf_count-1); - return DRM_ERR(EINVAL); + if (dma && dma->buflist) { + if (cmdbuf.dma_idx > dma->buf_count) { + DRM_ERROR("vertex buffer index %u out of range (0-%u)\n", + cmdbuf.dma_idx, dma->buf_count-1); + return DRM_ERR(EINVAL); + } + dmabuf = dma->buflist[cmdbuf.dma_idx]; + } else { + dmabuf = NULL; } - dmabuf = dma->buflist[cmdbuf.dma_idx]; usr_cmdbuf = (drm_savage_cmd_header_t __user *)cmdbuf.cmd_addr; usr_vtxbuf = (unsigned int __user *)cmdbuf.vb_addr; @@ -1011,6 +1051,7 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) if (i + j > cmdbuf.size) { DRM_ERROR("indexed drawing command extends " "beyond end of command buffer\n"); + DMA_FLUSH(); return DRM_ERR(EINVAL); } /* fall through */ @@ -1042,6 +1083,7 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) if (i + j > cmdbuf.size) { DRM_ERROR("command SAVAGE_CMD_STATE extends " "beyond end of command buffer\n"); + DMA_FLUSH(); return DRM_ERR(EINVAL); } ret = savage_dispatch_state( @@ -1054,6 +1096,7 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) if (i + 1 > cmdbuf.size) { DRM_ERROR("command SAVAGE_CMD_CLEAR extends " "beyond end of command buffer\n"); + DMA_FLUSH(); return DRM_ERR(EINVAL); } ret = savage_dispatch_clear(dev_priv, &cmd_header, @@ -1068,11 +1111,14 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) break; default: DRM_ERROR("invalid command 0x%x\n", cmd_header.cmd.cmd); + DMA_FLUSH(); return DRM_ERR(EINVAL); } - if (ret != 0) + if (ret != 0) { + DMA_FLUSH(); return ret; + } } if (first_draw_cmd) { @@ -1080,11 +1126,15 @@ int savage_bci_cmdbuf(DRM_IOCTL_ARGS) dev_priv, first_draw_cmd, usr_cmdbuf, dmabuf, usr_vtxbuf, cmdbuf.vb_size, cmdbuf.vb_stride, cmdbuf.nbox, usr_boxes); - if (ret != 0) + if (ret != 0) { + DMA_FLUSH(); 
return ret; + } } - if (cmdbuf.discard) { + DMA_FLUSH(); + + if (dmabuf && cmdbuf.discard) { drm_savage_buf_priv_t *buf_priv = dmabuf->dev_private; uint16_t event; event = savage_bci_emit_event(dev_priv, SAVAGE_WAIT_3D); |