From b03fa556b2c7e19d7021c017e35aaacaf24e5694 Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Fri, 6 Dec 2002 12:22:43 +0000 Subject: Rewrite radeon_cp_dispatch_texture() to avoid pingponging back to userspace when issue large (multi-buffer) uploads. --- shared/radeon_state.c | 201 +++++++++++++++++++++++++------------------------- 1 file changed, 99 insertions(+), 102 deletions(-) (limited to 'shared') diff --git a/shared/radeon_state.c b/shared/radeon_state.c index 7b480a7e..1fe007b3 100644 --- a/shared/radeon_state.c +++ b/shared/radeon_state.c @@ -1074,19 +1074,30 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev, const u8 *data; int size, dwords, tex_width, blit_width; u32 y, height; - int ret = 0, i; + int i; RING_LOCALS; dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD; - /* FIXME: Be smarter about this... + /* Flush the pixel cache. This ensures no pixel data gets mixed + * up with the texture data from the host data blit, otherwise + * part of the texture image may be corrupted. */ - buf = radeon_freelist_get( dev ); - if ( !buf ) return DRM_ERR(EAGAIN); + BEGIN_RING( 4 ); + RADEON_FLUSH_CACHE(); + RADEON_WAIT_UNTIL_IDLE(); + ADVANCE_RING(); + +#ifdef __BIG_ENDIAN + /* The Mesa texture functions provide the data in little endian as the + * chip wants it, but we need to compensate for the fact that the CP + * ring gets byte-swapped + */ + BEGIN_RING( 2 ); + OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT ); + ADVANCE_RING(); +#endif - DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n", - tex->offset >> 10, tex->pitch, tex->format, - image->x, image->y, image->width, image->height ); /* The compiler won't optimize away a division by a variable, * even if the only legal values are powers of two. Thus, we'll @@ -1120,127 +1131,113 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev, return DRM_ERR(EINVAL); } - DRM_DEBUG( " tex=%dx%d blit=%d\n", - tex_width, tex->height, blit_width ); - - /* Flush the pixel cache. This ensures no pixel data gets mixed - * up with the texture data from the host data blit, otherwise - * part of the texture image may be corrupted. - */ - BEGIN_RING( 4 ); - - RADEON_FLUSH_CACHE(); - RADEON_WAIT_UNTIL_IDLE(); - - ADVANCE_RING(); - -#ifdef __BIG_ENDIAN - /* The Mesa texture functions provide the data in little endian as the - * chip wants it, but we need to compensate for the fact that the CP - * ring gets byte-swapped - */ - BEGIN_RING( 2 ); - OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT ); - ADVANCE_RING(); -#endif - - /* Make a copy of the parameters in case we have to update them - * for a multi-pass texture blit. - */ - y = image->y; - height = image->height; - data = (const u8 *)image->data; - - size = height * blit_width; + DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width ); - if ( size > RADEON_MAX_TEXTURE_SIZE ) { - /* Texture image is too large, do a multipass upload */ - ret = DRM_ERR(EAGAIN); + do { + DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n", + tex->offset >> 10, tex->pitch, tex->format, + image->x, image->y, image->width, image->height ); - /* Adjust the blit size to fit the indirect buffer */ - height = RADEON_MAX_TEXTURE_SIZE / blit_width; + /* Make a copy of the parameters in case we have to + * update them for a multi-pass texture blit. + */ + y = image->y; + height = image->height; + data = (const u8 *)image->data; + size = height * blit_width; + if ( size > RADEON_MAX_TEXTURE_SIZE ) { + height = RADEON_MAX_TEXTURE_SIZE / blit_width; + size = height * blit_width; + } else if ( size < 4 && size > 0 ) { + size = 4; + } else if ( size == 0 ) { + return 0; + } + /* Update the input parameters for next time */ image->y += height; image->height -= height; - image->data = (const char *)image->data + size; + image->data += size; - if ( DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ) ) { - DRM_ERROR( "EFAULT on tex->image\n" ); - return DRM_ERR(EFAULT); + buf = radeon_freelist_get( dev ); + if ( 0 && !buf ) { + radeon_do_cp_idle( dev_priv ); + buf = radeon_freelist_get( dev ); + } + if ( !buf ) { + DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n"); + DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ); + return DRM_ERR(EAGAIN); } - } else if ( size < 4 && size > 0 ) { - size = 4; - } - dwords = size / 4; - /* Dispatch the indirect buffer. - */ - buffer = (u32 *)((char *)dev_priv->buffers->handle + buf->offset); - - buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 ); - buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_NONE | - (format << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP3_S | - RADEON_DP_SRC_SOURCE_HOST_DATA | - RADEON_GMC_CLR_CMP_CNTL_DIS | - RADEON_GMC_WR_MSK_DIS); - - buffer[2] = (tex->pitch << 22) | (tex->offset >> 10); - buffer[3] = 0xffffffff; - buffer[4] = 0xffffffff; - buffer[5] = (y << 16) | image->x; - buffer[6] = (height << 16) | image->width; - buffer[7] = dwords; - - buffer += 8; - - if ( tex_width >= 32 ) { - /* Texture image width is larger than the minimum, so we - * can upload it directly. - */ - if ( DRM_COPY_FROM_USER( buffer, data, dwords * sizeof(u32) ) ) { - DRM_ERROR( "EFAULT on data, %d dwords\n", dwords ); - return DRM_ERR(EFAULT); - } - } else { - /* Texture image width is less than the minimum, so we - * need to pad out each image scanline to the minimum - * width. + /* Dispatch the indirect buffer. */ - for ( i = 0 ; i < tex->height ; i++ ) { - if ( DRM_COPY_FROM_USER( buffer, data, tex_width ) ) { - DRM_ERROR( "EFAULT on pad, %d bytes\n", - tex_width ); + buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset); + dwords = size / 4; + buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 ); + buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + (format << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_S | + RADEON_DP_SRC_SOURCE_HOST_DATA | + RADEON_GMC_CLR_CMP_CNTL_DIS | + RADEON_GMC_WR_MSK_DIS); + + buffer[2] = (tex->pitch << 22) | (tex->offset >> 10); + buffer[3] = 0xffffffff; + buffer[4] = 0xffffffff; + buffer[5] = (y << 16) | image->x; + buffer[6] = (height << 16) | image->width; + buffer[7] = dwords; + buffer += 8; + + if ( tex_width >= 32 ) { + /* Texture image width is larger than the minimum, so we + * can upload it directly. + */ + if ( DRM_COPY_FROM_USER( buffer, data, + dwords * sizeof(u32) ) ) { + DRM_ERROR( "EFAULT on data, %d dwords\n", + dwords ); return DRM_ERR(EFAULT); } - buffer += 8; - data += tex_width; + } else { + /* Texture image width is less than the minimum, so we + * need to pad out each image scanline to the minimum + * width. + */ + for ( i = 0 ; i < tex->height ; i++ ) { + if ( DRM_COPY_FROM_USER( buffer, data, + tex_width ) ) { + DRM_ERROR( "EFAULT on pad, %d bytes\n", + tex_width ); + return DRM_ERR(EFAULT); + } + buffer += 8; + data += tex_width; + } } - } - buf->pid = DRM_CURRENTPID; - buf->used = (dwords + 8) * sizeof(u32); + buf->pid = DRM_CURRENTPID; + buf->used = (dwords + 8) * sizeof(u32); + radeon_cp_dispatch_indirect( dev, buf, 0, buf->used ); + radeon_cp_discard_buffer( dev, buf ); - radeon_cp_dispatch_indirect( dev, buf, 0, buf->used ); - radeon_cp_discard_buffer( dev, buf ); + } while (image->height > 0); /* Flush the pixel cache after the blit completes. This ensures * the texture data is written out to memory before rendering * continues. */ BEGIN_RING( 4 ); - RADEON_FLUSH_CACHE(); RADEON_WAIT_UNTIL_2D_IDLE(); - ADVANCE_RING(); - - return ret; + return 0; } -- cgit v1.2.3