From b03fa556b2c7e19d7021c017e35aaacaf24e5694 Mon Sep 17 00:00:00 2001
From: Keith Whitwell <keith@tungstengraphics.com>
Date: Fri, 6 Dec 2002 12:22:43 +0000
Subject: Rewrite radeon_cp_dispatch_texture() to avoid pingponging back to
 userspace when issuing large (multi-buffer) uploads.

---
 shared/radeon_state.c | 201 +++++++++++++++++++++++++-------------------------
 1 file changed, 99 insertions(+), 102 deletions(-)

(limited to 'shared')

diff --git a/shared/radeon_state.c b/shared/radeon_state.c
index 7b480a7e..1fe007b3 100644
--- a/shared/radeon_state.c
+++ b/shared/radeon_state.c
@@ -1074,19 +1074,30 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 	const u8 *data;
 	int size, dwords, tex_width, blit_width;
 	u32 y, height;
-	int ret = 0, i;
+	int i;
 	RING_LOCALS;
 
 	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
 
-	/* FIXME: Be smarter about this...
+	/* Flush the pixel cache.  This ensures no pixel data gets mixed
+	 * up with the texture data from the host data blit, otherwise
+	 * part of the texture image may be corrupted.
 	 */
-	buf = radeon_freelist_get( dev );
-	if ( !buf ) return DRM_ERR(EAGAIN);
+	BEGIN_RING( 4 );
+	RADEON_FLUSH_CACHE();
+	RADEON_WAIT_UNTIL_IDLE();
+	ADVANCE_RING();
+
+#ifdef __BIG_ENDIAN
+	/* The Mesa texture functions provide the data in little endian as the
+	 * chip wants it, but we need to compensate for the fact that the CP
+	 * ring gets byte-swapped
+	 */
+	BEGIN_RING( 2 );
+	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
+	ADVANCE_RING();
+#endif
 
-	DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
-		   tex->offset >> 10, tex->pitch, tex->format,
-		   image->x, image->y, image->width, image->height );
 
 	/* The compiler won't optimize away a division by a variable,
 	 * even if the only legal values are powers of two.  Thus, we'll
@@ -1120,127 +1131,113 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
 		return DRM_ERR(EINVAL);
 	}
 
-	DRM_DEBUG( "   tex=%dx%d  blit=%d\n",
-		   tex_width, tex->height, blit_width );
-
-	/* Flush the pixel cache.  This ensures no pixel data gets mixed
-	 * up with the texture data from the host data blit, otherwise
-	 * part of the texture image may be corrupted.
-	 */
-	BEGIN_RING( 4 );
-
-	RADEON_FLUSH_CACHE();
-	RADEON_WAIT_UNTIL_IDLE();
-
-	ADVANCE_RING();
-
-#ifdef __BIG_ENDIAN
-	/* The Mesa texture functions provide the data in little endian as the
-	 * chip wants it, but we need to compensate for the fact that the CP
-	 * ring gets byte-swapped
-	 */
-	BEGIN_RING( 2 );
-	OUT_RING_REG( RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT );
-	ADVANCE_RING();
-#endif
-
-	/* Make a copy of the parameters in case we have to update them
-	 * for a multi-pass texture blit.
-	 */
-	y = image->y;
-	height = image->height;
-	data = (const u8 *)image->data;
-
-	size = height * blit_width;
+	DRM_DEBUG("tex=%dx%d blit=%d\n", tex_width, tex->height, blit_width );
 
-	if ( size > RADEON_MAX_TEXTURE_SIZE ) {
-		/* Texture image is too large, do a multipass upload */
-		ret = DRM_ERR(EAGAIN);
+	do {
+		DRM_DEBUG( "tex: ofs=0x%x p=%d f=%d x=%hd y=%hd w=%hd h=%hd\n",
+			   tex->offset >> 10, tex->pitch, tex->format,
+			   image->x, image->y, image->width, image->height );
 
-		/* Adjust the blit size to fit the indirect buffer */
-		height = RADEON_MAX_TEXTURE_SIZE / blit_width;
+		/* Make a copy of the parameters in case we have to
+		 * update them for a multi-pass texture blit.
+		 */
+		y = image->y;
+		height = image->height;
+		data = (const u8 *)image->data;
+		
 		size = height * blit_width;
 
+		if ( size > RADEON_MAX_TEXTURE_SIZE ) {
+			height = RADEON_MAX_TEXTURE_SIZE / blit_width;
+			size = height * blit_width;
+		} else if ( size < 4 && size > 0 ) {
+			size = 4;
+		} else if ( size == 0 ) {
+			return 0;
+		}
+
 		/* Update the input parameters for next time */
 		image->y += height;
 		image->height -= height;
-		image->data = (const char *)image->data + size;
+		image->data += size;
 
-		if ( DRM_COPY_TO_USER( tex->image, image, sizeof(*image) ) ) {
-			DRM_ERROR( "EFAULT on tex->image\n" );
-			return DRM_ERR(EFAULT);
+		buf = radeon_freelist_get( dev );
+		if ( 0 && !buf ) {
+			radeon_do_cp_idle( dev_priv );
+			buf = radeon_freelist_get( dev );
+		}
+		if ( !buf ) {
+			DRM_DEBUG("radeon_cp_dispatch_texture: EAGAIN\n");
+			DRM_COPY_TO_USER( tex->image, image, sizeof(*image) );
+			return DRM_ERR(EAGAIN);
 		}
-	} else if ( size < 4 && size > 0 ) {
-		size = 4;
-	}
 
-	dwords = size / 4;
 
-	/* Dispatch the indirect buffer.
-	 */
-	buffer = (u32 *)((char *)dev_priv->buffers->handle + buf->offset);
-
-	buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
-	buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
-		     RADEON_GMC_BRUSH_NONE |
-		     (format << 8) |
-		     RADEON_GMC_SRC_DATATYPE_COLOR |
-		     RADEON_ROP3_S |
-		     RADEON_DP_SRC_SOURCE_HOST_DATA |
-		     RADEON_GMC_CLR_CMP_CNTL_DIS |
-		     RADEON_GMC_WR_MSK_DIS);
-
-	buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
-	buffer[3] = 0xffffffff;
-	buffer[4] = 0xffffffff;
-	buffer[5] = (y << 16) | image->x;
-	buffer[6] = (height << 16) | image->width;
-	buffer[7] = dwords;
-
-	buffer += 8;
-
-	if ( tex_width >= 32 ) {
-		/* Texture image width is larger than the minimum, so we
-		 * can upload it directly.
-		 */
-		if ( DRM_COPY_FROM_USER( buffer, data, dwords * sizeof(u32) ) ) {
-			DRM_ERROR( "EFAULT on data, %d dwords\n", dwords );
-			return DRM_ERR(EFAULT);
-		}
-	} else {
-		/* Texture image width is less than the minimum, so we
-		 * need to pad out each image scanline to the minimum
-		 * width.
+		/* Dispatch the indirect buffer.
 		 */
-		for ( i = 0 ; i < tex->height ; i++ ) {
-			if ( DRM_COPY_FROM_USER( buffer, data, tex_width ) ) {
-				DRM_ERROR( "EFAULT on pad, %d bytes\n",
-					   tex_width );
+		buffer = (u32*)((char*)dev_priv->buffers->handle + buf->offset);
+		dwords = size / 4;
+		buffer[0] = CP_PACKET3( RADEON_CNTL_HOSTDATA_BLT, dwords + 6 );
+		buffer[1] = (RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+			     RADEON_GMC_BRUSH_NONE |
+			     (format << 8) |
+			     RADEON_GMC_SRC_DATATYPE_COLOR |
+			     RADEON_ROP3_S |
+			     RADEON_DP_SRC_SOURCE_HOST_DATA |
+			     RADEON_GMC_CLR_CMP_CNTL_DIS |
+			     RADEON_GMC_WR_MSK_DIS);
+		
+		buffer[2] = (tex->pitch << 22) | (tex->offset >> 10);
+		buffer[3] = 0xffffffff;
+		buffer[4] = 0xffffffff;
+		buffer[5] = (y << 16) | image->x;
+		buffer[6] = (height << 16) | image->width;
+		buffer[7] = dwords;
+		buffer += 8;
+
+		if ( tex_width >= 32 ) {
+			/* Texture image width is larger than the minimum, so we
+			 * can upload it directly.
+			 */
+			if ( DRM_COPY_FROM_USER( buffer, data, 
+						 dwords * sizeof(u32) ) ) {
+				DRM_ERROR( "EFAULT on data, %d dwords\n", 
+					   dwords );
 				return DRM_ERR(EFAULT);
 			}
-			buffer += 8;
-			data += tex_width;
+		} else {
+			/* Texture image width is less than the minimum, so we
+			 * need to pad out each image scanline to the minimum
+			 * width.
+			 */
+			for ( i = 0 ; i < tex->height ; i++ ) {
+				if ( DRM_COPY_FROM_USER( buffer, data, 
+							 tex_width ) ) {
+					DRM_ERROR( "EFAULT on pad, %d bytes\n",
+						   tex_width );
+					return DRM_ERR(EFAULT);
+				}
+				buffer += 8;
+				data += tex_width;
+			}
 		}
-	}
 
-	buf->pid = DRM_CURRENTPID;
-	buf->used = (dwords + 8) * sizeof(u32);
+		buf->pid = DRM_CURRENTPID;
+		buf->used = (dwords + 8) * sizeof(u32);
+		radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
+		radeon_cp_discard_buffer( dev, buf );
 
-	radeon_cp_dispatch_indirect( dev, buf, 0, buf->used );
-	radeon_cp_discard_buffer( dev, buf );
+	} while (image->height > 0);
 
 	/* Flush the pixel cache after the blit completes.  This ensures
 	 * the texture data is written out to memory before rendering
 	 * continues.
 	 */
 	BEGIN_RING( 4 );
-
 	RADEON_FLUSH_CACHE();
 	RADEON_WAIT_UNTIL_2D_IDLE();
-
 	ADVANCE_RING();
-
-	return ret;
+	return 0;
 }
 
 
-- 
cgit v1.2.3