diff options
| -rw-r--r-- | shared-core/radeon_cp.c | 12 | ||||
| -rw-r--r-- | shared-core/radeon_drm.h | 6 | ||||
| -rw-r--r-- | shared-core/radeon_drv.h | 18 | ||||
| -rw-r--r-- | shared-core/radeon_state.c | 176 | ||||
| -rw-r--r-- | shared/radeon.h | 6 | ||||
| -rw-r--r-- | shared/radeon_cp.c | 12 | ||||
| -rw-r--r-- | shared/radeon_drm.h | 6 | ||||
| -rw-r--r-- | shared/radeon_drv.h | 12 | ||||
| -rw-r--r-- | shared/radeon_state.c | 173 | 
9 files changed, 406 insertions, 15 deletions
diff --git a/shared-core/radeon_cp.c b/shared-core/radeon_cp.c index ea061ebc..3b3604ba 100644 --- a/shared-core/radeon_cp.c +++ b/shared-core/radeon_cp.c @@ -2007,6 +2007,18 @@ int radeon_preinit(struct drm_device *dev, unsigned long flags)  	dev->dev_private = (void *)dev_priv;  	dev_priv->flags = flags; +	switch (flags & CHIP_FAMILY_MASK) { +	case CHIP_R100: +	case CHIP_RV200: +	case CHIP_R200: +	case CHIP_R300: +		dev_priv->flags |= CHIP_HAS_HIERZ; +		break; +	default: +	/* all other chips have no hierarchical z buffer */ +		break; +	} +  #ifdef __linux__  	/* registers */  	if ((ret = drm_initmap(dev, pci_resource_start(dev->pdev, 2), diff --git a/shared-core/radeon_drm.h b/shared-core/radeon_drm.h index 7c600930..78c3e611 100644 --- a/shared-core/radeon_drm.h +++ b/shared-core/radeon_drm.h @@ -144,7 +144,8 @@  #define RADEON_EMIT_PP_TEX_SIZE_1                   74  #define RADEON_EMIT_PP_TEX_SIZE_2                   75  #define R200_EMIT_RB3D_BLENDCOLOR                   76 -#define RADEON_MAX_STATE_PACKETS                    77 +#define R200_EMIT_TCL_POINT_SPRITE_CNTL             77 +#define RADEON_MAX_STATE_PACKETS                    78  /* Commands understood by cmd_buffer ioctl.  More can be added but   * obviously these can't be removed or changed: @@ -189,6 +190,9 @@ typedef union {  #define RADEON_BACK			0x2  #define RADEON_DEPTH			0x4  #define RADEON_STENCIL                  0x8 +#define RADEON_CLEAR_FASTZ		0x80000000 +#define RADEON_USE_HIERZ		0x40000000 +#define RADEON_USE_COMP_ZBUF		0x20000000  /* Primitive types   */ diff --git a/shared-core/radeon_drv.h b/shared-core/radeon_drv.h index 617a7eda..cd75bc17 100644 --- a/shared-core/radeon_drv.h +++ b/shared-core/radeon_drv.h @@ -42,7 +42,7 @@  #define DRIVER_NAME		"radeon"  #define DRIVER_DESC		"ATI Radeon" -#define DRIVER_DATE		"20020828" +#define DRIVER_DATE		"20041207"  /* Interface history:   * @@ -78,10 +78,12 @@   *       and GL_EXT_blend_[func|equation]_separate on r200   * 1.12- Add R300 CP microcode support - this just loads the CP on r300   *       (No 3D support yet - just microcode loading). + * 1.13- Add packet R200_EMIT_TCL_POINT_SPRITE_CNTL for ARB_point_parameters + *     - Add hyperz support, add hyperz flags to clear ioctl.   */  #define DRIVER_MAJOR		1 -#define DRIVER_MINOR		12 +#define DRIVER_MINOR		13  #define DRIVER_PATCHLEVEL	0  enum radeon_family { @@ -117,6 +119,7 @@ enum radeon_chip_flags {  	CHIP_IS_IGP = 0x00020000UL,  	CHIP_SINGLE_CRTC = 0x00040000UL,  	CHIP_IS_AGP = 0x00080000UL, +	CHIP_HAS_HIERZ = 0x00100000UL,   };  #define GET_RING_HEAD(dev_priv)		DRM_READ32(  (dev_priv)->ring_rptr, 0 ) @@ -466,6 +469,7 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,  #	define RADEON_STENCIL_ENABLE		(1 << 7)  #	define RADEON_Z_ENABLE			(1 << 8)  #define RADEON_RB3D_DEPTHOFFSET		0x1c24 +#define RADEON_RB3D_DEPTHCLEARVALUE	0x3230  #define RADEON_RB3D_DEPTHPITCH		0x1c28  #define RADEON_RB3D_PLANEMASK		0x1d84  #define RADEON_RB3D_STENCILREFMASK	0x1d7c @@ -478,11 +482,15 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,  #define RADEON_RB3D_ZSTENCILCNTL	0x1c2c  #	define RADEON_Z_TEST_MASK		(7 << 4)  #	define RADEON_Z_TEST_ALWAYS		(7 << 4) +#	define RADEON_Z_HIERARCHY_ENABLE        (1 << 8)  #	define RADEON_STENCIL_TEST_ALWAYS	(7 << 12)  #	define RADEON_STENCIL_S_FAIL_REPLACE	(2 << 16)  #	define RADEON_STENCIL_ZPASS_REPLACE	(2 << 20)  #	define RADEON_STENCIL_ZFAIL_REPLACE	(2 << 24) +#	define RADEON_Z_COMPRESSION_ENABLE      (1 << 28) +#	define RADEON_FORCE_Z_DIRTY             (1 << 29)  #	define RADEON_Z_WRITE_ENABLE		(1 << 30) +#	define RADEON_Z_DECOMPRESSION_ENABLE    (1 << 31)  #define RADEON_RBBM_SOFT_RESET		0x00f0  #	define RADEON_SOFT_RESET_CP		(1 <<  0)  #	define RADEON_SOFT_RESET_HI		(1 <<  1) @@ -590,7 +598,7 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,  #	define RADEON_WAIT_3D_IDLECLEAN		(1 << 17)  #	define RADEON_WAIT_HOST_IDLECLEAN	(1 << 18) -#define RADEON_RB3D_ZMASKOFFSET		0x1c34 +#define RADEON_RB3D_ZMASKOFFSET		0x3234  #define RADEON_RB3D_ZSTENCILCNTL	0x1c2c  #	define RADEON_DEPTH_FORMAT_16BIT_INT_Z	(0 << 0)  #	define RADEON_DEPTH_FORMAT_24BIT_INT_Z	(2 << 0) @@ -644,6 +652,8 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,  #	define RADEON_3D_DRAW_IMMD		0x00002900  #	define RADEON_3D_DRAW_INDX		0x00002A00  #	define RADEON_3D_LOAD_VBPNTR		0x00002F00 +#	define RADEON_3D_CLEAR_ZMASK		0x00003200 +#	define RADEON_3D_CLEAR_HIZ		0x00003700  #	define RADEON_CNTL_HOSTDATA_BLT		0x00009400  #	define RADEON_CNTL_PAINT_MULTI		0x00009A00  #	define RADEON_CNTL_BITBLT_MULTI		0x00009B00 @@ -801,6 +811,8 @@ extern void radeon_driver_free_filp_priv(drm_device_t * dev,  #define R200_RB3D_BLENDCOLOR              0x3218 +#define R200_SE_TCL_POINT_SPRITE_CNTL     0x22c4 +  /* Constants */  #define RADEON_MAX_USEC_TIMEOUT		100000	/* 100 ms */ diff --git a/shared-core/radeon_state.c b/shared-core/radeon_state.c index ec85efac..5ec9b35e 100644 --- a/shared-core/radeon_state.c +++ b/shared-core/radeon_state.c @@ -271,6 +271,7 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *  	case RADEON_EMIT_PP_TEX_SIZE_1:  	case RADEON_EMIT_PP_TEX_SIZE_2:  	case R200_EMIT_RB3D_BLENDCOLOR: +	case R200_EMIT_TCL_POINT_SPRITE_CNTL:  		/* These packets don't contain memory offsets */  		break; @@ -646,7 +647,9 @@ static struct {  	RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, {  	RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, {  	RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, { -R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},}; +	R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, { +	R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"}, +};  /* ================================================================   * Performance monitoring functions @@ -858,11 +861,159 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,  		}  	} +	/* hyper z clear */ +	/* no docs available, based on reverse engeneering by Stephane Marchesin */ +	if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) { + +		int i; +		int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z?  +			(dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4); +		 +		u32 clearmask; + +		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth | +			((clear->depth_mask & 0xff) << 24); +	 +		 +		/* Make sure we restore the 3D state next time. +		 * we haven't touched any "normal" state - still need this? +		 */ +		dev_priv->sarea_priv->ctx_owner = 0; + +		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) { +		/* FIXME : reverse engineer that for Rx00 cards */ +		/* FIXME : the mask supposedly contains low-res z values. So can't set +		   just to the max (0xff? or actually 0x3fff?), need to take z clear +		   value into account? */ +		/* pattern seems to work for r100, though get slight +		   rendering errors with glxgears. If hierz is not enabled for r100, +		   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the +		   other ones are ignored, and the same clear mask can be used. That's +		   very different behaviour than R200 which needs different clear mask +		   and different number of tiles to clear if hierz is enabled or not !?! +		*/ +			clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f; +		} +		else { +		/* clear mask : chooses the clearing pattern. +		   rv250: could be used to clear only parts of macrotiles +		   (but that would get really complicated...)? +		   bit 0 and 1 (either or both of them ?!?!) are used to +		   not clear tile (or maybe one of the bits indicates if the tile is +		   compressed or not), bit 2 and 3 to not clear tile 1,...,. +		   Pattern is as follows: +		        | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29| +		   bits ------------------------------------------------- +		        | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31| +		   rv100: clearmask covers 2x8 4x1 tiles, but one clear still +		   covers 256 pixels ?!? +		*/ +			clearmask = 0x0; +		} + +		BEGIN_RING( 8 ); +		RADEON_WAIT_UNTIL_2D_IDLE(); +		OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE, +			tempRB3D_DEPTHCLEARVALUE); +		/* what offset is this exactly ? */ +		OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 ); +		/* need ctlstat, otherwise get some strange black flickering */ +		OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL ); +		ADVANCE_RING(); + +		for (i = 0; i < nbox; i++) { +			int tileoffset, nrtilesx, nrtilesy, j; +			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */ +			if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) { +				/* FIXME : figure this out for r200 (when hierz is enabled). Or +				   maybe r200 actually doesn't need to put the low-res z value into +				   the tile cache like r100, but just needs to clear the hi-level z-buffer? +				   Works for R100, both with hierz and without. +				   R100 seems to operate on 2x1 8x8 tiles, but... +				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially +				   problematic with resolutions which are not 64 pix aligned? */ +				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6; +				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4; +				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3); +				for (j = 0; j <= nrtilesy; j++) { +					BEGIN_RING( 4 ); +					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) ); +					/* first tile */ +					OUT_RING( tileoffset * 8 ); +					/* the number of tiles to clear */ +					OUT_RING( nrtilesx + 4 ); +					/* clear mask : chooses the clearing pattern. */ +					OUT_RING( clearmask ); +					ADVANCE_RING(); +					tileoffset += depthpixperline >> 6; +				} +			} +			else if (dev_priv->microcode_version==UCODE_R200) { +				/* works for rv250. */ +				/* find first macro tile (8x2 4x4 z-pixels on rv250) */ +				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5; +				nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5); +				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3); +				for (j = 0; j <= nrtilesy; j++) { +					BEGIN_RING( 4 ); +					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) ); +					/* first tile */ +					/* judging by the first tile offset needed, could possibly +					   directly address/clear 4x4 tiles instead of 8x2 * 4x4 +					   macro tiles, though would still need clear mask for +					   right/bottom if truely 4x4 granularity is desired ? */ +					OUT_RING( tileoffset * 16 ); +					/* the number of tiles to clear */ +					OUT_RING( nrtilesx + 1 ); +					/* clear mask : chooses the clearing pattern. */ +					OUT_RING( clearmask ); +					ADVANCE_RING(); +					tileoffset += depthpixperline >> 5; +				} +			} +			else { /* rv 100 */ +				/* rv100 might not need 64 pix alignment, who knows */ +				/* offsets are, hmm, weird */ +				tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6; +				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4; +				nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4); +				for (j = 0; j <= nrtilesy; j++) { +					BEGIN_RING( 4 ); +					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) ); +					OUT_RING( tileoffset * 128 ); +					/* the number of tiles to clear */ +					OUT_RING( nrtilesx + 4 ); +					/* clear mask : chooses the clearing pattern. */ +					OUT_RING( clearmask ); +					ADVANCE_RING(); +					tileoffset += depthpixperline >> 6; +				} +			} +		} + +		/* TODO don't always clear all hi-level z tiles */ +		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200) +			&& (flags & RADEON_USE_HIERZ)) +		/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */ +		/* FIXME : the mask supposedly contains low-res z values. So can't set +		   just to the max (0xff? or actually 0x3fff?), need to take z clear +		   value into account? */ +		{ +			BEGIN_RING( 4 ); +			OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) ); +			OUT_RING( 0x0 ); /* First tile */ +			OUT_RING( 0x3cc0 ); +			OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f); +			ADVANCE_RING(); +		} +	} +  	/* We have to clear the depth and/or stencil buffers by  	 * rendering a quad into just those buffers.  Thus, we have to  	 * make sure the 3D engine is configured correctly.  	 */ -	if ((dev_priv->microcode_version == UCODE_R200) && (flags & (RADEON_DEPTH | RADEON_STENCIL))) { +	else if ((dev_priv->microcode_version == UCODE_R200) && +		(flags & (RADEON_DEPTH | RADEON_STENCIL))) {  		int tempPP_CNTL;  		int tempRE_CNTL; @@ -929,6 +1080,14 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,  			tempRB3D_STENCILREFMASK = 0x00000000;  		} +		if (flags & RADEON_USE_COMP_ZBUF) { +			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE | +				RADEON_Z_DECOMPRESSION_ENABLE; +		} +		if (flags & RADEON_USE_HIERZ) { +			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE; +		} +  		BEGIN_RING(26);  		RADEON_WAIT_UNTIL_2D_IDLE(); @@ -979,6 +1138,8 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,  		}  	} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) { +		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl; +		  		rb3d_cntl = depth_clear->rb3d_cntl;  		if (flags & RADEON_DEPTH) { @@ -995,6 +1156,14 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,  			rb3d_stencilrefmask = 0x00000000;  		} +		if (flags & RADEON_USE_COMP_ZBUF) { +			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE | +				RADEON_Z_DECOMPRESSION_ENABLE; +		} +		if (flags & RADEON_USE_HIERZ) { +			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE; +		} +  		BEGIN_RING(13);  		RADEON_WAIT_UNTIL_2D_IDLE(); @@ -1002,8 +1171,7 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,  		OUT_RING(0x00000000);  		OUT_RING(rb3d_cntl); -		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, -			     depth_clear->rb3d_zstencilcntl); +		OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);  		OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);  		OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);  		OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl); diff --git a/shared/radeon.h b/shared/radeon.h index 06f50173..80bfa0c1 100644 --- a/shared/radeon.h +++ b/shared/radeon.h @@ -42,10 +42,10 @@  #define DRIVER_NAME		"radeon"  #define DRIVER_DESC		"ATI Radeon" -#define DRIVER_DATE		"20020828" +#define DRIVER_DATE		"20041207"  #define DRIVER_MAJOR		1 -#define DRIVER_MINOR		12 +#define DRIVER_MINOR		13  #define DRIVER_PATCHLEVEL	0  /* Interface history: @@ -82,6 +82,8 @@   *       and GL_EXT_blend_[func|equation]_separate on r200   * 1.12- Add R300 CP microcode support - this just loads the CP on r300   *       (No 3D support yet - just microcode loading). + * 1.13- Add packet R200_EMIT_TCL_POINT_SPRITE_CNTL for ARB_point_parameters + *     - Add hyperz support, add hyperz flags to clear ioctl.   */  #define DRIVER_IOCTLS							     \   [DRM_IOCTL_NR(DRM_IOCTL_DMA)]               = { radeon_cp_buffers,  1, 0 }, \ diff --git a/shared/radeon_cp.c b/shared/radeon_cp.c index 75a7bd52..5d13f479 100644 --- a/shared/radeon_cp.c +++ b/shared/radeon_cp.c @@ -2017,6 +2017,18 @@ int radeon_preinit( struct drm_device *dev, unsigned long flags )  	dev->dev_private = (void *)dev_priv;  	dev_priv->flags = flags; +	switch (flags & CHIP_FAMILY_MASK) { +	case CHIP_R100: +	case CHIP_RV200: +	case CHIP_R200: +	case CHIP_R300: +		dev_priv->flags |= CHIP_HAS_HIERZ; +		break; +	default: +	/* all other chips have no hierarchical z buffer */ +		break; +	} +  	/* registers */  	if( (ret = DRM(initmap)( dev, pci_resource_start( dev->pdev, 2 ),  			pci_resource_len( dev->pdev, 2 ), _DRM_REGISTERS, 0 ))) diff --git a/shared/radeon_drm.h b/shared/radeon_drm.h index 14d65ea1..e086938f 100644 --- a/shared/radeon_drm.h +++ b/shared/radeon_drm.h @@ -145,7 +145,8 @@  #define RADEON_EMIT_PP_TEX_SIZE_1                   74  #define RADEON_EMIT_PP_TEX_SIZE_2                   75  #define R200_EMIT_RB3D_BLENDCOLOR                   76 -#define RADEON_MAX_STATE_PACKETS                    77 +#define R200_EMIT_TCL_POINT_SPRITE_CNTL             77 +#define RADEON_MAX_STATE_PACKETS                    78  /* Commands understood by cmd_buffer ioctl.  More can be added but @@ -193,6 +194,9 @@ typedef union {  #define RADEON_BACK			0x2  #define RADEON_DEPTH			0x4  #define RADEON_STENCIL                  0x8 +#define RADEON_CLEAR_FASTZ		0x80000000 +#define RADEON_USE_HIERZ		0x40000000 +#define RADEON_USE_COMP_ZBUF		0x20000000  /* Primitive types   */ diff --git a/shared/radeon_drv.h b/shared/radeon_drv.h index 32a6c3fa..9e0e8fed 100644 --- a/shared/radeon_drv.h +++ b/shared/radeon_drv.h @@ -68,6 +68,7 @@ enum radeon_chip_flags {  	CHIP_IS_IGP		= 0x00020000UL,  	CHIP_SINGLE_CRTC	= 0x00040000UL,  	CHIP_IS_AGP		= 0x00080000UL,  +	CHIP_HAS_HIERZ		= 0x00100000UL,   };  #define GET_RING_HEAD(dev_priv)		DRM_READ32(  (dev_priv)->ring_rptr, 0 ) @@ -411,6 +412,7 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );  #	define RADEON_STENCIL_ENABLE		(1 << 7)  #	define RADEON_Z_ENABLE			(1 << 8)  #define RADEON_RB3D_DEPTHOFFSET		0x1c24 +#define RADEON_RB3D_DEPTHCLEARVALUE	0x3230  #define RADEON_RB3D_DEPTHPITCH		0x1c28  #define RADEON_RB3D_PLANEMASK		0x1d84  #define RADEON_RB3D_STENCILREFMASK	0x1d7c @@ -423,11 +425,15 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );  #define RADEON_RB3D_ZSTENCILCNTL	0x1c2c  #	define RADEON_Z_TEST_MASK		(7 << 4)  #	define RADEON_Z_TEST_ALWAYS		(7 << 4) +#	define RADEON_Z_HIERARCHY_ENABLE        (1 << 8)  #	define RADEON_STENCIL_TEST_ALWAYS	(7 << 12)  #	define RADEON_STENCIL_S_FAIL_REPLACE	(2 << 16)  #	define RADEON_STENCIL_ZPASS_REPLACE	(2 << 20)  #	define RADEON_STENCIL_ZFAIL_REPLACE	(2 << 24) +#	define RADEON_Z_COMPRESSION_ENABLE      (1 << 28) +#	define RADEON_FORCE_Z_DIRTY             (1 << 29)  #	define RADEON_Z_WRITE_ENABLE		(1 << 30) +#	define RADEON_Z_DECOMPRESSION_ENABLE    (1 << 31)  #define RADEON_RBBM_SOFT_RESET		0x00f0  #	define RADEON_SOFT_RESET_CP		(1 <<  0)  #	define RADEON_SOFT_RESET_HI		(1 <<  1) @@ -535,7 +541,7 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );  #	define RADEON_WAIT_3D_IDLECLEAN		(1 << 17)  #	define RADEON_WAIT_HOST_IDLECLEAN	(1 << 18) -#define RADEON_RB3D_ZMASKOFFSET		0x1c34 +#define RADEON_RB3D_ZMASKOFFSET		0x3234  #define RADEON_RB3D_ZSTENCILCNTL	0x1c2c  #	define RADEON_DEPTH_FORMAT_16BIT_INT_Z	(0 << 0)  #	define RADEON_DEPTH_FORMAT_24BIT_INT_Z	(2 << 0) @@ -590,6 +596,8 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );  #	define RADEON_3D_DRAW_IMMD		0x00002900  #	define RADEON_3D_DRAW_INDX		0x00002A00  #	define RADEON_3D_LOAD_VBPNTR		0x00002F00 +#	define RADEON_3D_CLEAR_ZMASK		0x00003200 +#	define RADEON_3D_CLEAR_HIZ		0x00003700  #	define RADEON_CNTL_HOSTDATA_BLT		0x00009400  #	define RADEON_CNTL_PAINT_MULTI		0x00009A00  #	define RADEON_CNTL_BITBLT_MULTI		0x00009B00 @@ -748,6 +756,8 @@ extern void radeon_driver_irq_uninstall( drm_device_t *dev );  #define R200_RB3D_BLENDCOLOR              0x3218 +#define R200_SE_TCL_POINT_SPRITE_CNTL     0x22c4 +  /* Constants */  #define RADEON_MAX_USEC_TIMEOUT		100000	/* 100 ms */ diff --git a/shared/radeon_state.c b/shared/radeon_state.c index 3cafd9a3..caba6a3a 100644 --- a/shared/radeon_state.c +++ b/shared/radeon_state.c @@ -205,6 +205,7 @@ static __inline__ int radeon_check_and_fixup_packets( drm_radeon_private_t *dev_  	case RADEON_EMIT_PP_TEX_SIZE_1:  	case RADEON_EMIT_PP_TEX_SIZE_2:  	case R200_EMIT_RB3D_BLENDCOLOR: +	case R200_EMIT_TCL_POINT_SPRITE_CNTL:  		/* These packets don't contain memory offsets */  		break; @@ -569,6 +570,7 @@ static struct {  	{ RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1" },  	{ RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2" },  	{ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR" }, +	{ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},  }; @@ -780,12 +782,159 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  		}  	} +	/* hyper z clear */ +	/* no docs available, based on reverse engeneering by Stephane Marchesin */ +	if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) { + +		int i; +		int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z?  +			(dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4); +		 +		u32 clearmask; + +		u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth | +			((clear->depth_mask & 0xff) << 24); +	 +		 +		/* Make sure we restore the 3D state next time. +		 * we haven't touched any "normal" state - still need this? +		 */ +		dev_priv->sarea_priv->ctx_owner = 0; + +		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) { +		/* FIXME : reverse engineer that for Rx00 cards */ +		/* FIXME : the mask supposedly contains low-res z values. So can't set +		   just to the max (0xff? or actually 0x3fff?), need to take z clear +		   value into account? */ +		/* pattern seems to work for r100, though get slight +		   rendering errors with glxgears. If hierz is not enabled for r100, +		   only 4 bits which indicate clear (15,16,31,32, all zero) matter, the +		   other ones are ignored, and the same clear mask can be used. That's +		   very different behaviour than R200 which needs different clear mask +		   and different number of tiles to clear if hierz is enabled or not !?! +		*/ +			clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f; +		} +		else { +		/* clear mask : chooses the clearing pattern. +		   rv250: could be used to clear only parts of macrotiles +		   (but that would get really complicated...)? +		   bit 0 and 1 (either or both of them ?!?!) are used to +		   not clear tile (or maybe one of the bits indicates if the tile is +		   compressed or not), bit 2 and 3 to not clear tile 1,...,. +		   Pattern is as follows: +		        | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29| +		   bits ------------------------------------------------- +		        | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31| +		   rv100: clearmask covers 2x8 4x1 tiles, but one clear still +		   covers 256 pixels ?!? +		*/ +			clearmask = 0x0; +		} + +		BEGIN_RING( 8 ); +		RADEON_WAIT_UNTIL_2D_IDLE(); +		OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE, +			tempRB3D_DEPTHCLEARVALUE); +		/* what offset is this exactly ? */ +		OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 ); +		/* need ctlstat, otherwise get some strange black flickering */ +		OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL ); +		ADVANCE_RING(); + +		for (i = 0; i < nbox; i++) { +			int tileoffset, nrtilesx, nrtilesy, j; +			/* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */ +			if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) { +				/* FIXME : figure this out for r200 (when hierz is enabled). Or +				   maybe r200 actually doesn't need to put the low-res z value into +				   the tile cache like r100, but just needs to clear the hi-level z-buffer? +				   Works for R100, both with hierz and without. +				   R100 seems to operate on 2x1 8x8 tiles, but... +				   odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially +				   problematic with resolutions which are not 64 pix aligned? */ +				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6; +				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4; +				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3); +				for (j = 0; j <= nrtilesy; j++) { +					BEGIN_RING( 4 ); +					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) ); +					/* first tile */ +					OUT_RING( tileoffset * 8 ); +					/* the number of tiles to clear */ +					OUT_RING( nrtilesx + 4 ); +					/* clear mask : chooses the clearing pattern. */ +					OUT_RING( clearmask ); +					ADVANCE_RING(); +					tileoffset += depthpixperline >> 6; +				} +			} +			else if (dev_priv->microcode_version==UCODE_R200) { +				/* works for rv250. */ +				/* find first macro tile (8x2 4x4 z-pixels on rv250) */ +				tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5; +				nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5); +				nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3); +				for (j = 0; j <= nrtilesy; j++) { +					BEGIN_RING( 4 ); +					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) ); +					/* first tile */ +					/* judging by the first tile offset needed, could possibly +					   directly address/clear 4x4 tiles instead of 8x2 * 4x4 +					   macro tiles, though would still need clear mask for +					   right/bottom if truely 4x4 granularity is desired ? */ +					OUT_RING( tileoffset * 16 ); +					/* the number of tiles to clear */ +					OUT_RING( nrtilesx + 1 ); +					/* clear mask : chooses the clearing pattern. */ +					OUT_RING( clearmask ); +					ADVANCE_RING(); +					tileoffset += depthpixperline >> 5; +				} +			} +			else { /* rv 100 */ +				/* rv100 might not need 64 pix alignment, who knows */ +				/* offsets are, hmm, weird */ +				tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6; +				nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4; +				nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4); +				for (j = 0; j <= nrtilesy; j++) { +					BEGIN_RING( 4 ); +					OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) ); +					OUT_RING( tileoffset * 128 ); +					/* the number of tiles to clear */ +					OUT_RING( nrtilesx + 4 ); +					/* clear mask : chooses the clearing pattern. */ +					OUT_RING( clearmask ); +					ADVANCE_RING(); +					tileoffset += depthpixperline >> 6; +				} +			} +		} + +		/* TODO don't always clear all hi-level z tiles */ +		if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200) +			&& (flags & RADEON_USE_HIERZ)) +		/* r100 and cards without hierarchical z-buffer have no high-level z-buffer */ +		/* FIXME : the mask supposedly contains low-res z values. So can't set +		   just to the max (0xff? or actually 0x3fff?), need to take z clear +		   value into account? */ +		{ +			BEGIN_RING( 4 ); +			OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) ); +			OUT_RING( 0x0 ); /* First tile */ +			OUT_RING( 0x3cc0 ); +			OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f); +			ADVANCE_RING(); +		} +	} +  	/* We have to clear the depth and/or stencil buffers by  	 * rendering a quad into just those buffers.  Thus, we have to  	 * make sure the 3D engine is configured correctly.  	 */ -	if ( (dev_priv->microcode_version==UCODE_R200) && -	     (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { +	else if ((dev_priv->microcode_version == UCODE_R200) && +		(flags & (RADEON_DEPTH | RADEON_STENCIL))) {  		int tempPP_CNTL;  		int tempRE_CNTL; @@ -855,6 +1004,14 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  			tempRB3D_STENCILREFMASK = 0x00000000;  		} +		if (flags & RADEON_USE_COMP_ZBUF) { +			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE | +				RADEON_Z_DECOMPRESSION_ENABLE; +		} +		if (flags & RADEON_USE_HIERZ) { +			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE; +		} +  		BEGIN_RING( 26 );  		RADEON_WAIT_UNTIL_2D_IDLE(); @@ -909,6 +1066,8 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  	}   	else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { +		int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl; +  		rb3d_cntl = depth_clear->rb3d_cntl;  		if ( flags & RADEON_DEPTH ) { @@ -925,6 +1084,14 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  			rb3d_stencilrefmask = 0x00000000;  		} +		if (flags & RADEON_USE_COMP_ZBUF) { +			tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE | +				RADEON_Z_DECOMPRESSION_ENABLE; +		} +		if (flags & RADEON_USE_HIERZ) { +			tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE; +		} +  		BEGIN_RING( 13 );  		RADEON_WAIT_UNTIL_2D_IDLE(); @@ -933,7 +1100,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  		OUT_RING( rb3d_cntl );  		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, -			      depth_clear->rb3d_zstencilcntl ); +			      tempRB3D_ZSTENCILCNTL );  		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,  			      rb3d_stencilrefmask );  		OUT_RING_REG( RADEON_RB3D_PLANEMASK,  | 
