summaryrefslogtreecommitdiff
path: root/shared-core/radeon_state.c
diff options
context:
space:
mode:
author: Roland Scheidegger <rscheidegger_lists@hispeed.ch> 2004-12-08 16:43:00 +0000
committer: Roland Scheidegger <rscheidegger_lists@hispeed.ch> 2004-12-08 16:43:00 +0000
commit: c4a87c6883ede7bbf486743efe3e9325d96f8e54 (patch)
tree: b5006f47de9c3acf0db8e5dff7e2c772e871ab79 /shared-core/radeon_state.c
parent: 98d01f9542d7f70aa10d68c0e41e631b5f156770 (diff)
(Stephane Marchesin, me) add hyperz support to radeon drm. Only fast z
clear and z buffer compression are working correctly, hierarchical-z is not.
Diffstat (limited to 'shared-core/radeon_state.c')
-rw-r--r--shared-core/radeon_state.c176
1 files changed, 172 insertions, 4 deletions
diff --git a/shared-core/radeon_state.c b/shared-core/radeon_state.c
index ec85efac..5ec9b35e 100644
--- a/shared-core/radeon_state.c
+++ b/shared-core/radeon_state.c
@@ -271,6 +271,7 @@ static __inline__ int radeon_check_and_fixup_packets(drm_radeon_private_t *
case RADEON_EMIT_PP_TEX_SIZE_1:
case RADEON_EMIT_PP_TEX_SIZE_2:
case R200_EMIT_RB3D_BLENDCOLOR:
+ case R200_EMIT_TCL_POINT_SPRITE_CNTL:
/* These packets don't contain memory offsets */
break;
@@ -646,7 +647,9 @@ static struct {
RADEON_PP_TEX_SIZE_0, 2, "RADEON_PP_TEX_SIZE_0"}, {
RADEON_PP_TEX_SIZE_1, 2, "RADEON_PP_TEX_SIZE_1"}, {
RADEON_PP_TEX_SIZE_2, 2, "RADEON_PP_TEX_SIZE_2"}, {
-R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"},};
+ R200_RB3D_BLENDCOLOR, 3, "R200_RB3D_BLENDCOLOR"}, {
+ R200_SE_TCL_POINT_SPRITE_CNTL, 1, "R200_SE_TCL_POINT_SPRITE_CNTL"},
+};
/* ================================================================
* Performance monitoring functions
@@ -858,11 +861,159 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
}
}
+ /* hyper z clear */
+ /* no docs available, based on reverse engineering by Stephane Marchesin */
+ if ((flags & (RADEON_DEPTH | RADEON_STENCIL)) && (flags & RADEON_CLEAR_FASTZ)) {
+
+ int i;
+ int depthpixperline = dev_priv->depth_fmt==RADEON_DEPTH_FORMAT_16BIT_INT_Z?
+ (dev_priv->depth_pitch / 2): (dev_priv->depth_pitch / 4);
+
+ u32 clearmask;
+
+ u32 tempRB3D_DEPTHCLEARVALUE = clear->clear_depth |
+ ((clear->depth_mask & 0xff) << 24);
+
+
+ /* Make sure we restore the 3D state next time.
+ * we haven't touched any "normal" state - still need this?
+ */
+ dev_priv->sarea_priv->ctx_owner = 0;
+
+ if ((dev_priv->flags & CHIP_HAS_HIERZ) && (flags & RADEON_USE_HIERZ)) {
+ /* FIXME : reverse engineer that for Rx00 cards */
+ /* FIXME : the mask supposedly contains low-res z values. So can't set
+ just to the max (0xff? or actually 0x3fff?), need to take z clear
+ value into account? */
+ /* pattern seems to work for r100, though get slight
+ rendering errors with glxgears. If hierz is not enabled for r100,
+ only 4 bits which indicate clear (15,16,31,32, all zero) matter, the
+ other ones are ignored, and the same clear mask can be used. That's
+ very different behaviour than R200 which needs different clear mask
+ and different number of tiles to clear if hierz is enabled or not !?!
+ */
+ clearmask = (0xff<<22)|(0xff<<6)| 0x003f003f;
+ }
+ else {
+ /* clear mask : chooses the clearing pattern.
+ rv250: could be used to clear only parts of macrotiles
+ (but that would get really complicated...)?
+ bit 0 and 1 (either or both of them ?!?!) are used to
+ not clear tile (or maybe one of the bits indicates if the tile is
+ compressed or not), bit 2 and 3 to not clear tile 1,...,.
+ Pattern is as follows:
+ | 0,1 | 4,5 | 8,9 |12,13|16,17|20,21|24,25|28,29|
+ bits -------------------------------------------------
+ | 2,3 | 6,7 |10,11|14,15|18,19|22,23|26,27|30,31|
+ rv100: clearmask covers 2x8 4x1 tiles, but one clear still
+ covers 256 pixels ?!?
+ */
+ clearmask = 0x0;
+ }
+
+ BEGIN_RING( 8 );
+ RADEON_WAIT_UNTIL_2D_IDLE();
+ OUT_RING_REG( RADEON_RB3D_DEPTHCLEARVALUE,
+ tempRB3D_DEPTHCLEARVALUE);
+ /* what offset is this exactly ? */
+ OUT_RING_REG( RADEON_RB3D_ZMASKOFFSET, 0 );
+ /* need ctlstat, otherwise get some strange black flickering */
+ OUT_RING_REG( RADEON_RB3D_ZCACHE_CTLSTAT, RADEON_RB3D_ZC_FLUSH_ALL );
+ ADVANCE_RING();
+
+ for (i = 0; i < nbox; i++) {
+ int tileoffset, nrtilesx, nrtilesy, j;
+ /* it looks like r200 needs rv-style clears, at least if hierz is not enabled? */
+ if ((dev_priv->flags&CHIP_HAS_HIERZ) && !(dev_priv->microcode_version==UCODE_R200)) {
+ /* FIXME : figure this out for r200 (when hierz is enabled). Or
+ maybe r200 actually doesn't need to put the low-res z value into
+ the tile cache like r100, but just needs to clear the hi-level z-buffer?
+ Works for R100, both with hierz and without.
+ R100 seems to operate on 2x1 8x8 tiles, but...
+ odd: offset/nrtiles need to be 64 pix (4 block) aligned? Potentially
+ problematic with resolutions which are not 64 pix aligned? */
+ tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 6;
+ nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
+ nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
+ for (j = 0; j <= nrtilesy; j++) {
+ BEGIN_RING( 4 );
+ OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
+ /* first tile */
+ OUT_RING( tileoffset * 8 );
+ /* the number of tiles to clear */
+ OUT_RING( nrtilesx + 4 );
+ /* clear mask : chooses the clearing pattern. */
+ OUT_RING( clearmask );
+ ADVANCE_RING();
+ tileoffset += depthpixperline >> 6;
+ }
+ }
+ else if (dev_priv->microcode_version==UCODE_R200) {
+ /* works for rv250. */
+ /* find first macro tile (8x2 4x4 z-pixels on rv250) */
+ tileoffset = ((pbox[i].y1 >> 3) * depthpixperline + pbox[i].x1) >> 5;
+ nrtilesx = (pbox[i].x2 >> 5) - (pbox[i].x1 >> 5);
+ nrtilesy = (pbox[i].y2 >> 3) - (pbox[i].y1 >> 3);
+ for (j = 0; j <= nrtilesy; j++) {
+ BEGIN_RING( 4 );
+ OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
+ /* first tile */
+ /* judging by the first tile offset needed, could possibly
+ directly address/clear 4x4 tiles instead of 8x2 * 4x4
+ macro tiles, though would still need clear mask for
+ right/bottom if truly 4x4 granularity is desired ? */
+ OUT_RING( tileoffset * 16 );
+ /* the number of tiles to clear */
+ OUT_RING( nrtilesx + 1 );
+ /* clear mask : chooses the clearing pattern. */
+ OUT_RING( clearmask );
+ ADVANCE_RING();
+ tileoffset += depthpixperline >> 5;
+ }
+ }
+ else { /* rv 100 */
+ /* rv100 might not need 64 pix alignment, who knows */
+ /* offsets are, hmm, weird */
+ tileoffset = ((pbox[i].y1 >> 4) * depthpixperline + pbox[i].x1) >> 6;
+ nrtilesx = ((pbox[i].x2 & ~63) - (pbox[i].x1 & ~63)) >> 4;
+ nrtilesy = (pbox[i].y2 >> 4) - (pbox[i].y1 >> 4);
+ for (j = 0; j <= nrtilesy; j++) {
+ BEGIN_RING( 4 );
+ OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_ZMASK, 2 ) );
+ OUT_RING( tileoffset * 128 );
+ /* the number of tiles to clear */
+ OUT_RING( nrtilesx + 4 );
+ /* clear mask : chooses the clearing pattern. */
+ OUT_RING( clearmask );
+ ADVANCE_RING();
+ tileoffset += depthpixperline >> 6;
+ }
+ }
+ }
+
+ /* TODO don't always clear all hi-level z tiles */
+ if ((dev_priv->flags & CHIP_HAS_HIERZ) && (dev_priv->microcode_version==UCODE_R200)
+ && (flags & RADEON_USE_HIERZ))
+ /* r100 and cards without hierarchical z-buffer have no high-level z-buffer */
+ /* FIXME : the mask supposedly contains low-res z values. So can't set
+ just to the max (0xff? or actually 0x3fff?), need to take z clear
+ value into account? */
+ {
+ BEGIN_RING( 4 );
+ OUT_RING( CP_PACKET3( RADEON_3D_CLEAR_HIZ, 2 ) );
+ OUT_RING( 0x0 ); /* First tile */
+ OUT_RING( 0x3cc0 );
+ OUT_RING( (0xff<<22)|(0xff<<6)| 0x003f003f);
+ ADVANCE_RING();
+ }
+ }
+
/* We have to clear the depth and/or stencil buffers by
* rendering a quad into just those buffers. Thus, we have to
* make sure the 3D engine is configured correctly.
*/
- if ((dev_priv->microcode_version == UCODE_R200) && (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
+ else if ((dev_priv->microcode_version == UCODE_R200) &&
+ (flags & (RADEON_DEPTH | RADEON_STENCIL))) {
int tempPP_CNTL;
int tempRE_CNTL;
@@ -929,6 +1080,14 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
tempRB3D_STENCILREFMASK = 0x00000000;
}
+ if (flags & RADEON_USE_COMP_ZBUF) {
+ tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
+ RADEON_Z_DECOMPRESSION_ENABLE;
+ }
+ if (flags & RADEON_USE_HIERZ) {
+ tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
+ }
+
BEGIN_RING(26);
RADEON_WAIT_UNTIL_2D_IDLE();
@@ -979,6 +1138,8 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
}
} else if ((flags & (RADEON_DEPTH | RADEON_STENCIL))) {
+ int tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
+
rb3d_cntl = depth_clear->rb3d_cntl;
if (flags & RADEON_DEPTH) {
@@ -995,6 +1156,14 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
rb3d_stencilrefmask = 0x00000000;
}
+ if (flags & RADEON_USE_COMP_ZBUF) {
+ tempRB3D_ZSTENCILCNTL |= RADEON_Z_COMPRESSION_ENABLE |
+ RADEON_Z_DECOMPRESSION_ENABLE;
+ }
+ if (flags & RADEON_USE_HIERZ) {
+ tempRB3D_ZSTENCILCNTL |= RADEON_Z_HIERARCHY_ENABLE;
+ }
+
BEGIN_RING(13);
RADEON_WAIT_UNTIL_2D_IDLE();
@@ -1002,8 +1171,7 @@ static void radeon_cp_dispatch_clear(drm_device_t * dev,
OUT_RING(0x00000000);
OUT_RING(rb3d_cntl);
- OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL,
- depth_clear->rb3d_zstencilcntl);
+ OUT_RING_REG(RADEON_RB3D_ZSTENCILCNTL, tempRB3D_ZSTENCILCNTL);
OUT_RING_REG(RADEON_RB3D_STENCILREFMASK, rb3d_stencilrefmask);
OUT_RING_REG(RADEON_RB3D_PLANEMASK, 0x00000000);
OUT_RING_REG(RADEON_SE_CNTL, depth_clear->se_cntl);