diff options
| -rw-r--r-- | bsd-core/Makefile | 2 | ||||
| -rw-r--r-- | bsd-core/drm_drv.c | 2 | ||||
| -rw-r--r-- | bsd-core/radeon_drv.c | 1 | ||||
| -rw-r--r-- | bsd/Makefile | 2 | ||||
| -rw-r--r-- | bsd/Makefile.bsd | 2 | ||||
| -rw-r--r-- | bsd/drm_drv.h | 2 | ||||
| -rw-r--r-- | bsd/radeon_drv.c | 1 | ||||
| -rw-r--r-- | shared-core/radeon_cp.c | 363 | ||||
| -rw-r--r-- | shared-core/radeon_drm.h | 56 | ||||
| -rw-r--r-- | shared-core/radeon_drv.h | 127 | ||||
| -rw-r--r-- | shared-core/radeon_state.c | 481 | ||||
| -rw-r--r-- | shared/radeon.h | 20 | ||||
| -rw-r--r-- | shared/radeon_cp.c | 363 | ||||
| -rw-r--r-- | shared/radeon_drm.h | 56 | ||||
| -rw-r--r-- | shared/radeon_drv.h | 127 | ||||
| -rw-r--r-- | shared/radeon_state.c | 481 | 
16 files changed, 1733 insertions, 353 deletions
| diff --git a/bsd-core/Makefile b/bsd-core/Makefile index 9c87d963..f26fd53d 100644 --- a/bsd-core/Makefile +++ b/bsd-core/Makefile @@ -1,6 +1,6 @@  # $FreeBSD$  # i810, i830 & sis are not complete -SUBDIR = tdfx mga r128 radeon gamma # i810 sis i830 +SUBDIR = radeon # r128 i810 sis i830 tdfx mga gamma  .include <bsd.subdir.mk> diff --git a/bsd-core/drm_drv.c b/bsd-core/drm_drv.c index fb0454dd..81ca644a 100644 --- a/bsd-core/drm_drv.c +++ b/bsd-core/drm_drv.c @@ -1207,7 +1207,7 @@ int DRM(unlock)( DRM_IOCTL_ARGS )  	DRM(dma_schedule)( dev, 1 );  #endif -	/* FIXME: Do we ever really need to check this??? +	/* FIXME: Do we ever really need to check this?  	 */  	if ( 1 /* !dev->context_flag */ ) {  		if ( DRM(lock_free)( dev, &dev->lock.hw_lock->lock, diff --git a/bsd-core/radeon_drv.c b/bsd-core/radeon_drv.c index fe69fbb8..d02ab959 100644 --- a/bsd-core/radeon_drv.c +++ b/bsd-core/radeon_drv.c @@ -52,6 +52,7 @@ drm_chipinfo_t DRM(devicelist)[] = {  	{0x1002, 0x5157, 1, "ATI Radeon QW 7500 (AGP)"},  	{0x1002, 0x5159, 1, "ATI Radeon QY VE (AGP)"},  	{0x1002, 0x515A, 1, "ATI Radeon QZ VE (AGP)"}, +	{0x1002, 0x514C, 1, "ATI Radeon QL 8500 (AGP)"},  	{0, 0, 0, NULL}  }; diff --git a/bsd/Makefile b/bsd/Makefile index 9c87d963..f26fd53d 100644 --- a/bsd/Makefile +++ b/bsd/Makefile @@ -1,6 +1,6 @@  # $FreeBSD$  # i810, i830 & sis are not complete -SUBDIR = tdfx mga r128 radeon gamma # i810 sis i830 +SUBDIR = radeon # r128 i810 sis i830 tdfx mga gamma  .include <bsd.subdir.mk> diff --git a/bsd/Makefile.bsd b/bsd/Makefile.bsd index 9c87d963..f26fd53d 100644 --- a/bsd/Makefile.bsd +++ b/bsd/Makefile.bsd @@ -1,6 +1,6 @@  # $FreeBSD$  # i810, i830 & sis are not complete -SUBDIR = tdfx mga r128 radeon gamma # i810 sis i830 +SUBDIR = radeon # r128 i810 sis i830 tdfx mga gamma  .include <bsd.subdir.mk> diff --git a/bsd/drm_drv.h b/bsd/drm_drv.h index fb0454dd..81ca644a 100644 --- a/bsd/drm_drv.h +++ b/bsd/drm_drv.h @@ -1207,7 +1207,7 @@ int DRM(unlock)( DRM_IOCTL_ARGS )  	
DRM(dma_schedule)( dev, 1 );  #endif -	/* FIXME: Do we ever really need to check this??? +	/* FIXME: Do we ever really need to check this?  	 */  	if ( 1 /* !dev->context_flag */ ) {  		if ( DRM(lock_free)( dev, &dev->lock.hw_lock->lock, diff --git a/bsd/radeon_drv.c b/bsd/radeon_drv.c index fe69fbb8..d02ab959 100644 --- a/bsd/radeon_drv.c +++ b/bsd/radeon_drv.c @@ -52,6 +52,7 @@ drm_chipinfo_t DRM(devicelist)[] = {  	{0x1002, 0x5157, 1, "ATI Radeon QW 7500 (AGP)"},  	{0x1002, 0x5159, 1, "ATI Radeon QY VE (AGP)"},  	{0x1002, 0x515A, 1, "ATI Radeon QZ VE (AGP)"}, +	{0x1002, 0x514C, 1, "ATI Radeon QL 8500 (AGP)"},  	{0, 0, 0, NULL}  }; diff --git a/shared-core/radeon_cp.c b/shared-core/radeon_cp.c index 8250c09b..01069e49 100644 --- a/shared-core/radeon_cp.c +++ b/shared-core/radeon_cp.c @@ -44,6 +44,266 @@  /* CP microcode (from ATI) */ +static u32 R200_cp_microcode[][2] = { +	{ 0x21007000, 0000000000 },         +	{ 0x20007000, 0000000000 },  +	{ 0x000000ab, 0x00000004 }, +	{ 0x000000af, 0x00000004 }, +	{ 0x66544a49, 0000000000 }, +	{ 0x49494174, 0000000000 }, +	{ 0x54517d83, 0000000000 }, +	{ 0x498d8b64, 0000000000 }, +	{ 0x49494949, 0000000000 }, +	{ 0x49da493c, 0000000000 }, +	{ 0x49989898, 0000000000 }, +	{ 0xd34949d5, 0000000000 }, +	{ 0x9dc90e11, 0000000000 }, +	{ 0xce9b9b9b, 0000000000 }, +	{ 0x000f0000, 0x00000016 }, +	{ 0x352e232c, 0000000000 }, +	{ 0x00000013, 0x00000004 }, +	{ 0x000f0000, 0x00000016 }, +	{ 0x352e272c, 0000000000 }, +	{ 0x000f0001, 0x00000016 }, +	{ 0x3239362f, 0000000000 }, +	{ 0x000077ef, 0x00000002 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x00000020, 0x0000001a }, +	{ 0x00004000, 0x0000001e }, +	{ 0x00061000, 0x00000002 }, +	{ 0x00000020, 0x0000001a }, +	{ 0x00004000, 0x0000001e }, +	{ 0x00061000, 0x00000002 }, +	{ 0x00000020, 0x0000001a }, +	{ 0x00004000, 0x0000001e }, +	{ 0x00000016, 0x00000004 }, +	{ 0x0003802a, 0x00000002 }, +	{ 0x040067e0, 0x00000002 }, +	{ 0x00000016, 0x00000004 }, +	{ 0x000077e0, 0x00000002 }, +	{ 0x00065000, 
0x00000002 }, +	{ 0x000037e1, 0x00000002 }, +	{ 0x040067e1, 0x00000006 }, +	{ 0x000077e0, 0x00000002 }, +	{ 0x000077e1, 0x00000002 }, +	{ 0x000077e1, 0x00000006 }, +	{ 0xffffffff, 0000000000 }, +	{ 0x10000000, 0000000000 }, +	{ 0x0003802a, 0x00000002 }, +	{ 0x040067e0, 0x00000006 }, +	{ 0x00007675, 0x00000002 }, +	{ 0x00007676, 0x00000002 }, +	{ 0x00007677, 0x00000002 }, +	{ 0x00007678, 0x00000006 }, +	{ 0x0003802b, 0x00000002 }, +	{ 0x04002676, 0x00000002 }, +	{ 0x00007677, 0x00000002 }, +	{ 0x00007678, 0x00000006 }, +	{ 0x0000002e, 0x00000018 }, +	{ 0x0000002e, 0x00000018 }, +	{ 0000000000, 0x00000006 }, +	{ 0x0000002f, 0x00000018 }, +	{ 0x0000002f, 0x00000018 }, +	{ 0000000000, 0x00000006 }, +	{ 0x01605000, 0x00000002 }, +	{ 0x00065000, 0x00000002 }, +	{ 0x00098000, 0x00000002 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x64c0603d, 0x00000004 }, +	{ 0x00080000, 0x00000016 }, +	{ 0000000000, 0000000000 }, +	{ 0x0400251d, 0x00000002 }, +	{ 0x00007580, 0x00000002 }, +	{ 0x00067581, 0x00000002 }, +	{ 0x04002580, 0x00000002 }, +	{ 0x00067581, 0x00000002 }, +	{ 0x00000046, 0x00000004 }, +	{ 0x00005000, 0000000000 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x0000750e, 0x00000002 }, +	{ 0x00019000, 0x00000002 }, +	{ 0x00011055, 0x00000014 }, +	{ 0x00000055, 0x00000012 }, +	{ 0x0400250f, 0x00000002 }, +	{ 0x0000504a, 0x00000004 }, +	{ 0x00007565, 0x00000002 }, +	{ 0x00007566, 0x00000002 }, +	{ 0x00000051, 0x00000004 }, +	{ 0x01e655b4, 0x00000002 }, +	{ 0x4401b0dc, 0x00000002 }, +	{ 0x01c110dc, 0x00000002 }, +	{ 0x2666705d, 0x00000018 }, +	{ 0x040c2565, 0x00000002 }, +	{ 0x0000005d, 0x00000018 }, +	{ 0x04002564, 0x00000002 }, +	{ 0x00007566, 0x00000002 }, +	{ 0x00000054, 0x00000004 }, +	{ 0x00401060, 0x00000008 }, +	{ 0x00101000, 0x00000002 }, +	{ 0x000d80ff, 0x00000002 }, +	{ 0x00800063, 0x00000008 }, +	{ 0x000f9000, 0x00000002 }, +	{ 0x000e00ff, 0x00000002 }, +	{ 0000000000, 0x00000006 }, +	{ 0x00000080, 0x00000018 }, +	{ 0x00000054, 0x00000004 }, +	{ 0x00007576, 0x00000002 }, +	{ 
0x00065000, 0x00000002 }, +	{ 0x00009000, 0x00000002 }, +	{ 0x00041000, 0x00000002 }, +	{ 0x0c00350e, 0x00000002 }, +	{ 0x00049000, 0x00000002 }, +	{ 0x00051000, 0x00000002 }, +	{ 0x01e785f8, 0x00000002 }, +	{ 0x00200000, 0x00000002 }, +	{ 0x00600073, 0x0000000c }, +	{ 0x00007563, 0x00000002 }, +	{ 0x006075f0, 0x00000021 }, +	{ 0x20007068, 0x00000004 }, +	{ 0x00005068, 0x00000004 }, +	{ 0x00007576, 0x00000002 }, +	{ 0x00007577, 0x00000002 }, +	{ 0x0000750e, 0x00000002 }, +	{ 0x0000750f, 0x00000002 }, +	{ 0x00a05000, 0x00000002 }, +	{ 0x00600076, 0x0000000c }, +	{ 0x006075f0, 0x00000021 }, +	{ 0x000075f8, 0x00000002 }, +	{ 0x00000076, 0x00000004 }, +	{ 0x000a750e, 0x00000002 }, +	{ 0x0020750f, 0x00000002 }, +	{ 0x00600079, 0x00000004 }, +	{ 0x00007570, 0x00000002 }, +	{ 0x00007571, 0x00000002 }, +	{ 0x00007572, 0x00000006 }, +	{ 0x00005000, 0x00000002 }, +	{ 0x00a05000, 0x00000002 }, +	{ 0x00007568, 0x00000002 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x00000084, 0x0000000c }, +	{ 0x00058000, 0x00000002 }, +	{ 0x0c607562, 0x00000002 }, +	{ 0x00000086, 0x00000004 }, +	{ 0x00600085, 0x00000004 }, +	{ 0x400070dd, 0000000000 }, +	{ 0x000380dd, 0x00000002 }, +	{ 0x00000093, 0x0000001c }, +	{ 0x00065095, 0x00000018 }, +	{ 0x040025bb, 0x00000002 }, +	{ 0x00061096, 0x00000018 }, +	{ 0x040075bc, 0000000000 }, +	{ 0x000075bb, 0x00000002 }, +	{ 0x000075bc, 0000000000 }, +	{ 0x00090000, 0x00000006 }, +	{ 0x00090000, 0x00000002 }, +	{ 0x000d8002, 0x00000006 }, +	{ 0x00005000, 0x00000002 }, +	{ 0x00007821, 0x00000002 }, +	{ 0x00007800, 0000000000 }, +	{ 0x00007821, 0x00000002 }, +	{ 0x00007800, 0000000000 }, +	{ 0x01665000, 0x00000002 }, +	{ 0x000a0000, 0x00000002 }, +	{ 0x000671cc, 0x00000002 }, +	{ 0x0286f1cd, 0x00000002 }, +	{ 0x000000a3, 0x00000010 }, +	{ 0x21007000, 0000000000 }, +	{ 0x000000aa, 0x0000001c }, +	{ 0x00065000, 0x00000002 }, +	{ 0x000a0000, 0x00000002 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x000b0000, 0x00000002 }, +	{ 0x38067000, 0x00000002 }, +	{ 0x000a00a6, 
0x00000004 }, +	{ 0x20007000, 0000000000 }, +	{ 0x01200000, 0x00000002 }, +	{ 0x20077000, 0x00000002 }, +	{ 0x01200000, 0x00000002 }, +	{ 0x20007000, 0000000000 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x0120751b, 0x00000002 }, +	{ 0x8040750a, 0x00000002 }, +	{ 0x8040750b, 0x00000002 }, +	{ 0x00110000, 0x00000002 }, +	{ 0x000380dd, 0x00000002 }, +	{ 0x000000bd, 0x0000001c }, +	{ 0x00061096, 0x00000018 }, +	{ 0x844075bd, 0x00000002 }, +	{ 0x00061095, 0x00000018 }, +	{ 0x840075bb, 0x00000002 }, +	{ 0x00061096, 0x00000018 }, +	{ 0x844075bc, 0x00000002 }, +	{ 0x000000c0, 0x00000004 }, +	{ 0x804075bd, 0x00000002 }, +	{ 0x800075bb, 0x00000002 }, +	{ 0x804075bc, 0x00000002 }, +	{ 0x00108000, 0x00000002 }, +	{ 0x01400000, 0x00000002 }, +	{ 0x006000c4, 0x0000000c }, +	{ 0x20c07000, 0x00000020 }, +	{ 0x000000c6, 0x00000012 }, +	{ 0x00800000, 0x00000006 }, +	{ 0x0080751d, 0x00000006 }, +	{ 0x000025bb, 0x00000002 }, +	{ 0x000040c0, 0x00000004 }, +	{ 0x0000775c, 0x00000002 }, +	{ 0x00a05000, 0x00000002 }, +	{ 0x00661000, 0x00000002 }, +	{ 0x0460275d, 0x00000020 }, +	{ 0x00004000, 0000000000 }, +	{ 0x00007999, 0x00000002 }, +	{ 0x00a05000, 0x00000002 }, +	{ 0x00661000, 0x00000002 }, +	{ 0x0460299b, 0x00000020 }, +	{ 0x00004000, 0000000000 }, +	{ 0x01e00830, 0x00000002 }, +	{ 0x21007000, 0000000000 }, +	{ 0x00005000, 0x00000002 }, +	{ 0x00038042, 0x00000002 }, +	{ 0x040025e0, 0x00000002 }, +	{ 0x000075e1, 0000000000 }, +	{ 0x00000001, 0000000000 }, +	{ 0x000380d9, 0x00000002 }, +	{ 0x04007394, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 
0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +}; + +  static u32 radeon_cp_microcode[][2] = {  	{ 0x21007000, 0000000000 },  	{ 0x20007000, 0000000000 }, @@ -345,6 +605,8 @@ static int radeon_do_pixcache_flush( drm_radeon_private_t *dev_priv )  	u32 tmp;  	int i; +	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; +  	tmp  = RADEON_READ( RADEON_RB2D_DSTCACHE_CTLSTAT );  	tmp |= RADEON_RB2D_DC_FLUSH_ALL;  	RADEON_WRITE( RADEON_RB2D_DSTCACHE_CTLSTAT, tmp ); @@ -369,6 +631,8 @@ static int radeon_do_wait_for_fifo( drm_radeon_private_t *dev_priv,  {  	int i; +	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; +  	for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) {  		int slots = ( RADEON_READ( RADEON_RBBM_STATUS )  			      & RADEON_RBBM_FIFOCNT_MASK ); @@ -387,6 +651,8 @@ static int radeon_do_wait_for_idle( drm_radeon_private_t *dev_priv )  {  	int i, ret; +	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; +  	ret = radeon_do_wait_for_fifo( dev_priv, 64 );  	if ( ret ) return ret; @@ -420,11 +686,26 @@ static void radeon_cp_load_microcode( drm_radeon_private_t *dev_priv )  	radeon_do_wait_for_idle( dev_priv );  	RADEON_WRITE( RADEON_CP_ME_RAM_ADDR, 0 ); -	for ( i = 0 ; i < 256 ; i++ ) { -		RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, -			      radeon_cp_microcode[i][1] ); -		RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, -			      radeon_cp_microcode[i][0] ); + +	if (dev_priv->is_r200) +	{ +		DRM_INFO("Loading R200 Microcode\n"); +		for ( i = 0 ; i < 
256 ; i++ )  +		{ +			RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, +				      R200_cp_microcode[i][1] ); +			RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, +				      R200_cp_microcode[i][0] ); +		} +	} +	else +	{ +		for ( i = 0 ; i < 256 ; i++ ) { +			RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, +				      radeon_cp_microcode[i][1] ); +			RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, +				      radeon_cp_microcode[i][0] ); +		}  	}  } @@ -736,12 +1017,10 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init )  		return DRM_ERR(EINVAL);  	} +	dev_priv->is_r200 = (init->func == RADEON_INIT_R200_CP); +	dev_priv->do_boxes = 1;  	dev_priv->cp_mode = init->cp_mode; -	/* Simple idle check. -	 */ -	atomic_set( &dev_priv->idle_count, 0 ); -  	/* We don't support anything other than bus-mastering ring mode,  	 * but the ring can be in either AGP or PCI space for the ring  	 * read pointer. @@ -1028,6 +1307,7 @@ int radeon_cp_init( DRM_IOCTL_ARGS )  	switch ( init.func ) {  	case RADEON_INIT_CP: +	case RADEON_INIT_R200_CP:  		return radeon_do_init_cp( dev, &init );  	case RADEON_CLEANUP_CP:  		return radeon_do_cleanup_cp( dev ); @@ -1169,6 +1449,14 @@ int radeon_fullscreen( DRM_IOCTL_ARGS )   *   completed rendering.     *   * KW:  It's also a good way to find free buffers quickly. + * + * KW: Ideally this loop wouldn't exist, and freelist_get wouldn't + * sleep.  However, bugs in older versions of radeon_accel.c mean that + * we essentially have to do this, else old clients will break. + *  + * However, it does leave open a potential deadlock where all the + * buffers are held by other clients, which can't release them because + * they can't get the lock.     
*/  drm_buf_t *radeon_freelist_get( drm_device_t *dev ) @@ -1193,17 +1481,56 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev )  			buf_priv = buf->dev_private;  			if ( buf->pid == 0 || (buf->pending &&   					       buf_priv->age <= done_age) ) { +				dev_priv->stats.requested_bufs++;  				buf->pending = 0;  				return buf;  			}  			start = 0;  		} -		DRM_UDELAY( 1 ); + +		if (t) { +			DRM_UDELAY( 1 ); +			dev_priv->stats.freelist_loops++; +		}  	}  	DRM_ERROR( "returning NULL!\n" );  	return NULL;  } +#if 0 +drm_buf_t *radeon_freelist_get( drm_device_t *dev ) +{ +	drm_device_dma_t *dma = dev->dma; +	drm_radeon_private_t *dev_priv = dev->dev_private; +	drm_radeon_buf_priv_t *buf_priv; +	drm_buf_t *buf; +	int i, t; +	int start; +	u32 done_age = DRM_READ32(&dev_priv->scratch[1]); + +	if ( ++dev_priv->last_buf >= dma->buf_count ) +		dev_priv->last_buf = 0; + +	start = dev_priv->last_buf; +	dev_priv->stats.freelist_loops++; +	 +	for ( t = 0 ; t < 2 ; t++ ) { +		for ( i = start ; i < dma->buf_count ; i++ ) { +			buf = dma->buflist[i]; +			buf_priv = buf->dev_private; +			if ( buf->pid == 0 || (buf->pending &&  +					       buf_priv->age <= done_age) ) { +				dev_priv->stats.requested_bufs++; +				buf->pending = 0; +				return buf; +			} +		} +		start = 0; +	} + +	return NULL; +} +#endif  void radeon_freelist_reset( drm_device_t *dev )  { @@ -1228,11 +1555,23 @@ int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n )  {  	drm_radeon_ring_buffer_t *ring = &dev_priv->ring;  	int i; +	u32 last_head = GET_RING_HEAD(ring);  	for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { -		radeon_update_ring_snapshot( ring ); +		u32 head = GET_RING_HEAD(ring); + +		ring->space = (head - ring->tail) * sizeof(u32); +		if ( ring->space <= 0 ) +			ring->space += ring->size;  		if ( ring->space > n )  			return 0; +		 +		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + +		if (head != last_head) +			i = 0; +		last_head = head; +  		DRM_UDELAY( 1 );  	} @@ -1251,7 +1590,7 @@ static 
int radeon_cp_get_buffers( drm_device_t *dev, drm_dma_t *d )  	for ( i = d->granted_count ; i < d->request_count ; i++ ) {  		buf = radeon_freelist_get( dev ); -		if ( !buf ) return DRM_ERR(EAGAIN); +		if ( !buf ) return DRM_ERR(EBUSY); /* NOTE: broken client */  		buf->pid = DRM_CURRENTPID; diff --git a/shared-core/radeon_drm.h b/shared-core/radeon_drm.h index 3802e46c..6469bfb8 100644 --- a/shared-core/radeon_drm.h +++ b/shared-core/radeon_drm.h @@ -89,7 +89,47 @@  #define RADEON_EMIT_SE_ZBIAS_FACTOR                 18 /* zbias/2 */  #define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT           19 /* tcl/11 */  #define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED   20 /* material/17 */ -#define RADEON_MAX_STATE_PACKETS                    21 +#define R200_EMIT_PP_TXCBLEND_0                     21 /* tex0/4 */ +#define R200_EMIT_PP_TXCBLEND_1                     22 /* tex1/4 */ +#define R200_EMIT_PP_TXCBLEND_2                     23 /* tex2/4 */ +#define R200_EMIT_PP_TXCBLEND_3                     24 /* tex3/4 */ +#define R200_EMIT_PP_TXCBLEND_4                     25 /* tex4/4 */ +#define R200_EMIT_PP_TXCBLEND_5                     26 /* tex5/4 */ +#define R200_EMIT_PP_TXCBLEND_6                     27 /* /4 */ +#define R200_EMIT_PP_TXCBLEND_7                     28 /* /4 */ +#define R200_EMIT_TCL_LIGHT_MODEL_CTL_0             29 /* tcl/7 */ +#define R200_EMIT_TFACTOR_0                         30 /* tf/7 */ +#define R200_EMIT_VTX_FMT_0                         31 /* vtx/5 */ +#define R200_EMIT_VAP_CTL                           32 /* vap/1 */ +#define R200_EMIT_MATRIX_SELECT_0                   33 /* msl/5 */ +#define R200_EMIT_TEX_PROC_CTL_2                    34 /* tcg/5 */ +#define R200_EMIT_TCL_UCP_VERT_BLEND_CTL            35 /* tcl/1 */ +#define R200_EMIT_PP_TXFILTER_0                     36 /* tex0/6 */ +#define R200_EMIT_PP_TXFILTER_1                     37 /* tex1/6 */ +#define R200_EMIT_PP_TXFILTER_2                     38 /* tex2/6 */ +#define R200_EMIT_PP_TXFILTER_3  
                   39 /* tex3/6 */ +#define R200_EMIT_PP_TXFILTER_4                     40 /* tex4/6 */ +#define R200_EMIT_PP_TXFILTER_5                     41 /* tex5/6 */ +#define R200_EMIT_PP_TXOFFSET_0                     42 /* tex0/1 */ +#define R200_EMIT_PP_TXOFFSET_1                     43 /* tex1/1 */ +#define R200_EMIT_PP_TXOFFSET_2                     44 /* tex2/1 */ +#define R200_EMIT_PP_TXOFFSET_3                     45 /* tex3/1 */ +#define R200_EMIT_PP_TXOFFSET_4                     46 /* tex4/1 */ +#define R200_EMIT_PP_TXOFFSET_5                     47 /* tex5/1 */ +#define R200_EMIT_VTE_CNTL                          48 /* vte/1 */ +#define R200_EMIT_OUTPUT_VTX_COMP_SEL               49 /* vtx/1 */ +#define R200_EMIT_PP_TAM_DEBUG3                     50 /* tam/1 */ +#define R200_EMIT_PP_CNTL_X                         51 /* cst/1 */ +#define R200_EMIT_RB3D_DEPTHXY_OFFSET               52 /* cst/1 */ +#define R200_EMIT_RE_AUX_SCISSOR_CNTL               53 /* cst/1 */ +#define R200_EMIT_RE_SCISSOR_TL_0                   54 /* cst/2 */ +#define R200_EMIT_RE_SCISSOR_TL_1                   55 /* cst/2 */ +#define R200_EMIT_RE_SCISSOR_TL_2                   56 /* cst/2 */ +#define R200_EMIT_SE_VAP_CNTL_STATUS                57 /* cst/1 */ +#define R200_EMIT_SE_VTX_STATE_CNTL                 58 /* cst/1 */ +#define R200_EMIT_RE_POINTSIZE                      59 /* cst/1 */ +#define R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0       60 /* cst/4 */ +#define RADEON_MAX_STATE_PACKETS                    61  /* Commands understood by cmd_buffer ioctl.  
More can be added but @@ -101,24 +141,25 @@  #define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */  #define RADEON_CMD_PACKET3     5 /* emit hw packet */  #define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */ +#define RADEON_CMD_SCALARS2     7 /* r200 stopgap */  typedef union {  	int i;  	struct {  -		char cmd_type, pad0, pad1, pad2; +		unsigned char cmd_type, pad0, pad1, pad2;  	} header;  	struct {  -		char cmd_type, packet_id, pad0, pad1; +		unsigned char cmd_type, packet_id, pad0, pad1;  	} packet;  	struct {  -		char cmd_type, offset, stride, count;  +		unsigned char cmd_type, offset, stride, count;   	} scalars;  	struct {  -		char cmd_type, offset, stride, count;  +		unsigned char cmd_type, offset, stride, count;   	} vectors;  	struct {  -		char cmd_type, buf_idx, pad0, pad1;  +		unsigned char cmd_type, buf_idx, pad0, pad1;   	} dma;  } drm_radeon_cmd_header_t; @@ -327,7 +368,8 @@ typedef struct {  typedef struct drm_radeon_init {  	enum {  		RADEON_INIT_CP    = 0x01, -		RADEON_CLEANUP_CP = 0x02 +		RADEON_CLEANUP_CP = 0x02, +		RADEON_INIT_R200_CP = 0x03,	  	} func;  	unsigned long sarea_priv_offset;  	int is_pci; diff --git a/shared-core/radeon_drv.h b/shared-core/radeon_drv.h index 15c0d4dd..7c341b39 100644 --- a/shared-core/radeon_drv.h +++ b/shared-core/radeon_drv.h @@ -79,12 +79,25 @@ typedef struct drm_radeon_private {  	int writeback_works;  	int usec_timeout; + +	int is_r200; +  	int is_pci;  	unsigned long phys_pci_gart;  	dma_addr_t bus_pci_gart; -	atomic_t idle_count; - +	struct { +		u32 boxes; +		int freelist_timeouts; +		int freelist_loops; +		int requested_bufs; +		int last_frame_reads; +		int last_clear_reads; +		int clears; +		int texture_uploads; +	} stats; + +	int do_boxes;  	int page_flipping;  	int current_page;  	u32 crtc_offset; @@ -134,14 +147,6 @@ extern drm_buf_t *radeon_freelist_get( drm_device_t *dev );  extern int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n ); -static __inline__ void 
-radeon_update_ring_snapshot( drm_radeon_ring_buffer_t *ring ) -{ -	ring->space = (GET_RING_HEAD(ring) - ring->tail) * sizeof(u32); -	if ( ring->space <= 0 ) -		ring->space += ring->size; -} -  extern int radeon_do_cp_idle( drm_radeon_private_t *dev_priv );  extern int radeon_do_cleanup_cp( drm_device_t *dev );  extern int radeon_do_cleanup_pageflip( drm_device_t *dev ); @@ -159,6 +164,14 @@ extern int radeon_cp_cmdbuf( DRM_IOCTL_ARGS );  extern int radeon_cp_getparam( DRM_IOCTL_ARGS );  extern int radeon_cp_flip( DRM_IOCTL_ARGS ); +/* Flags for stats.boxes + */ +#define RADEON_BOX_DMA_IDLE      0x1 +#define RADEON_BOX_RING_FULL     0x2 +#define RADEON_BOX_FLIP          0x4 +#define RADEON_BOX_WAIT_IDLE     0x8 +#define RADEON_BOX_TEXTURE_LOAD  0x10 +  /* Register definitions, register access macros and drmAddMap constants @@ -282,6 +295,7 @@ extern int radeon_cp_flip( DRM_IOCTL_ARGS );  #	define RADEON_STENCIL_ENABLE		(1 << 7)  #	define RADEON_Z_ENABLE			(1 << 8)  #define RADEON_RB3D_DEPTHOFFSET		0x1c24 +#define RADEON_RB3D_DEPTHPITCH		0x1c28  #define RADEON_RB3D_PLANEMASK		0x1d84  #define RADEON_RB3D_STENCILREFMASK	0x1d7c  #define RADEON_RB3D_ZCACHE_MODE		0x3250 @@ -513,6 +527,62 @@ extern int radeon_cp_flip( DRM_IOCTL_ARGS );  #define RADEON_TXFORMAT_ARGB8888	6  #define RADEON_TXFORMAT_RGBA8888	7 +#define R200_PP_TXCBLEND_0                0x2f00 +#define R200_PP_TXCBLEND_1                0x2f10 +#define R200_PP_TXCBLEND_2                0x2f20 +#define R200_PP_TXCBLEND_3                0x2f30 +#define R200_PP_TXCBLEND_4                0x2f40 +#define R200_PP_TXCBLEND_5                0x2f50 +#define R200_PP_TXCBLEND_6                0x2f60 +#define R200_PP_TXCBLEND_7                0x2f70 +#define R200_SE_TCL_LIGHT_MODEL_CTL_0     0x2268  +#define R200_PP_TFACTOR_0                 0x2ee0 +#define R200_SE_VTX_FMT_0                 0x2088 +#define R200_SE_VAP_CNTL                  0x2080 +#define R200_SE_TCL_MATRIX_SEL_0          0x2230 +#define 
R200_SE_TCL_TEX_PROC_CTL_2        0x22a8  +#define R200_SE_TCL_UCP_VERT_BLEND_CTL    0x22c0  +#define R200_PP_TXFILTER_5                0x2ca0  +#define R200_PP_TXFILTER_4                0x2c80  +#define R200_PP_TXFILTER_3                0x2c60  +#define R200_PP_TXFILTER_2                0x2c40  +#define R200_PP_TXFILTER_1                0x2c20  +#define R200_PP_TXFILTER_0                0x2c00  +#define R200_PP_TXOFFSET_5                0x2d78 +#define R200_PP_TXOFFSET_4                0x2d60 +#define R200_PP_TXOFFSET_3                0x2d48 +#define R200_PP_TXOFFSET_2                0x2d30 +#define R200_PP_TXOFFSET_1                0x2d18 +#define R200_PP_TXOFFSET_0                0x2d00 +#define R200_RE_AUX_SCISSOR_CNTL          0x26f0 +#define R200_SE_VTE_CNTL                  0x20b0 +#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL   0x2250 +#define R200_PP_TAM_DEBUG3                0x2d9c +#define R200_PP_CNTL_X                    0x2cc4 +#define R200_SE_VAP_CNTL_STATUS           0x2140 +#define R200_RE_SCISSOR_TL_0              0x1cd8 +#define R200_RE_SCISSOR_TL_1              0x1ce0 +#define R200_RE_SCISSOR_TL_2              0x1ce8 +#define R200_RB3D_DEPTHXY_OFFSET          0x1d60  +#define R200_RE_AUX_SCISSOR_CNTL          0x26f0 +#define R200_SE_VTX_STATE_CNTL            0x2180 +#define R200_RE_POINTSIZE                 0x2648 +#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0 0x2254 + + +#define SE_VAP_CNTL__TCL_ENA_MASK                          0x00000001 +#define SE_VAP_CNTL__FORCE_W_TO_ONE_MASK                   0x00010000 +#define SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT                 0x00000012 +#define SE_VTE_CNTL__VTX_XY_FMT_MASK                       0x00000100 +#define SE_VTE_CNTL__VTX_Z_FMT_MASK                        0x00000200 +#define SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK                  0x00000001 +#define SE_VTX_FMT_0__VTX_W0_PRESENT_MASK                  0x00000002 +#define SE_VTX_FMT_0__VTX_COLOR_0_FMT__SHIFT               0x0000000b +#define 
R200_3D_DRAW_IMMD_2      0xC0003500 +#define R200_SE_VTX_FMT_1                 0x208c +#define R200_RE_CNTL                      0x1c50  + +  /* Constants */  #define RADEON_MAX_USEC_TIMEOUT		100000	/* 100 ms */ @@ -620,30 +690,16 @@ do {									\  	}								\  } while (0) + +/* Perfbox functionality only.   + */  #define RING_SPACE_TEST_WITH_RETURN( dev_priv )				\  do {									\ -	drm_radeon_ring_buffer_t *ring = &dev_priv->ring; int i;	\ -	if ( ring->space < ring->high_mark ) {				\ -		for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) {	\ -			radeon_update_ring_snapshot( ring );		\ -			if ( ring->space >= ring->high_mark )		\ -				goto __ring_space_done;			\ -			DRM_UDELAY( 1 );				\ -		}							\ -		DRM_ERROR( "ring space check from memory failed, reading register...\n" );	\ -		/* If ring space check fails from RAM, try reading the	\ -		   register directly */					\ -		ring->space = 4 * ( RADEON_READ( RADEON_CP_RB_RPTR ) - ring->tail );	\ -		if ( ring->space <= 0 )					\ -			ring->space += ring->size;			\ -		if ( ring->space >= ring->high_mark )			\ -			goto __ring_space_done;				\ -									\ -		DRM_ERROR( "ring space check failed!\n" );		\ -		return DRM_ERR(EBUSY);				\ +	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE)) {		\ +		u32 head = GET_RING_HEAD(&dev_priv->ring);		\ +		if (head == dev_priv->ring.tail)			\ +			dev_priv->stats.boxes |= RADEON_BOX_DMA_IDLE;	\  	}								\ - __ring_space_done:							\ -	;								\  } while (0)  #define VB_AGE_TEST_WITH_RETURN( dev_priv )				\ @@ -710,16 +766,15 @@ do {									\  	}								\  	if (((dev_priv->ring.tail + _nr) & mask) != write) {		\  		DRM_ERROR( 						\ -			"ADVANCE_RING(): mismatch: nr: %x write: %x\n",	\ +			"ADVANCE_RING(): mismatch: nr: %x write: %x line: %d\n",	\  			((dev_priv->ring.tail + _nr) & mask),		\ -			write);						\ +			write, __LINE__);						\  	} else								\  		dev_priv->ring.tail = write;				\  } while (0)  #define COMMIT_RING() do {					    \ -	radeon_flush_write_combine();					\ -	
RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail );		\ +	RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail );		    \  } while (0)  #define OUT_RING( x ) do {						\ @@ -760,6 +815,4 @@ do {									\  } while (0) -#define RADEON_PERFORMANCE_BOXES	0 -  #endif /* __RADEON_DRV_H__ */ diff --git a/shared-core/radeon_state.c b/shared-core/radeon_state.c index 1cc6bde8..7f84e739 100644 --- a/shared-core/radeon_state.c +++ b/shared-core/radeon_state.c @@ -239,18 +239,50 @@ static struct {  	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },  	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },  	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" }, +	{ R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" }, +	{ R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" }, +	{ R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" }, +	{ R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" }, +	{ R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" }, +	{ R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" }, +	{ R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" }, +	{ R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" }, +	{ R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" }, +	{ R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" }, +	{ R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" }, +	{ R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" }, +	{ R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" }, +	{ R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" }, +	{ R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" }, +	{ R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" }, +	{ R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" }, +	{ R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" }, +	{ R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" }, +	{ R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" }, +	{ R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" }, +	{ R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" }, +	{ R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" }, +	{ R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" }, +	
{ R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" }, +	{ R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" }, +	{ R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" }, +	{ R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" }, +	{ R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" }, +	{ R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" }, +	{ R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" },  +	{ R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" },  +	{ R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" },  +	{ R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" },  +	{ R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" },  +	{ R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" },  +	{ R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" },  +	{ R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" },  +	{ R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" },  +	{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },  }; - - - - - - - -#if RADEON_PERFORMANCE_BOXES  /* ================================================================   * Performance monitoring functions   */ @@ -259,10 +291,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,  			      int x, int y, int w, int h,  			      int r, int g, int b )  { -	u32 pitch, offset;  	u32 color;  	RING_LOCALS; +	x += dev_priv->sarea_priv->boxes[0].x1; +	y += dev_priv->sarea_priv->boxes[0].y1; +  	switch ( dev_priv->color_fmt ) {  	case RADEON_COLOR_FORMAT_RGB565:  		color = (((r & 0xf8) << 8) | @@ -275,8 +309,11 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,  		break;  	} -	offset = dev_priv->back_offset; -	pitch = dev_priv->back_pitch >> 3; +	BEGIN_RING( 4 ); +	RADEON_WAIT_UNTIL_3D_IDLE();		 +	OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); +	OUT_RING( 0xffffffff ); +	ADVANCE_RING();  	BEGIN_RING( 6 ); @@ -288,7 +325,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,  		  RADEON_ROP3_P |  		  RADEON_GMC_CLR_CMP_CNTL_DIS ); -	OUT_RING( (pitch << 22) | (offset >> 5) ); + 	
if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {  +		OUT_RING( dev_priv->front_pitch_offset ); + 	} else {	  +		OUT_RING( dev_priv->back_pitch_offset ); + 	}  +  	OUT_RING( color );  	OUT_RING( (x << 16) | y ); @@ -299,16 +341,57 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,  static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )  { -	if ( atomic_read( &dev_priv->idle_count ) == 0 ) { -		radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 ); -	} else { -		atomic_set( &dev_priv->idle_count, 0 ); +	/* Collapse various things into a wait flag -- trying to +	 * guess if userspace slept -- better just to have them tell us. +	 */ +	if (dev_priv->stats.last_frame_reads > 1 || +	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) { +		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;  	} -} -#endif +	if (dev_priv->stats.freelist_loops) { +		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; +	} +	/* Purple box for page flipping +	 */ +	if ( dev_priv->stats.boxes & RADEON_BOX_FLIP )  +		radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 ); + +	/* Red box if we have to wait for idle at any point +	 */ +	if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE )  +		radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 ); + +	/* Blue box: lost context? 
+	 */ +	/* Yellow box for texture swaps +	 */ +	if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD )  +		radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 ); + +	/* Green box if hardware never idles (as far as we can tell) +	 */ +	if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) )  +		radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 ); + + +	/* Draw bars indicating number of buffers allocated  +	 * (not a great measure, easily confused) +	 */ +	if (dev_priv->stats.requested_bufs) { +		if (dev_priv->stats.requested_bufs > 100) +			dev_priv->stats.requested_bufs = 100; + +		radeon_clear_box( dev_priv, 4, 16,   +				  dev_priv->stats.requested_bufs, 4, +				  196, 128, 128 ); +	} + +	memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) ); + +}  /* ================================================================   * CP command dispatch functions   */ @@ -328,6 +411,8 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  	RING_LOCALS;  	DRM_DEBUG( "flags = 0x%x\n", flags ); +	dev_priv->stats.clears++; +  	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {  		unsigned int tmp = flags; @@ -336,120 +421,251 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  		if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;  	} +	if ( flags & (RADEON_FRONT | RADEON_BACK) ) { + +		BEGIN_RING( 4 ); + +		/* Ensure the 3D stream is idle before doing a +		 * 2D fill to clear the front or back buffer. +		 */ +		RADEON_WAIT_UNTIL_3D_IDLE(); +		 +		OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); +		OUT_RING( clear->color_mask ); + +		ADVANCE_RING(); + +		/* Make sure we restore the 3D state next time. 
+		 */ +		dev_priv->sarea_priv->ctx_owner = 0; + +		for ( i = 0 ; i < nbox ; i++ ) { +			int x = pbox[i].x1; +			int y = pbox[i].y1; +			int w = pbox[i].x2 - x; +			int h = pbox[i].y2 - y; + +			DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n", +				   x, y, w, h, flags ); + +			if ( flags & RADEON_FRONT ) { +				BEGIN_RING( 6 ); +				 +				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); +				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | +					  RADEON_GMC_BRUSH_SOLID_COLOR | +					  (dev_priv->color_fmt << 8) | +					  RADEON_GMC_SRC_DATATYPE_COLOR | +					  RADEON_ROP3_P | +					  RADEON_GMC_CLR_CMP_CNTL_DIS ); + +				OUT_RING( dev_priv->front_pitch_offset ); +				OUT_RING( clear->clear_color ); +				 +				OUT_RING( (x << 16) | y ); +				OUT_RING( (w << 16) | h ); +				 +				ADVANCE_RING(); +			} +			 +			if ( flags & RADEON_BACK ) { +				BEGIN_RING( 6 ); +				 +				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); +				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | +					  RADEON_GMC_BRUSH_SOLID_COLOR | +					  (dev_priv->color_fmt << 8) | +					  RADEON_GMC_SRC_DATATYPE_COLOR | +					  RADEON_ROP3_P | +					  RADEON_GMC_CLR_CMP_CNTL_DIS ); +				 +				OUT_RING( dev_priv->back_pitch_offset ); +				OUT_RING( clear->clear_color ); + +				OUT_RING( (x << 16) | y ); +				OUT_RING( (w << 16) | h ); + +				ADVANCE_RING(); +			} +		} +	} +  	/* We have to clear the depth and/or stencil buffers by  	 * rendering a quad into just those buffers.  Thus, we have to  	 * make sure the 3D engine is configured correctly.  	 
*/ -	if ( flags & (RADEON_DEPTH | RADEON_STENCIL) ) { -		rb3d_cntl = depth_clear->rb3d_cntl; +	if ( dev_priv->is_r200 && +	     (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { -		if ( flags & RADEON_DEPTH ) { -			rb3d_cntl |=  RADEON_Z_ENABLE; -		} else { -			rb3d_cntl &= ~RADEON_Z_ENABLE; -		} +		int tempPP_CNTL; +		int tempRE_CNTL; +		int tempRB3D_CNTL; +		int tempRB3D_ZSTENCILCNTL; +		int tempRB3D_STENCILREFMASK; +		int tempRB3D_PLANEMASK; +		int tempSE_CNTL; +		int tempSE_VTE_CNTL; +		int tempSE_VTX_FMT_0; +		int tempSE_VTX_FMT_1; +		int tempSE_VAP_CNTL; +		int tempRE_AUX_SCISSOR_CNTL; -		if ( flags & RADEON_STENCIL ) { -			rb3d_cntl |=  RADEON_STENCIL_ENABLE; -			rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */ -		} else { -			rb3d_cntl &= ~RADEON_STENCIL_ENABLE; -			rb3d_stencilrefmask = 0x00000000; -		} -	} +		tempPP_CNTL = 0; +		tempRE_CNTL = 0; -	for ( i = 0 ; i < nbox ; i++ ) { -		int x = pbox[i].x1; -		int y = pbox[i].y1; -		int w = pbox[i].x2 - x; -		int h = pbox[i].y2 - y; +		tempRB3D_CNTL = depth_clear->rb3d_cntl; +		tempRB3D_CNTL &= ~(1<<15); /* unset radeon magic flag */ -		DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n", -			   x, y, w, h, flags ); +		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl; +		tempRB3D_STENCILREFMASK = 0x0; -		if ( flags & (RADEON_FRONT | RADEON_BACK) ) { -			BEGIN_RING( 4 ); +		tempSE_CNTL = depth_clear->se_cntl; -			/* Ensure the 3D stream is idle before doing a -			 * 2D fill to clear the front or back buffer. -			 */ -			RADEON_WAIT_UNTIL_3D_IDLE(); -			OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); -			OUT_RING( clear->color_mask ); -			ADVANCE_RING(); +		/* Disable TCL */ -			/* Make sure we restore the 3D state next time. 
-			 */ -			dev_priv->sarea_priv->ctx_owner = 0; -		} +		tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */ +				   (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT)); -		if ( flags & RADEON_FRONT ) { -			BEGIN_RING( 6 ); +		tempRB3D_PLANEMASK = 0x0; -			OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); -			OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | -				  RADEON_GMC_BRUSH_SOLID_COLOR | -				  (dev_priv->color_fmt << 8) | -				  RADEON_GMC_SRC_DATATYPE_COLOR | -				  RADEON_ROP3_P | -				  RADEON_GMC_CLR_CMP_CNTL_DIS ); +		tempRE_AUX_SCISSOR_CNTL = 0x0; -			OUT_RING( dev_priv->front_pitch_offset ); -			OUT_RING( clear->clear_color ); +		tempSE_VTE_CNTL = +			SE_VTE_CNTL__VTX_XY_FMT_MASK | +			SE_VTE_CNTL__VTX_Z_FMT_MASK; -			OUT_RING( (x << 16) | y ); -			OUT_RING( (w << 16) | h ); +		/* Vertex format (X, Y, Z, W)*/ +		tempSE_VTX_FMT_0 = +			SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK | +			SE_VTX_FMT_0__VTX_W0_PRESENT_MASK; +		tempSE_VTX_FMT_1 = 0x0; -			ADVANCE_RING(); + +		/*  +		 * Depth buffer specific enables  +		 */ +		if (flags & RADEON_DEPTH) { +			/* Enable depth buffer */ +			tempRB3D_CNTL |= RADEON_Z_ENABLE; +		} else { +			/* Disable depth buffer */ +			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;  		} -		if ( flags & RADEON_BACK ) { -			BEGIN_RING( 6 ); +		/*  +		 * Stencil buffer specific enables +		 */ +		if ( flags & RADEON_STENCIL ) { +			tempRB3D_CNTL |=  RADEON_STENCIL_ENABLE; +			tempRB3D_STENCILREFMASK = clear->depth_mask;  +		} else { +			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE; +			tempRB3D_STENCILREFMASK = 0x00000000; +		} -			OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); -			OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | -				  RADEON_GMC_BRUSH_SOLID_COLOR | -				  (dev_priv->color_fmt << 8) | -				  RADEON_GMC_SRC_DATATYPE_COLOR | -				  RADEON_ROP3_P | -				  RADEON_GMC_CLR_CMP_CNTL_DIS ); +		BEGIN_RING( 26 ); +		RADEON_WAIT_UNTIL_2D_IDLE(); + +		OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL ); +		OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL ); +		
OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL ); +		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, +			      tempRB3D_ZSTENCILCNTL ); +		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,  +			      tempRB3D_STENCILREFMASK ); +		OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK ); +		OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL ); +		OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL ); +		OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 ); +		OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 ); +		OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL ); +		OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL,  +			      tempRE_AUX_SCISSOR_CNTL ); +		ADVANCE_RING(); -			OUT_RING( dev_priv->back_pitch_offset ); -			OUT_RING( clear->clear_color ); +		/* Make sure we restore the 3D state next time. +		 */ +		dev_priv->sarea_priv->ctx_owner = 0; -			OUT_RING( (x << 16) | y ); -			OUT_RING( (w << 16) | h ); +		for ( i = 0 ; i < nbox ; i++ ) { +			 +			/* Funny that this should be required --  +			 *  sets top-left? +			 */ +			radeon_emit_clip_rect( dev_priv, +					       &sarea_priv->boxes[i] ); +			BEGIN_RING( 14 ); +			OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) ); +			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST | +				   RADEON_PRIM_WALK_RING | +				   (3 << RADEON_NUM_VERTICES_SHIFT)) ); +			OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); +			OUT_RING( 0x3f800000 ); +			OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); +			OUT_RING( 0x3f800000 ); +			OUT_RING( depth_boxes[i].ui[CLEAR_X2] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); +			OUT_RING( 0x3f800000 );  			ADVANCE_RING();  		} +	}  +	else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { -		if ( flags & (RADEON_DEPTH | RADEON_STENCIL) ) { +		rb3d_cntl = depth_clear->rb3d_cntl; -			radeon_emit_clip_rect( dev_priv, -					       
&sarea_priv->boxes[i] ); +		if ( flags & RADEON_DEPTH ) { +			rb3d_cntl |=  RADEON_Z_ENABLE; +		} else { +			rb3d_cntl &= ~RADEON_Z_ENABLE; +		} -			BEGIN_RING( 28 ); +		if ( flags & RADEON_STENCIL ) { +			rb3d_cntl |=  RADEON_STENCIL_ENABLE; +			rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */ +		} else { +			rb3d_cntl &= ~RADEON_STENCIL_ENABLE; +			rb3d_stencilrefmask = 0x00000000; +		} -			RADEON_WAIT_UNTIL_2D_IDLE(); +		BEGIN_RING( 13 ); +		RADEON_WAIT_UNTIL_2D_IDLE(); -			OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) ); -			OUT_RING( 0x00000000 ); -			OUT_RING( rb3d_cntl ); +		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) ); +		OUT_RING( 0x00000000 ); +		OUT_RING( rb3d_cntl ); +		 +		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, +			      depth_clear->rb3d_zstencilcntl ); +		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, +			      rb3d_stencilrefmask ); +		OUT_RING_REG( RADEON_RB3D_PLANEMASK, +			      0x00000000 ); +		OUT_RING_REG( RADEON_SE_CNTL, +			      depth_clear->se_cntl ); +		ADVANCE_RING(); -			OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, -				      depth_clear->rb3d_zstencilcntl ); -			OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, -				      rb3d_stencilrefmask ); -			OUT_RING_REG( RADEON_RB3D_PLANEMASK, -				      0x00000000 ); -			OUT_RING_REG( RADEON_SE_CNTL, -				      depth_clear->se_cntl ); +		/* Make sure we restore the 3D state next time. +		 */ +		dev_priv->sarea_priv->ctx_owner = 0; -			/* Radeon 7500 doesn't like vertices without -			 * color. +		for ( i = 0 ; i < nbox ; i++ ) { +			 +			/* Funny that this should be required --  +			 *  sets top-left?  			 
*/ +			radeon_emit_clip_rect( dev_priv, +					       &sarea_priv->boxes[i] ); + +			BEGIN_RING( 15 ); +  			OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );  			OUT_RING( RADEON_VTX_Z_PRESENT |  				  RADEON_VTX_PKCOLOR_PRESENT); @@ -459,6 +675,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  				   RADEON_VTX_FMT_RADEON_MODE |  				   (3 << RADEON_NUM_VERTICES_SHIFT)) ); +  			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );  			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );  			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); @@ -475,10 +692,6 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  			OUT_RING( 0x0 );  			ADVANCE_RING(); - -			/* Make sure we restore the 3D state next time. -			 */ -			dev_priv->sarea_priv->ctx_owner = 0;  		}  	} @@ -506,11 +719,12 @@ static void radeon_cp_dispatch_swap( drm_device_t *dev )  	RING_LOCALS;  	DRM_DEBUG( "\n" ); -#if RADEON_PERFORMANCE_BOXES +  	/* Do some trivial performance monitoring...  	 */ -	radeon_cp_performance_boxes( dev_priv ); -#endif +	if (dev_priv->do_boxes) +		radeon_cp_performance_boxes( dev_priv ); +  	/* Wait for the 3D stream to idle before dispatching the bitblt.  	 * This will prevent data corruption between the two streams. @@ -579,20 +793,21 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev )  {  	drm_radeon_private_t *dev_priv = dev->dev_private;  	RING_LOCALS; -	DRM_DEBUG( "page=%d\n", dev_priv->current_page ); +	DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n",  +		__FUNCTION__,  +		dev_priv->current_page, +		dev_priv->sarea_priv->pfCurrentPage); -#if RADEON_PERFORMANCE_BOXES  	/* Do some trivial performance monitoring...  	 
*/ -	radeon_cp_performance_boxes( dev_priv ); -#endif +	if (dev_priv->do_boxes) { +		dev_priv->stats.boxes |= RADEON_BOX_FLIP; +		radeon_cp_performance_boxes( dev_priv ); +	}  	BEGIN_RING( 4 );  	RADEON_WAIT_UNTIL_3D_IDLE(); -/* -	RADEON_WAIT_UNTIL_PAGE_FLIPPED(); -*/  	OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET, 0 ) );  	if ( dev_priv->current_page == 0 ) { @@ -847,6 +1062,8 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,  	int ret = 0, i;  	RING_LOCALS; +	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD; +  	/* FIXME: Be smarter about this...  	 */  	buf = radeon_freelist_get( dev ); @@ -1611,6 +1828,30 @@ static __inline__ int radeon_emit_scalars(  	return 0;  } +/* God this is ugly + */ +static __inline__ int radeon_emit_scalars2(  +	drm_radeon_private_t *dev_priv, +	drm_radeon_cmd_header_t header, +	drm_radeon_cmd_buffer_t *cmdbuf ) +{ +	int sz = header.scalars.count; +	int *data = (int *)cmdbuf->buf; +	int start = ((unsigned int)header.scalars.offset) + 0x100; +	int stride = header.scalars.stride; +	RING_LOCALS; + +	BEGIN_RING( 3+sz ); +	OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) ); +	OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); +	OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) ); +	OUT_RING_USER_TABLE( data, sz ); +	ADVANCE_RING(); +	cmdbuf->buf += sz * sizeof(int); +	cmdbuf->bufsz -= sz * sizeof(int); +	return 0; +} +  static __inline__ int radeon_emit_vectors(   	drm_radeon_private_t *dev_priv,  	drm_radeon_cmd_header_t header, @@ -1775,6 +2016,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  		switch (header.header.cmd_type) {  		case RADEON_CMD_PACKET:  +			DRM_DEBUG("RADEON_CMD_PACKET\n");  			if (radeon_emit_packets( dev_priv, header, &cmdbuf )) {  				DRM_ERROR("radeon_emit_packets failed\n");  				return DRM_ERR(EINVAL); @@ -1782,6 +2024,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  			break;  		case RADEON_CMD_SCALARS: +			DRM_DEBUG("RADEON_CMD_SCALARS\n");  			if (radeon_emit_scalars( 
dev_priv, header, &cmdbuf )) {  				DRM_ERROR("radeon_emit_scalars failed\n");  				return DRM_ERR(EINVAL); @@ -1789,6 +2032,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  			break;  		case RADEON_CMD_VECTORS: +			DRM_DEBUG("RADEON_CMD_VECTORS\n");  			if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {  				DRM_ERROR("radeon_emit_vectors failed\n");  				return DRM_ERR(EINVAL); @@ -1796,6 +2040,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  			break;  		case RADEON_CMD_DMA_DISCARD: +			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");  			idx = header.dma.buf_idx;  			if ( idx < 0 || idx >= dma->buf_count ) {  				DRM_ERROR( "buffer index %d (of %d max)\n", @@ -1813,6 +2058,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  			break;  		case RADEON_CMD_PACKET3: +			DRM_DEBUG("RADEON_CMD_PACKET3\n");  			if (radeon_emit_packet3( dev, &cmdbuf )) {  				DRM_ERROR("radeon_emit_packet3 failed\n");  				return DRM_ERR(EINVAL); @@ -1820,12 +2066,20 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  			break;  		case RADEON_CMD_PACKET3_CLIP: +			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");  			if (radeon_emit_packet3_cliprect( dev, &cmdbuf, orig_nbox )) {  				DRM_ERROR("radeon_emit_packet3_clip failed\n");  				return DRM_ERR(EINVAL);  			}  			break; +		case RADEON_CMD_SCALARS2: +			DRM_DEBUG("RADEON_CMD_SCALARS2\n"); +			if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) { +				DRM_ERROR("radeon_emit_scalars2 failed\n"); +				return DRM_ERR(EINVAL); +			} +			break;  		default:  			DRM_ERROR("bad cmd_type %d at %p\n",   				  header.header.cmd_type, @@ -1835,6 +2089,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  	} +	DRM_DEBUG("DONE\n");  	COMMIT_RING();  	return 0;  } @@ -1863,12 +2118,14 @@ int radeon_cp_getparam( DRM_IOCTL_ARGS )  		value = dev_priv->agp_buffers_offset;  		break;  	case RADEON_PARAM_LAST_FRAME: +		dev_priv->stats.last_frame_reads++;  		value = GET_SCRATCH( 0 );  		break;  	case RADEON_PARAM_LAST_DISPATCH:  		value = GET_SCRATCH( 1 );  		break;  	case RADEON_PARAM_LAST_CLEAR: +	
	dev_priv->stats.last_clear_reads++;  		value = GET_SCRATCH( 2 );  		break;  	default: diff --git a/shared/radeon.h b/shared/radeon.h index ad36c868..885e4297 100644 --- a/shared/radeon.h +++ b/shared/radeon.h @@ -25,6 +25,7 @@   *   * Authors:   *    Gareth Hughes <gareth@valinux.com> + *    Keith Whitwell <keith@tungstengraphics.com>   */  #ifndef __RADEON_H__ @@ -43,14 +44,14 @@  #define __HAVE_SG		1  #define __HAVE_PCI_DMA		1 -#define DRIVER_AUTHOR		"Gareth Hughes, VA Linux Systems Inc." +#define DRIVER_AUTHOR		"Gareth Hughes, Keith Whitwell, others."  #define DRIVER_NAME		"radeon"  #define DRIVER_DESC		"ATI Radeon" -#define DRIVER_DATE		"20020714" +#define DRIVER_DATE		"20020611"  #define DRIVER_MAJOR		1 -#define DRIVER_MINOR		4 +#define DRIVER_MINOR		5  #define DRIVER_PATCHLEVEL	0  /* Interface history: @@ -63,6 +64,10 @@   *     - Add support for new radeon packets (keith)   *     - Add getparam ioctl (keith)   *     - Add flip-buffers ioctl, deprecate fullscreen foo (keith). + * 1.4 - Add scratch registers to get_param ioctl. + * 1.5 - Add r200 packets to cmdbuf ioctl + *     - Add r200 function to init ioctl + *     - Add 'scalar2' instruction to cmdbuf   */  #define DRIVER_IOCTLS							     \   [DRM_IOCTL_NR(DRM_IOCTL_DMA)]               = { radeon_cp_buffers,  1, 0 }, \ @@ -104,15 +109,6 @@   */  #define __HAVE_DMA		1 -#if 0 -/* GH: Remove this for now... 
*/ -#define __HAVE_DMA_QUIESCENT	1 -#define DRIVER_DMA_QUIESCENT() do {					\ -	drm_radeon_private_t *dev_priv = dev->dev_private;		\ -	return radeon_do_cp_idle( dev_priv );				\ -} while (0) -#endif -  /* Buffer customization:   */  #define DRIVER_BUF_PRIV_T	drm_radeon_buf_priv_t diff --git a/shared/radeon_cp.c b/shared/radeon_cp.c index 8250c09b..01069e49 100644 --- a/shared/radeon_cp.c +++ b/shared/radeon_cp.c @@ -44,6 +44,266 @@  /* CP microcode (from ATI) */ +static u32 R200_cp_microcode[][2] = { +	{ 0x21007000, 0000000000 },         +	{ 0x20007000, 0000000000 },  +	{ 0x000000ab, 0x00000004 }, +	{ 0x000000af, 0x00000004 }, +	{ 0x66544a49, 0000000000 }, +	{ 0x49494174, 0000000000 }, +	{ 0x54517d83, 0000000000 }, +	{ 0x498d8b64, 0000000000 }, +	{ 0x49494949, 0000000000 }, +	{ 0x49da493c, 0000000000 }, +	{ 0x49989898, 0000000000 }, +	{ 0xd34949d5, 0000000000 }, +	{ 0x9dc90e11, 0000000000 }, +	{ 0xce9b9b9b, 0000000000 }, +	{ 0x000f0000, 0x00000016 }, +	{ 0x352e232c, 0000000000 }, +	{ 0x00000013, 0x00000004 }, +	{ 0x000f0000, 0x00000016 }, +	{ 0x352e272c, 0000000000 }, +	{ 0x000f0001, 0x00000016 }, +	{ 0x3239362f, 0000000000 }, +	{ 0x000077ef, 0x00000002 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x00000020, 0x0000001a }, +	{ 0x00004000, 0x0000001e }, +	{ 0x00061000, 0x00000002 }, +	{ 0x00000020, 0x0000001a }, +	{ 0x00004000, 0x0000001e }, +	{ 0x00061000, 0x00000002 }, +	{ 0x00000020, 0x0000001a }, +	{ 0x00004000, 0x0000001e }, +	{ 0x00000016, 0x00000004 }, +	{ 0x0003802a, 0x00000002 }, +	{ 0x040067e0, 0x00000002 }, +	{ 0x00000016, 0x00000004 }, +	{ 0x000077e0, 0x00000002 }, +	{ 0x00065000, 0x00000002 }, +	{ 0x000037e1, 0x00000002 }, +	{ 0x040067e1, 0x00000006 }, +	{ 0x000077e0, 0x00000002 }, +	{ 0x000077e1, 0x00000002 }, +	{ 0x000077e1, 0x00000006 }, +	{ 0xffffffff, 0000000000 }, +	{ 0x10000000, 0000000000 }, +	{ 0x0003802a, 0x00000002 }, +	{ 0x040067e0, 0x00000006 }, +	{ 0x00007675, 0x00000002 }, +	{ 0x00007676, 0x00000002 }, +	{ 0x00007677, 0x00000002 }, +	{ 
0x00007678, 0x00000006 }, +	{ 0x0003802b, 0x00000002 }, +	{ 0x04002676, 0x00000002 }, +	{ 0x00007677, 0x00000002 }, +	{ 0x00007678, 0x00000006 }, +	{ 0x0000002e, 0x00000018 }, +	{ 0x0000002e, 0x00000018 }, +	{ 0000000000, 0x00000006 }, +	{ 0x0000002f, 0x00000018 }, +	{ 0x0000002f, 0x00000018 }, +	{ 0000000000, 0x00000006 }, +	{ 0x01605000, 0x00000002 }, +	{ 0x00065000, 0x00000002 }, +	{ 0x00098000, 0x00000002 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x64c0603d, 0x00000004 }, +	{ 0x00080000, 0x00000016 }, +	{ 0000000000, 0000000000 }, +	{ 0x0400251d, 0x00000002 }, +	{ 0x00007580, 0x00000002 }, +	{ 0x00067581, 0x00000002 }, +	{ 0x04002580, 0x00000002 }, +	{ 0x00067581, 0x00000002 }, +	{ 0x00000046, 0x00000004 }, +	{ 0x00005000, 0000000000 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x0000750e, 0x00000002 }, +	{ 0x00019000, 0x00000002 }, +	{ 0x00011055, 0x00000014 }, +	{ 0x00000055, 0x00000012 }, +	{ 0x0400250f, 0x00000002 }, +	{ 0x0000504a, 0x00000004 }, +	{ 0x00007565, 0x00000002 }, +	{ 0x00007566, 0x00000002 }, +	{ 0x00000051, 0x00000004 }, +	{ 0x01e655b4, 0x00000002 }, +	{ 0x4401b0dc, 0x00000002 }, +	{ 0x01c110dc, 0x00000002 }, +	{ 0x2666705d, 0x00000018 }, +	{ 0x040c2565, 0x00000002 }, +	{ 0x0000005d, 0x00000018 }, +	{ 0x04002564, 0x00000002 }, +	{ 0x00007566, 0x00000002 }, +	{ 0x00000054, 0x00000004 }, +	{ 0x00401060, 0x00000008 }, +	{ 0x00101000, 0x00000002 }, +	{ 0x000d80ff, 0x00000002 }, +	{ 0x00800063, 0x00000008 }, +	{ 0x000f9000, 0x00000002 }, +	{ 0x000e00ff, 0x00000002 }, +	{ 0000000000, 0x00000006 }, +	{ 0x00000080, 0x00000018 }, +	{ 0x00000054, 0x00000004 }, +	{ 0x00007576, 0x00000002 }, +	{ 0x00065000, 0x00000002 }, +	{ 0x00009000, 0x00000002 }, +	{ 0x00041000, 0x00000002 }, +	{ 0x0c00350e, 0x00000002 }, +	{ 0x00049000, 0x00000002 }, +	{ 0x00051000, 0x00000002 }, +	{ 0x01e785f8, 0x00000002 }, +	{ 0x00200000, 0x00000002 }, +	{ 0x00600073, 0x0000000c }, +	{ 0x00007563, 0x00000002 }, +	{ 0x006075f0, 0x00000021 }, +	{ 0x20007068, 0x00000004 }, +	{ 0x00005068, 
0x00000004 }, +	{ 0x00007576, 0x00000002 }, +	{ 0x00007577, 0x00000002 }, +	{ 0x0000750e, 0x00000002 }, +	{ 0x0000750f, 0x00000002 }, +	{ 0x00a05000, 0x00000002 }, +	{ 0x00600076, 0x0000000c }, +	{ 0x006075f0, 0x00000021 }, +	{ 0x000075f8, 0x00000002 }, +	{ 0x00000076, 0x00000004 }, +	{ 0x000a750e, 0x00000002 }, +	{ 0x0020750f, 0x00000002 }, +	{ 0x00600079, 0x00000004 }, +	{ 0x00007570, 0x00000002 }, +	{ 0x00007571, 0x00000002 }, +	{ 0x00007572, 0x00000006 }, +	{ 0x00005000, 0x00000002 }, +	{ 0x00a05000, 0x00000002 }, +	{ 0x00007568, 0x00000002 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x00000084, 0x0000000c }, +	{ 0x00058000, 0x00000002 }, +	{ 0x0c607562, 0x00000002 }, +	{ 0x00000086, 0x00000004 }, +	{ 0x00600085, 0x00000004 }, +	{ 0x400070dd, 0000000000 }, +	{ 0x000380dd, 0x00000002 }, +	{ 0x00000093, 0x0000001c }, +	{ 0x00065095, 0x00000018 }, +	{ 0x040025bb, 0x00000002 }, +	{ 0x00061096, 0x00000018 }, +	{ 0x040075bc, 0000000000 }, +	{ 0x000075bb, 0x00000002 }, +	{ 0x000075bc, 0000000000 }, +	{ 0x00090000, 0x00000006 }, +	{ 0x00090000, 0x00000002 }, +	{ 0x000d8002, 0x00000006 }, +	{ 0x00005000, 0x00000002 }, +	{ 0x00007821, 0x00000002 }, +	{ 0x00007800, 0000000000 }, +	{ 0x00007821, 0x00000002 }, +	{ 0x00007800, 0000000000 }, +	{ 0x01665000, 0x00000002 }, +	{ 0x000a0000, 0x00000002 }, +	{ 0x000671cc, 0x00000002 }, +	{ 0x0286f1cd, 0x00000002 }, +	{ 0x000000a3, 0x00000010 }, +	{ 0x21007000, 0000000000 }, +	{ 0x000000aa, 0x0000001c }, +	{ 0x00065000, 0x00000002 }, +	{ 0x000a0000, 0x00000002 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x000b0000, 0x00000002 }, +	{ 0x38067000, 0x00000002 }, +	{ 0x000a00a6, 0x00000004 }, +	{ 0x20007000, 0000000000 }, +	{ 0x01200000, 0x00000002 }, +	{ 0x20077000, 0x00000002 }, +	{ 0x01200000, 0x00000002 }, +	{ 0x20007000, 0000000000 }, +	{ 0x00061000, 0x00000002 }, +	{ 0x0120751b, 0x00000002 }, +	{ 0x8040750a, 0x00000002 }, +	{ 0x8040750b, 0x00000002 }, +	{ 0x00110000, 0x00000002 }, +	{ 0x000380dd, 0x00000002 }, +	{ 0x000000bd, 0x0000001c }, +	{ 
0x00061096, 0x00000018 }, +	{ 0x844075bd, 0x00000002 }, +	{ 0x00061095, 0x00000018 }, +	{ 0x840075bb, 0x00000002 }, +	{ 0x00061096, 0x00000018 }, +	{ 0x844075bc, 0x00000002 }, +	{ 0x000000c0, 0x00000004 }, +	{ 0x804075bd, 0x00000002 }, +	{ 0x800075bb, 0x00000002 }, +	{ 0x804075bc, 0x00000002 }, +	{ 0x00108000, 0x00000002 }, +	{ 0x01400000, 0x00000002 }, +	{ 0x006000c4, 0x0000000c }, +	{ 0x20c07000, 0x00000020 }, +	{ 0x000000c6, 0x00000012 }, +	{ 0x00800000, 0x00000006 }, +	{ 0x0080751d, 0x00000006 }, +	{ 0x000025bb, 0x00000002 }, +	{ 0x000040c0, 0x00000004 }, +	{ 0x0000775c, 0x00000002 }, +	{ 0x00a05000, 0x00000002 }, +	{ 0x00661000, 0x00000002 }, +	{ 0x0460275d, 0x00000020 }, +	{ 0x00004000, 0000000000 }, +	{ 0x00007999, 0x00000002 }, +	{ 0x00a05000, 0x00000002 }, +	{ 0x00661000, 0x00000002 }, +	{ 0x0460299b, 0x00000020 }, +	{ 0x00004000, 0000000000 }, +	{ 0x01e00830, 0x00000002 }, +	{ 0x21007000, 0000000000 }, +	{ 0x00005000, 0x00000002 }, +	{ 0x00038042, 0x00000002 }, +	{ 0x040025e0, 0x00000002 }, +	{ 0x000075e1, 0000000000 }, +	{ 0x00000001, 0000000000 }, +	{ 0x000380d9, 0x00000002 }, +	{ 0x04007394, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 
0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +	{ 0000000000, 0000000000 }, +}; + +  static u32 radeon_cp_microcode[][2] = {  	{ 0x21007000, 0000000000 },  	{ 0x20007000, 0000000000 }, @@ -345,6 +605,8 @@ static int radeon_do_pixcache_flush( drm_radeon_private_t *dev_priv )  	u32 tmp;  	int i; +	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; +  	tmp  = RADEON_READ( RADEON_RB2D_DSTCACHE_CTLSTAT );  	tmp |= RADEON_RB2D_DC_FLUSH_ALL;  	RADEON_WRITE( RADEON_RB2D_DSTCACHE_CTLSTAT, tmp ); @@ -369,6 +631,8 @@ static int radeon_do_wait_for_fifo( drm_radeon_private_t *dev_priv,  {  	int i; +	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; +  	for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) {  		int slots = ( RADEON_READ( RADEON_RBBM_STATUS )  			      & RADEON_RBBM_FIFOCNT_MASK ); @@ -387,6 +651,8 @@ static int radeon_do_wait_for_idle( drm_radeon_private_t *dev_priv )  {  	int i, ret; +	dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; +  	ret = radeon_do_wait_for_fifo( dev_priv, 64 );  	if ( ret ) return ret; @@ -420,11 +686,26 @@ static void radeon_cp_load_microcode( drm_radeon_private_t *dev_priv )  	radeon_do_wait_for_idle( dev_priv );  	RADEON_WRITE( RADEON_CP_ME_RAM_ADDR, 0 ); -	for ( i = 0 ; i < 256 ; i++ ) { -		RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, -			      radeon_cp_microcode[i][1] ); -		RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, -			      radeon_cp_microcode[i][0] ); + +	if (dev_priv->is_r200) +	{ +		DRM_INFO("Loading R200 Microcode\n"); +		for ( i = 0 ; i < 256 ; i++ )  +		{ +			RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, +				      R200_cp_microcode[i][1] ); +			RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, +				      R200_cp_microcode[i][0] ); +		} +	} +	else +	{ +		for ( i = 0 ; i < 256 ; i++ ) { +			RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, +				      radeon_cp_microcode[i][1] ); +			RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, +				      
radeon_cp_microcode[i][0] ); +		}  	}  } @@ -736,12 +1017,10 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init )  		return DRM_ERR(EINVAL);  	} +	dev_priv->is_r200 = (init->func == RADEON_INIT_R200_CP); +	dev_priv->do_boxes = 1;  	dev_priv->cp_mode = init->cp_mode; -	/* Simple idle check. -	 */ -	atomic_set( &dev_priv->idle_count, 0 ); -  	/* We don't support anything other than bus-mastering ring mode,  	 * but the ring can be in either AGP or PCI space for the ring  	 * read pointer. @@ -1028,6 +1307,7 @@ int radeon_cp_init( DRM_IOCTL_ARGS )  	switch ( init.func ) {  	case RADEON_INIT_CP: +	case RADEON_INIT_R200_CP:  		return radeon_do_init_cp( dev, &init );  	case RADEON_CLEANUP_CP:  		return radeon_do_cleanup_cp( dev ); @@ -1169,6 +1449,14 @@ int radeon_fullscreen( DRM_IOCTL_ARGS )   *   completed rendering.     *   * KW:  It's also a good way to find free buffers quickly. + * + * KW: Ideally this loop wouldn't exist, and freelist_get wouldn't + * sleep.  However, bugs in older versions of radeon_accel.c mean that + * we essentially have to do this, else old clients will break. + *  + * However, it does leave open a potential deadlock where all the + * buffers are held by other clients, which can't release them because + * they can't get the lock.     
*/  drm_buf_t *radeon_freelist_get( drm_device_t *dev ) @@ -1193,17 +1481,56 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev )  			buf_priv = buf->dev_private;  			if ( buf->pid == 0 || (buf->pending &&   					       buf_priv->age <= done_age) ) { +				dev_priv->stats.requested_bufs++;  				buf->pending = 0;  				return buf;  			}  			start = 0;  		} -		DRM_UDELAY( 1 ); + +		if (t) { +			DRM_UDELAY( 1 ); +			dev_priv->stats.freelist_loops++; +		}  	}  	DRM_ERROR( "returning NULL!\n" );  	return NULL;  } +#if 0 +drm_buf_t *radeon_freelist_get( drm_device_t *dev ) +{ +	drm_device_dma_t *dma = dev->dma; +	drm_radeon_private_t *dev_priv = dev->dev_private; +	drm_radeon_buf_priv_t *buf_priv; +	drm_buf_t *buf; +	int i, t; +	int start; +	u32 done_age = DRM_READ32(&dev_priv->scratch[1]); + +	if ( ++dev_priv->last_buf >= dma->buf_count ) +		dev_priv->last_buf = 0; + +	start = dev_priv->last_buf; +	dev_priv->stats.freelist_loops++; +	 +	for ( t = 0 ; t < 2 ; t++ ) { +		for ( i = start ; i < dma->buf_count ; i++ ) { +			buf = dma->buflist[i]; +			buf_priv = buf->dev_private; +			if ( buf->pid == 0 || (buf->pending &&  +					       buf_priv->age <= done_age) ) { +				dev_priv->stats.requested_bufs++; +				buf->pending = 0; +				return buf; +			} +		} +		start = 0; +	} + +	return NULL; +} +#endif  void radeon_freelist_reset( drm_device_t *dev )  { @@ -1228,11 +1555,23 @@ int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n )  {  	drm_radeon_ring_buffer_t *ring = &dev_priv->ring;  	int i; +	u32 last_head = GET_RING_HEAD(ring);  	for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { -		radeon_update_ring_snapshot( ring ); +		u32 head = GET_RING_HEAD(ring); + +		ring->space = (head - ring->tail) * sizeof(u32); +		if ( ring->space <= 0 ) +			ring->space += ring->size;  		if ( ring->space > n )  			return 0; +		 +		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + +		if (head != last_head) +			i = 0; +		last_head = head; +  		DRM_UDELAY( 1 );  	} @@ -1251,7 +1590,7 @@ static 
int radeon_cp_get_buffers( drm_device_t *dev, drm_dma_t *d )  	for ( i = d->granted_count ; i < d->request_count ; i++ ) {  		buf = radeon_freelist_get( dev ); -		if ( !buf ) return DRM_ERR(EAGAIN); +		if ( !buf ) return DRM_ERR(EBUSY); /* NOTE: broken client */  		buf->pid = DRM_CURRENTPID; diff --git a/shared/radeon_drm.h b/shared/radeon_drm.h index 3802e46c..6469bfb8 100644 --- a/shared/radeon_drm.h +++ b/shared/radeon_drm.h @@ -89,7 +89,47 @@  #define RADEON_EMIT_SE_ZBIAS_FACTOR                 18 /* zbias/2 */  #define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT           19 /* tcl/11 */  #define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED   20 /* material/17 */ -#define RADEON_MAX_STATE_PACKETS                    21 +#define R200_EMIT_PP_TXCBLEND_0                     21 /* tex0/4 */ +#define R200_EMIT_PP_TXCBLEND_1                     22 /* tex1/4 */ +#define R200_EMIT_PP_TXCBLEND_2                     23 /* tex2/4 */ +#define R200_EMIT_PP_TXCBLEND_3                     24 /* tex3/4 */ +#define R200_EMIT_PP_TXCBLEND_4                     25 /* tex4/4 */ +#define R200_EMIT_PP_TXCBLEND_5                     26 /* tex5/4 */ +#define R200_EMIT_PP_TXCBLEND_6                     27 /* /4 */ +#define R200_EMIT_PP_TXCBLEND_7                     28 /* /4 */ +#define R200_EMIT_TCL_LIGHT_MODEL_CTL_0             29 /* tcl/7 */ +#define R200_EMIT_TFACTOR_0                         30 /* tf/7 */ +#define R200_EMIT_VTX_FMT_0                         31 /* vtx/5 */ +#define R200_EMIT_VAP_CTL                           32 /* vap/1 */ +#define R200_EMIT_MATRIX_SELECT_0                   33 /* msl/5 */ +#define R200_EMIT_TEX_PROC_CTL_2                    34 /* tcg/5 */ +#define R200_EMIT_TCL_UCP_VERT_BLEND_CTL            35 /* tcl/1 */ +#define R200_EMIT_PP_TXFILTER_0                     36 /* tex0/6 */ +#define R200_EMIT_PP_TXFILTER_1                     37 /* tex1/6 */ +#define R200_EMIT_PP_TXFILTER_2                     38 /* tex2/6 */ +#define R200_EMIT_PP_TXFILTER_3                     
39 /* tex3/6 */ +#define R200_EMIT_PP_TXFILTER_4                     40 /* tex4/6 */ +#define R200_EMIT_PP_TXFILTER_5                     41 /* tex5/6 */ +#define R200_EMIT_PP_TXOFFSET_0                     42 /* tex0/1 */ +#define R200_EMIT_PP_TXOFFSET_1                     43 /* tex1/1 */ +#define R200_EMIT_PP_TXOFFSET_2                     44 /* tex2/1 */ +#define R200_EMIT_PP_TXOFFSET_3                     45 /* tex3/1 */ +#define R200_EMIT_PP_TXOFFSET_4                     46 /* tex4/1 */ +#define R200_EMIT_PP_TXOFFSET_5                     47 /* tex5/1 */ +#define R200_EMIT_VTE_CNTL                          48 /* vte/1 */ +#define R200_EMIT_OUTPUT_VTX_COMP_SEL               49 /* vtx/1 */ +#define R200_EMIT_PP_TAM_DEBUG3                     50 /* tam/1 */ +#define R200_EMIT_PP_CNTL_X                         51 /* cst/1 */ +#define R200_EMIT_RB3D_DEPTHXY_OFFSET               52 /* cst/1 */ +#define R200_EMIT_RE_AUX_SCISSOR_CNTL               53 /* cst/1 */ +#define R200_EMIT_RE_SCISSOR_TL_0                   54 /* cst/2 */ +#define R200_EMIT_RE_SCISSOR_TL_1                   55 /* cst/2 */ +#define R200_EMIT_RE_SCISSOR_TL_2                   56 /* cst/2 */ +#define R200_EMIT_SE_VAP_CNTL_STATUS                57 /* cst/1 */ +#define R200_EMIT_SE_VTX_STATE_CNTL                 58 /* cst/1 */ +#define R200_EMIT_RE_POINTSIZE                      59 /* cst/1 */ +#define R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0       60 /* cst/4 */ +#define RADEON_MAX_STATE_PACKETS                    61  /* Commands understood by cmd_buffer ioctl.  
More can be added but @@ -101,24 +141,25 @@  #define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */  #define RADEON_CMD_PACKET3     5 /* emit hw packet */  #define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */ +#define RADEON_CMD_SCALARS2     7 /* r200 stopgap */  typedef union {  	int i;  	struct {  -		char cmd_type, pad0, pad1, pad2; +		unsigned char cmd_type, pad0, pad1, pad2;  	} header;  	struct {  -		char cmd_type, packet_id, pad0, pad1; +		unsigned char cmd_type, packet_id, pad0, pad1;  	} packet;  	struct {  -		char cmd_type, offset, stride, count;  +		unsigned char cmd_type, offset, stride, count;   	} scalars;  	struct {  -		char cmd_type, offset, stride, count;  +		unsigned char cmd_type, offset, stride, count;   	} vectors;  	struct {  -		char cmd_type, buf_idx, pad0, pad1;  +		unsigned char cmd_type, buf_idx, pad0, pad1;   	} dma;  } drm_radeon_cmd_header_t; @@ -327,7 +368,8 @@ typedef struct {  typedef struct drm_radeon_init {  	enum {  		RADEON_INIT_CP    = 0x01, -		RADEON_CLEANUP_CP = 0x02 +		RADEON_CLEANUP_CP = 0x02, +		RADEON_INIT_R200_CP = 0x03,	  	} func;  	unsigned long sarea_priv_offset;  	int is_pci; diff --git a/shared/radeon_drv.h b/shared/radeon_drv.h index 15c0d4dd..7c341b39 100644 --- a/shared/radeon_drv.h +++ b/shared/radeon_drv.h @@ -79,12 +79,25 @@ typedef struct drm_radeon_private {  	int writeback_works;  	int usec_timeout; + +	int is_r200; +  	int is_pci;  	unsigned long phys_pci_gart;  	dma_addr_t bus_pci_gart; -	atomic_t idle_count; - +	struct { +		u32 boxes; +		int freelist_timeouts; +		int freelist_loops; +		int requested_bufs; +		int last_frame_reads; +		int last_clear_reads; +		int clears; +		int texture_uploads; +	} stats; + +	int do_boxes;  	int page_flipping;  	int current_page;  	u32 crtc_offset; @@ -134,14 +147,6 @@ extern drm_buf_t *radeon_freelist_get( drm_device_t *dev );  extern int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n ); -static __inline__ void 
-radeon_update_ring_snapshot( drm_radeon_ring_buffer_t *ring ) -{ -	ring->space = (GET_RING_HEAD(ring) - ring->tail) * sizeof(u32); -	if ( ring->space <= 0 ) -		ring->space += ring->size; -} -  extern int radeon_do_cp_idle( drm_radeon_private_t *dev_priv );  extern int radeon_do_cleanup_cp( drm_device_t *dev );  extern int radeon_do_cleanup_pageflip( drm_device_t *dev ); @@ -159,6 +164,14 @@ extern int radeon_cp_cmdbuf( DRM_IOCTL_ARGS );  extern int radeon_cp_getparam( DRM_IOCTL_ARGS );  extern int radeon_cp_flip( DRM_IOCTL_ARGS ); +/* Flags for stats.boxes + */ +#define RADEON_BOX_DMA_IDLE      0x1 +#define RADEON_BOX_RING_FULL     0x2 +#define RADEON_BOX_FLIP          0x4 +#define RADEON_BOX_WAIT_IDLE     0x8 +#define RADEON_BOX_TEXTURE_LOAD  0x10 +  /* Register definitions, register access macros and drmAddMap constants @@ -282,6 +295,7 @@ extern int radeon_cp_flip( DRM_IOCTL_ARGS );  #	define RADEON_STENCIL_ENABLE		(1 << 7)  #	define RADEON_Z_ENABLE			(1 << 8)  #define RADEON_RB3D_DEPTHOFFSET		0x1c24 +#define RADEON_RB3D_DEPTHPITCH		0x1c28  #define RADEON_RB3D_PLANEMASK		0x1d84  #define RADEON_RB3D_STENCILREFMASK	0x1d7c  #define RADEON_RB3D_ZCACHE_MODE		0x3250 @@ -513,6 +527,62 @@ extern int radeon_cp_flip( DRM_IOCTL_ARGS );  #define RADEON_TXFORMAT_ARGB8888	6  #define RADEON_TXFORMAT_RGBA8888	7 +#define R200_PP_TXCBLEND_0                0x2f00 +#define R200_PP_TXCBLEND_1                0x2f10 +#define R200_PP_TXCBLEND_2                0x2f20 +#define R200_PP_TXCBLEND_3                0x2f30 +#define R200_PP_TXCBLEND_4                0x2f40 +#define R200_PP_TXCBLEND_5                0x2f50 +#define R200_PP_TXCBLEND_6                0x2f60 +#define R200_PP_TXCBLEND_7                0x2f70 +#define R200_SE_TCL_LIGHT_MODEL_CTL_0     0x2268  +#define R200_PP_TFACTOR_0                 0x2ee0 +#define R200_SE_VTX_FMT_0                 0x2088 +#define R200_SE_VAP_CNTL                  0x2080 +#define R200_SE_TCL_MATRIX_SEL_0          0x2230 +#define 
R200_SE_TCL_TEX_PROC_CTL_2        0x22a8  +#define R200_SE_TCL_UCP_VERT_BLEND_CTL    0x22c0  +#define R200_PP_TXFILTER_5                0x2ca0  +#define R200_PP_TXFILTER_4                0x2c80  +#define R200_PP_TXFILTER_3                0x2c60  +#define R200_PP_TXFILTER_2                0x2c40  +#define R200_PP_TXFILTER_1                0x2c20  +#define R200_PP_TXFILTER_0                0x2c00  +#define R200_PP_TXOFFSET_5                0x2d78 +#define R200_PP_TXOFFSET_4                0x2d60 +#define R200_PP_TXOFFSET_3                0x2d48 +#define R200_PP_TXOFFSET_2                0x2d30 +#define R200_PP_TXOFFSET_1                0x2d18 +#define R200_PP_TXOFFSET_0                0x2d00 +#define R200_RE_AUX_SCISSOR_CNTL          0x26f0 +#define R200_SE_VTE_CNTL                  0x20b0 +#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL   0x2250 +#define R200_PP_TAM_DEBUG3                0x2d9c +#define R200_PP_CNTL_X                    0x2cc4 +#define R200_SE_VAP_CNTL_STATUS           0x2140 +#define R200_RE_SCISSOR_TL_0              0x1cd8 +#define R200_RE_SCISSOR_TL_1              0x1ce0 +#define R200_RE_SCISSOR_TL_2              0x1ce8 +#define R200_RB3D_DEPTHXY_OFFSET          0x1d60  +#define R200_RE_AUX_SCISSOR_CNTL          0x26f0 +#define R200_SE_VTX_STATE_CNTL            0x2180 +#define R200_RE_POINTSIZE                 0x2648 +#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0 0x2254 + + +#define SE_VAP_CNTL__TCL_ENA_MASK                          0x00000001 +#define SE_VAP_CNTL__FORCE_W_TO_ONE_MASK                   0x00010000 +#define SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT                 0x00000012 +#define SE_VTE_CNTL__VTX_XY_FMT_MASK                       0x00000100 +#define SE_VTE_CNTL__VTX_Z_FMT_MASK                        0x00000200 +#define SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK                  0x00000001 +#define SE_VTX_FMT_0__VTX_W0_PRESENT_MASK                  0x00000002 +#define SE_VTX_FMT_0__VTX_COLOR_0_FMT__SHIFT               0x0000000b +#define 
R200_3D_DRAW_IMMD_2      0xC0003500 +#define R200_SE_VTX_FMT_1                 0x208c +#define R200_RE_CNTL                      0x1c50  + +  /* Constants */  #define RADEON_MAX_USEC_TIMEOUT		100000	/* 100 ms */ @@ -620,30 +690,16 @@ do {									\  	}								\  } while (0) + +/* Perfbox functionality only.   + */  #define RING_SPACE_TEST_WITH_RETURN( dev_priv )				\  do {									\ -	drm_radeon_ring_buffer_t *ring = &dev_priv->ring; int i;	\ -	if ( ring->space < ring->high_mark ) {				\ -		for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) {	\ -			radeon_update_ring_snapshot( ring );		\ -			if ( ring->space >= ring->high_mark )		\ -				goto __ring_space_done;			\ -			DRM_UDELAY( 1 );				\ -		}							\ -		DRM_ERROR( "ring space check from memory failed, reading register...\n" );	\ -		/* If ring space check fails from RAM, try reading the	\ -		   register directly */					\ -		ring->space = 4 * ( RADEON_READ( RADEON_CP_RB_RPTR ) - ring->tail );	\ -		if ( ring->space <= 0 )					\ -			ring->space += ring->size;			\ -		if ( ring->space >= ring->high_mark )			\ -			goto __ring_space_done;				\ -									\ -		DRM_ERROR( "ring space check failed!\n" );		\ -		return DRM_ERR(EBUSY);				\ +	if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE)) {		\ +		u32 head = GET_RING_HEAD(&dev_priv->ring);		\ +		if (head == dev_priv->ring.tail)			\ +			dev_priv->stats.boxes |= RADEON_BOX_DMA_IDLE;	\  	}								\ - __ring_space_done:							\ -	;								\  } while (0)  #define VB_AGE_TEST_WITH_RETURN( dev_priv )				\ @@ -710,16 +766,15 @@ do {									\  	}								\  	if (((dev_priv->ring.tail + _nr) & mask) != write) {		\  		DRM_ERROR( 						\ -			"ADVANCE_RING(): mismatch: nr: %x write: %x\n",	\ +			"ADVANCE_RING(): mismatch: nr: %x write: %x line: %d\n",	\  			((dev_priv->ring.tail + _nr) & mask),		\ -			write);						\ +			write, __LINE__);						\  	} else								\  		dev_priv->ring.tail = write;				\  } while (0)  #define COMMIT_RING() do {					    \ -	radeon_flush_write_combine();					\ -	
RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail );		\ +	RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail );		    \  } while (0)  #define OUT_RING( x ) do {						\ @@ -760,6 +815,4 @@ do {									\  } while (0) -#define RADEON_PERFORMANCE_BOXES	0 -  #endif /* __RADEON_DRV_H__ */ diff --git a/shared/radeon_state.c b/shared/radeon_state.c index 1cc6bde8..7f84e739 100644 --- a/shared/radeon_state.c +++ b/shared/radeon_state.c @@ -239,18 +239,50 @@ static struct {  	{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },  	{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },  	{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" }, +	{ R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" }, +	{ R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" }, +	{ R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" }, +	{ R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" }, +	{ R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" }, +	{ R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" }, +	{ R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" }, +	{ R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" }, +	{ R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" }, +	{ R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" }, +	{ R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" }, +	{ R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" }, +	{ R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" }, +	{ R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" }, +	{ R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" }, +	{ R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" }, +	{ R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" }, +	{ R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" }, +	{ R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" }, +	{ R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" }, +	{ R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" }, +	{ R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" }, +	{ R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" }, +	{ R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" }, +	{ 
R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" }, +	{ R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" }, +	{ R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" }, +	{ R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" }, +	{ R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" }, +	{ R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" }, +	{ R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" },  +	{ R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" },  +	{ R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" },  +	{ R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" },  +	{ R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" },  +	{ R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" },  +	{ R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" },  +	{ R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" },  +	{ R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" },  +	{ R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },  }; - - - - - - - -#if RADEON_PERFORMANCE_BOXES  /* ================================================================   * Performance monitoring functions   */ @@ -259,10 +291,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,  			      int x, int y, int w, int h,  			      int r, int g, int b )  { -	u32 pitch, offset;  	u32 color;  	RING_LOCALS; +	x += dev_priv->sarea_priv->boxes[0].x1; +	y += dev_priv->sarea_priv->boxes[0].y1; +  	switch ( dev_priv->color_fmt ) {  	case RADEON_COLOR_FORMAT_RGB565:  		color = (((r & 0xf8) << 8) | @@ -275,8 +309,11 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,  		break;  	} -	offset = dev_priv->back_offset; -	pitch = dev_priv->back_pitch >> 3; +	BEGIN_RING( 4 ); +	RADEON_WAIT_UNTIL_3D_IDLE();		 +	OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); +	OUT_RING( 0xffffffff ); +	ADVANCE_RING();  	BEGIN_RING( 6 ); @@ -288,7 +325,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,  		  RADEON_ROP3_P |  		  RADEON_GMC_CLR_CMP_CNTL_DIS ); -	OUT_RING( (pitch << 22) | (offset >> 5) ); + 	
if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {  +		OUT_RING( dev_priv->front_pitch_offset ); + 	} else {	  +		OUT_RING( dev_priv->back_pitch_offset ); + 	}  +  	OUT_RING( color );  	OUT_RING( (x << 16) | y ); @@ -299,16 +341,57 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,  static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )  { -	if ( atomic_read( &dev_priv->idle_count ) == 0 ) { -		radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 ); -	} else { -		atomic_set( &dev_priv->idle_count, 0 ); +	/* Collapse various things into a wait flag -- trying to +	 * guess if userspase slept -- better just to have them tell us. +	 */ +	if (dev_priv->stats.last_frame_reads > 1 || +	    dev_priv->stats.last_clear_reads > dev_priv->stats.clears) { +		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;  	} -} -#endif +	if (dev_priv->stats.freelist_loops) { +		dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; +	} +	/* Purple box for page flipping +	 */ +	if ( dev_priv->stats.boxes & RADEON_BOX_FLIP )  +		radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 ); + +	/* Red box if we have to wait for idle at any point +	 */ +	if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE )  +		radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 ); + +	/* Blue box: lost context? 
+	 */ +	/* Yellow box for texture swaps +	 */ +	if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD )  +		radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 ); + +	/* Green box if hardware never idles (as far as we can tell) +	 */ +	if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) )  +		radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 ); + + +	/* Draw bars indicating number of buffers allocated  +	 * (not a great measure, easily confused) +	 */ +	if (dev_priv->stats.requested_bufs) { +		if (dev_priv->stats.requested_bufs > 100) +			dev_priv->stats.requested_bufs = 100; + +		radeon_clear_box( dev_priv, 4, 16,   +				  dev_priv->stats.requested_bufs, 4, +				  196, 128, 128 ); +	} + +	memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) ); + +}  /* ================================================================   * CP command dispatch functions   */ @@ -328,6 +411,8 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  	RING_LOCALS;  	DRM_DEBUG( "flags = 0x%x\n", flags ); +	dev_priv->stats.clears++; +  	if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {  		unsigned int tmp = flags; @@ -336,120 +421,251 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  		if ( tmp & RADEON_BACK )  flags |= RADEON_FRONT;  	} +	if ( flags & (RADEON_FRONT | RADEON_BACK) ) { + +		BEGIN_RING( 4 ); + +		/* Ensure the 3D stream is idle before doing a +		 * 2D fill to clear the front or back buffer. +		 */ +		RADEON_WAIT_UNTIL_3D_IDLE(); +		 +		OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); +		OUT_RING( clear->color_mask ); + +		ADVANCE_RING(); + +		/* Make sure we restore the 3D state next time. 
+		 */ +		dev_priv->sarea_priv->ctx_owner = 0; + +		for ( i = 0 ; i < nbox ; i++ ) { +			int x = pbox[i].x1; +			int y = pbox[i].y1; +			int w = pbox[i].x2 - x; +			int h = pbox[i].y2 - y; + +			DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n", +				   x, y, w, h, flags ); + +			if ( flags & RADEON_FRONT ) { +				BEGIN_RING( 6 ); +				 +				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); +				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | +					  RADEON_GMC_BRUSH_SOLID_COLOR | +					  (dev_priv->color_fmt << 8) | +					  RADEON_GMC_SRC_DATATYPE_COLOR | +					  RADEON_ROP3_P | +					  RADEON_GMC_CLR_CMP_CNTL_DIS ); + +				OUT_RING( dev_priv->front_pitch_offset ); +				OUT_RING( clear->clear_color ); +				 +				OUT_RING( (x << 16) | y ); +				OUT_RING( (w << 16) | h ); +				 +				ADVANCE_RING(); +			} +			 +			if ( flags & RADEON_BACK ) { +				BEGIN_RING( 6 ); +				 +				OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); +				OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | +					  RADEON_GMC_BRUSH_SOLID_COLOR | +					  (dev_priv->color_fmt << 8) | +					  RADEON_GMC_SRC_DATATYPE_COLOR | +					  RADEON_ROP3_P | +					  RADEON_GMC_CLR_CMP_CNTL_DIS ); +				 +				OUT_RING( dev_priv->back_pitch_offset ); +				OUT_RING( clear->clear_color ); + +				OUT_RING( (x << 16) | y ); +				OUT_RING( (w << 16) | h ); + +				ADVANCE_RING(); +			} +		} +	} +  	/* We have to clear the depth and/or stencil buffers by  	 * rendering a quad into just those buffers.  Thus, we have to  	 * make sure the 3D engine is configured correctly.  	 
*/ -	if ( flags & (RADEON_DEPTH | RADEON_STENCIL) ) { -		rb3d_cntl = depth_clear->rb3d_cntl; +	if ( dev_priv->is_r200 && +	     (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { -		if ( flags & RADEON_DEPTH ) { -			rb3d_cntl |=  RADEON_Z_ENABLE; -		} else { -			rb3d_cntl &= ~RADEON_Z_ENABLE; -		} +		int tempPP_CNTL; +		int tempRE_CNTL; +		int tempRB3D_CNTL; +		int tempRB3D_ZSTENCILCNTL; +		int tempRB3D_STENCILREFMASK; +		int tempRB3D_PLANEMASK; +		int tempSE_CNTL; +		int tempSE_VTE_CNTL; +		int tempSE_VTX_FMT_0; +		int tempSE_VTX_FMT_1; +		int tempSE_VAP_CNTL; +		int tempRE_AUX_SCISSOR_CNTL; -		if ( flags & RADEON_STENCIL ) { -			rb3d_cntl |=  RADEON_STENCIL_ENABLE; -			rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */ -		} else { -			rb3d_cntl &= ~RADEON_STENCIL_ENABLE; -			rb3d_stencilrefmask = 0x00000000; -		} -	} +		tempPP_CNTL = 0; +		tempRE_CNTL = 0; -	for ( i = 0 ; i < nbox ; i++ ) { -		int x = pbox[i].x1; -		int y = pbox[i].y1; -		int w = pbox[i].x2 - x; -		int h = pbox[i].y2 - y; +		tempRB3D_CNTL = depth_clear->rb3d_cntl; +		tempRB3D_CNTL &= ~(1<<15); /* unset radeon magic flag */ -		DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n", -			   x, y, w, h, flags ); +		tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl; +		tempRB3D_STENCILREFMASK = 0x0; -		if ( flags & (RADEON_FRONT | RADEON_BACK) ) { -			BEGIN_RING( 4 ); +		tempSE_CNTL = depth_clear->se_cntl; -			/* Ensure the 3D stream is idle before doing a -			 * 2D fill to clear the front or back buffer. -			 */ -			RADEON_WAIT_UNTIL_3D_IDLE(); -			OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); -			OUT_RING( clear->color_mask ); -			ADVANCE_RING(); +		/* Disable TCL */ -			/* Make sure we restore the 3D state next time. 
-			 */ -			dev_priv->sarea_priv->ctx_owner = 0; -		} +		tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK |  */ +				   (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT)); -		if ( flags & RADEON_FRONT ) { -			BEGIN_RING( 6 ); +		tempRB3D_PLANEMASK = 0x0; -			OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); -			OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | -				  RADEON_GMC_BRUSH_SOLID_COLOR | -				  (dev_priv->color_fmt << 8) | -				  RADEON_GMC_SRC_DATATYPE_COLOR | -				  RADEON_ROP3_P | -				  RADEON_GMC_CLR_CMP_CNTL_DIS ); +		tempRE_AUX_SCISSOR_CNTL = 0x0; -			OUT_RING( dev_priv->front_pitch_offset ); -			OUT_RING( clear->clear_color ); +		tempSE_VTE_CNTL = +			SE_VTE_CNTL__VTX_XY_FMT_MASK | +			SE_VTE_CNTL__VTX_Z_FMT_MASK; -			OUT_RING( (x << 16) | y ); -			OUT_RING( (w << 16) | h ); +		/* Vertex format (X, Y, Z, W)*/ +		tempSE_VTX_FMT_0 = +			SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK | +			SE_VTX_FMT_0__VTX_W0_PRESENT_MASK; +		tempSE_VTX_FMT_1 = 0x0; -			ADVANCE_RING(); + +		/*  +		 * Depth buffer specific enables  +		 */ +		if (flags & RADEON_DEPTH) { +			/* Enable depth buffer */ +			tempRB3D_CNTL |= RADEON_Z_ENABLE; +		} else { +			/* Disable depth buffer */ +			tempRB3D_CNTL &= ~RADEON_Z_ENABLE;  		} -		if ( flags & RADEON_BACK ) { -			BEGIN_RING( 6 ); +		/*  +		 * Stencil buffer specific enables +		 */ +		if ( flags & RADEON_STENCIL ) { +			tempRB3D_CNTL |=  RADEON_STENCIL_ENABLE; +			tempRB3D_STENCILREFMASK = clear->depth_mask;  +		} else { +			tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE; +			tempRB3D_STENCILREFMASK = 0x00000000; +		} -			OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); -			OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | -				  RADEON_GMC_BRUSH_SOLID_COLOR | -				  (dev_priv->color_fmt << 8) | -				  RADEON_GMC_SRC_DATATYPE_COLOR | -				  RADEON_ROP3_P | -				  RADEON_GMC_CLR_CMP_CNTL_DIS ); +		BEGIN_RING( 26 ); +		RADEON_WAIT_UNTIL_2D_IDLE(); + +		OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL ); +		OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL ); +		
OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL ); +		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, +			      tempRB3D_ZSTENCILCNTL ); +		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,  +			      tempRB3D_STENCILREFMASK ); +		OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK ); +		OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL ); +		OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL ); +		OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 ); +		OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 ); +		OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL ); +		OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL,  +			      tempRE_AUX_SCISSOR_CNTL ); +		ADVANCE_RING(); -			OUT_RING( dev_priv->back_pitch_offset ); -			OUT_RING( clear->clear_color ); +		/* Make sure we restore the 3D state next time. +		 */ +		dev_priv->sarea_priv->ctx_owner = 0; -			OUT_RING( (x << 16) | y ); -			OUT_RING( (w << 16) | h ); +		for ( i = 0 ; i < nbox ; i++ ) { +			 +			/* Funny that this should be required --  +			 *  sets top-left? +			 */ +			radeon_emit_clip_rect( dev_priv, +					       &sarea_priv->boxes[i] ); +			BEGIN_RING( 14 ); +			OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) ); +			OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST | +				   RADEON_PRIM_WALK_RING | +				   (3 << RADEON_NUM_VERTICES_SHIFT)) ); +			OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); +			OUT_RING( 0x3f800000 ); +			OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); +			OUT_RING( 0x3f800000 ); +			OUT_RING( depth_boxes[i].ui[CLEAR_X2] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_Y2] ); +			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); +			OUT_RING( 0x3f800000 );  			ADVANCE_RING();  		} +	}  +	else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { -		if ( flags & (RADEON_DEPTH | RADEON_STENCIL) ) { +		rb3d_cntl = depth_clear->rb3d_cntl; -			radeon_emit_clip_rect( dev_priv, -					       
&sarea_priv->boxes[i] ); +		if ( flags & RADEON_DEPTH ) { +			rb3d_cntl |=  RADEON_Z_ENABLE; +		} else { +			rb3d_cntl &= ~RADEON_Z_ENABLE; +		} -			BEGIN_RING( 28 ); +		if ( flags & RADEON_STENCIL ) { +			rb3d_cntl |=  RADEON_STENCIL_ENABLE; +			rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */ +		} else { +			rb3d_cntl &= ~RADEON_STENCIL_ENABLE; +			rb3d_stencilrefmask = 0x00000000; +		} -			RADEON_WAIT_UNTIL_2D_IDLE(); +		BEGIN_RING( 13 ); +		RADEON_WAIT_UNTIL_2D_IDLE(); -			OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) ); -			OUT_RING( 0x00000000 ); -			OUT_RING( rb3d_cntl ); +		OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) ); +		OUT_RING( 0x00000000 ); +		OUT_RING( rb3d_cntl ); +		 +		OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, +			      depth_clear->rb3d_zstencilcntl ); +		OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, +			      rb3d_stencilrefmask ); +		OUT_RING_REG( RADEON_RB3D_PLANEMASK, +			      0x00000000 ); +		OUT_RING_REG( RADEON_SE_CNTL, +			      depth_clear->se_cntl ); +		ADVANCE_RING(); -			OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, -				      depth_clear->rb3d_zstencilcntl ); -			OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, -				      rb3d_stencilrefmask ); -			OUT_RING_REG( RADEON_RB3D_PLANEMASK, -				      0x00000000 ); -			OUT_RING_REG( RADEON_SE_CNTL, -				      depth_clear->se_cntl ); +		/* Make sure we restore the 3D state next time. +		 */ +		dev_priv->sarea_priv->ctx_owner = 0; -			/* Radeon 7500 doesn't like vertices without -			 * color. +		for ( i = 0 ; i < nbox ; i++ ) { +			 +			/* Funny that this should be required --  +			 *  sets top-left?  			 
*/ +			radeon_emit_clip_rect( dev_priv, +					       &sarea_priv->boxes[i] ); + +			BEGIN_RING( 15 ); +  			OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );  			OUT_RING( RADEON_VTX_Z_PRESENT |  				  RADEON_VTX_PKCOLOR_PRESENT); @@ -459,6 +675,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  				   RADEON_VTX_FMT_RADEON_MODE |  				   (3 << RADEON_NUM_VERTICES_SHIFT)) ); +  			OUT_RING( depth_boxes[i].ui[CLEAR_X1] );  			OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );  			OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); @@ -475,10 +692,6 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,  			OUT_RING( 0x0 );  			ADVANCE_RING(); - -			/* Make sure we restore the 3D state next time. -			 */ -			dev_priv->sarea_priv->ctx_owner = 0;  		}  	} @@ -506,11 +719,12 @@ static void radeon_cp_dispatch_swap( drm_device_t *dev )  	RING_LOCALS;  	DRM_DEBUG( "\n" ); -#if RADEON_PERFORMANCE_BOXES +  	/* Do some trivial performance monitoring...  	 */ -	radeon_cp_performance_boxes( dev_priv ); -#endif +	if (dev_priv->do_boxes) +		radeon_cp_performance_boxes( dev_priv ); +  	/* Wait for the 3D stream to idle before dispatching the bitblt.  	 * This will prevent data corruption between the two streams. @@ -579,20 +793,21 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev )  {  	drm_radeon_private_t *dev_priv = dev->dev_private;  	RING_LOCALS; -	DRM_DEBUG( "page=%d\n", dev_priv->current_page ); +	DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n",  +		__FUNCTION__,  +		dev_priv->current_page, +		dev_priv->sarea_priv->pfCurrentPage); -#if RADEON_PERFORMANCE_BOXES  	/* Do some trivial performance monitoring...  	 
*/ -	radeon_cp_performance_boxes( dev_priv ); -#endif +	if (dev_priv->do_boxes) { +		dev_priv->stats.boxes |= RADEON_BOX_FLIP; +		radeon_cp_performance_boxes( dev_priv ); +	}  	BEGIN_RING( 4 );  	RADEON_WAIT_UNTIL_3D_IDLE(); -/* -	RADEON_WAIT_UNTIL_PAGE_FLIPPED(); -*/  	OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET, 0 ) );  	if ( dev_priv->current_page == 0 ) { @@ -847,6 +1062,8 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,  	int ret = 0, i;  	RING_LOCALS; +	dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD; +  	/* FIXME: Be smarter about this...  	 */  	buf = radeon_freelist_get( dev ); @@ -1611,6 +1828,30 @@ static __inline__ int radeon_emit_scalars(  	return 0;  } +/* God this is ugly + */ +static __inline__ int radeon_emit_scalars2(  +	drm_radeon_private_t *dev_priv, +	drm_radeon_cmd_header_t header, +	drm_radeon_cmd_buffer_t *cmdbuf ) +{ +	int sz = header.scalars.count; +	int *data = (int *)cmdbuf->buf; +	int start = ((unsigned int)header.scalars.offset) + 0x100; +	int stride = header.scalars.stride; +	RING_LOCALS; + +	BEGIN_RING( 3+sz ); +	OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) ); +	OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); +	OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) ); +	OUT_RING_USER_TABLE( data, sz ); +	ADVANCE_RING(); +	cmdbuf->buf += sz * sizeof(int); +	cmdbuf->bufsz -= sz * sizeof(int); +	return 0; +} +  static __inline__ int radeon_emit_vectors(   	drm_radeon_private_t *dev_priv,  	drm_radeon_cmd_header_t header, @@ -1775,6 +2016,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  		switch (header.header.cmd_type) {  		case RADEON_CMD_PACKET:  +			DRM_DEBUG("RADEON_CMD_PACKET\n");  			if (radeon_emit_packets( dev_priv, header, &cmdbuf )) {  				DRM_ERROR("radeon_emit_packets failed\n");  				return DRM_ERR(EINVAL); @@ -1782,6 +2024,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  			break;  		case RADEON_CMD_SCALARS: +			DRM_DEBUG("RADEON_CMD_SCALARS\n");  			if (radeon_emit_scalars( 
dev_priv, header, &cmdbuf )) {  				DRM_ERROR("radeon_emit_scalars failed\n");  				return DRM_ERR(EINVAL); @@ -1789,6 +2032,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  			break;  		case RADEON_CMD_VECTORS: +			DRM_DEBUG("RADEON_CMD_VECTORS\n");  			if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {  				DRM_ERROR("radeon_emit_vectors failed\n");  				return DRM_ERR(EINVAL); @@ -1796,6 +2040,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  			break;  		case RADEON_CMD_DMA_DISCARD: +			DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");  			idx = header.dma.buf_idx;  			if ( idx < 0 || idx >= dma->buf_count ) {  				DRM_ERROR( "buffer index %d (of %d max)\n", @@ -1813,6 +2058,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  			break;  		case RADEON_CMD_PACKET3: +			DRM_DEBUG("RADEON_CMD_PACKET3\n");  			if (radeon_emit_packet3( dev, &cmdbuf )) {  				DRM_ERROR("radeon_emit_packet3 failed\n");  				return DRM_ERR(EINVAL); @@ -1820,12 +2066,20 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  			break;  		case RADEON_CMD_PACKET3_CLIP: +			DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");  			if (radeon_emit_packet3_cliprect( dev, &cmdbuf, orig_nbox )) {  				DRM_ERROR("radeon_emit_packet3_clip failed\n");  				return DRM_ERR(EINVAL);  			}  			break; +		case RADEON_CMD_SCALARS2: +			DRM_DEBUG("RADEON_CMD_SCALARS2\n"); +			if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) { +				DRM_ERROR("radeon_emit_scalars2 failed\n"); +				return DRM_ERR(EINVAL); +			} +			break;  		default:  			DRM_ERROR("bad cmd_type %d at %p\n",   				  header.header.cmd_type, @@ -1835,6 +2089,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )  	} +	DRM_DEBUG("DONE\n");  	COMMIT_RING();  	return 0;  } @@ -1863,12 +2118,14 @@ int radeon_cp_getparam( DRM_IOCTL_ARGS )  		value = dev_priv->agp_buffers_offset;  		break;  	case RADEON_PARAM_LAST_FRAME: +		dev_priv->stats.last_frame_reads++;  		value = GET_SCRATCH( 0 );  		break;  	case RADEON_PARAM_LAST_DISPATCH:  		value = GET_SCRATCH( 1 );  		break;  	case RADEON_PARAM_LAST_CLEAR: +	
	dev_priv->stats.last_clear_reads++;  		value = GET_SCRATCH( 2 );  		break;  	default: | 
