From 48cc350e21acd2b4b03c76937e2861af5271435a Mon Sep 17 00:00:00 2001 From: Keith Whitwell Date: Mon, 26 Aug 2002 22:16:18 +0000 Subject: merged r200-0-1-branch --- bsd-core/Makefile | 2 +- bsd-core/drm_drv.c | 2 +- bsd-core/radeon_drv.c | 1 + bsd/Makefile | 2 +- bsd/Makefile.bsd | 2 +- bsd/drm_drv.h | 2 +- bsd/radeon_drv.c | 1 + shared-core/radeon_cp.c | 363 ++++++++++++++++++++++++++++++++-- shared-core/radeon_drm.h | 56 +++++- shared-core/radeon_drv.h | 127 ++++++++---- shared-core/radeon_state.c | 481 ++++++++++++++++++++++++++++++++++----------- shared/radeon.h | 20 +- shared/radeon_cp.c | 363 ++++++++++++++++++++++++++++++++-- shared/radeon_drm.h | 56 +++++- shared/radeon_drv.h | 127 ++++++++---- shared/radeon_state.c | 481 ++++++++++++++++++++++++++++++++++----------- 16 files changed, 1733 insertions(+), 353 deletions(-) diff --git a/bsd-core/Makefile b/bsd-core/Makefile index 9c87d963..f26fd53d 100644 --- a/bsd-core/Makefile +++ b/bsd-core/Makefile @@ -1,6 +1,6 @@ # $FreeBSD$ # i810, i830 & sis are not complete -SUBDIR = tdfx mga r128 radeon gamma # i810 sis i830 +SUBDIR = radeon # r128 i810 sis i830 tdfx mga gamma .include diff --git a/bsd-core/drm_drv.c b/bsd-core/drm_drv.c index fb0454dd..81ca644a 100644 --- a/bsd-core/drm_drv.c +++ b/bsd-core/drm_drv.c @@ -1207,7 +1207,7 @@ int DRM(unlock)( DRM_IOCTL_ARGS ) DRM(dma_schedule)( dev, 1 ); #endif - /* FIXME: Do we ever really need to check this??? + /* FIXME: Do we ever really need to check this? */ if ( 1 /* !dev->context_flag */ ) { if ( DRM(lock_free)( dev, &dev->lock.hw_lock->lock, diff --git a/bsd-core/radeon_drv.c b/bsd-core/radeon_drv.c index fe69fbb8..d02ab959 100644 --- a/bsd-core/radeon_drv.c +++ b/bsd-core/radeon_drv.c @@ -52,6 +52,7 @@ drm_chipinfo_t DRM(devicelist)[] = { {0x1002, 0x5157, 1, "ATI Radeon QW 7500 (AGP)"}, {0x1002, 0x5159, 1, "ATI Radeon QY VE (AGP)"}, {0x1002, 0x515A, 1, "ATI Radeon QZ VE (AGP)"}, + {0x1002, 0x514C, 1, "ATI Radeon QL 8500 (AGP)"}, {0, 0, 0, NULL} }; diff --git a/bsd/Makefile b/bsd/Makefile index 9c87d963..f26fd53d 100644 --- a/bsd/Makefile +++ b/bsd/Makefile @@ -1,6 +1,6 @@ # $FreeBSD$ # i810, i830 & sis are not complete -SUBDIR = tdfx mga r128 radeon gamma # i810 sis i830 +SUBDIR = radeon # r128 i810 sis i830 tdfx mga gamma .include diff --git a/bsd/Makefile.bsd b/bsd/Makefile.bsd index 9c87d963..f26fd53d 100644 --- a/bsd/Makefile.bsd +++ b/bsd/Makefile.bsd @@ -1,6 +1,6 @@ # $FreeBSD$ # i810, i830 & sis are not complete -SUBDIR = tdfx mga r128 radeon gamma # i810 sis i830 +SUBDIR = radeon # r128 i810 sis i830 tdfx mga gamma .include diff --git a/bsd/drm_drv.h b/bsd/drm_drv.h index fb0454dd..81ca644a 100644 --- a/bsd/drm_drv.h +++ b/bsd/drm_drv.h @@ -1207,7 +1207,7 @@ int DRM(unlock)( DRM_IOCTL_ARGS ) DRM(dma_schedule)( dev, 1 ); #endif - /* FIXME: Do we ever really need to check this??? + /* FIXME: Do we ever really need to check this? */ if ( 1 /* !dev->context_flag */ ) { if ( DRM(lock_free)( dev, &dev->lock.hw_lock->lock, diff --git a/bsd/radeon_drv.c b/bsd/radeon_drv.c index fe69fbb8..d02ab959 100644 --- a/bsd/radeon_drv.c +++ b/bsd/radeon_drv.c @@ -52,6 +52,7 @@ drm_chipinfo_t DRM(devicelist)[] = { {0x1002, 0x5157, 1, "ATI Radeon QW 7500 (AGP)"}, {0x1002, 0x5159, 1, "ATI Radeon QY VE (AGP)"}, {0x1002, 0x515A, 1, "ATI Radeon QZ VE (AGP)"}, + {0x1002, 0x514C, 1, "ATI Radeon QL 8500 (AGP)"}, {0, 0, 0, NULL} }; diff --git a/shared-core/radeon_cp.c b/shared-core/radeon_cp.c index 8250c09b..01069e49 100644 --- a/shared-core/radeon_cp.c +++ b/shared-core/radeon_cp.c @@ -44,6 +44,266 @@ /* CP microcode (from ATI) */ +static u32 R200_cp_microcode[][2] = { + { 0x21007000, 0000000000 }, + { 0x20007000, 0000000000 }, + { 0x000000ab, 0x00000004 }, + { 0x000000af, 0x00000004 }, + { 0x66544a49, 0000000000 }, + { 0x49494174, 0000000000 }, + { 0x54517d83, 0000000000 }, + { 0x498d8b64, 0000000000 }, + { 0x49494949, 0000000000 }, + { 0x49da493c, 0000000000 }, + { 0x49989898, 0000000000 }, + { 0xd34949d5, 0000000000 }, + { 0x9dc90e11, 0000000000 }, + { 0xce9b9b9b, 0000000000 }, + { 0x000f0000, 0x00000016 }, + { 0x352e232c, 0000000000 }, + { 0x00000013, 0x00000004 }, + { 0x000f0000, 0x00000016 }, + { 0x352e272c, 0000000000 }, + { 0x000f0001, 0x00000016 }, + { 0x3239362f, 0000000000 }, + { 0x000077ef, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x00000020, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00061000, 0x00000002 }, + { 0x00000020, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00061000, 0x00000002 }, + { 0x00000020, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00000016, 0x00000004 }, + { 0x0003802a, 0x00000002 }, + { 0x040067e0, 0x00000002 }, + { 0x00000016, 0x00000004 }, + { 0x000077e0, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x000037e1, 0x00000002 }, + { 0x040067e1, 0x00000006 }, + { 0x000077e0, 0x00000002 }, + { 0x000077e1, 0x00000002 }, + { 0x000077e1, 0x00000006 }, + { 0xffffffff, 0000000000 }, + { 0x10000000, 0000000000 }, + { 0x0003802a, 0x00000002 }, + { 0x040067e0, 0x00000006 }, + { 0x00007675, 0x00000002 }, + { 0x00007676, 0x00000002 }, + { 0x00007677, 0x00000002 }, + { 0x00007678, 0x00000006 }, + { 0x0003802b, 0x00000002 }, + { 0x04002676, 0x00000002 }, + { 0x00007677, 0x00000002 }, + { 0x00007678, 0x00000006 }, + { 0x0000002e, 0x00000018 }, + { 0x0000002e, 0x00000018 }, + { 0000000000, 0x00000006 }, + { 0x0000002f, 0x00000018 }, + { 0x0000002f, 0x00000018 }, + { 0000000000, 0x00000006 }, + { 0x01605000, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x00098000, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x64c0603d, 0x00000004 }, + { 0x00080000, 0x00000016 }, + { 0000000000, 0000000000 }, + { 0x0400251d, 0x00000002 }, + { 0x00007580, 0x00000002 }, + { 0x00067581, 0x00000002 }, + { 0x04002580, 0x00000002 }, + { 0x00067581, 0x00000002 }, + { 0x00000046, 0x00000004 }, + { 0x00005000, 0000000000 }, + { 0x00061000, 0x00000002 }, + { 0x0000750e, 0x00000002 }, + { 0x00019000, 0x00000002 }, + { 0x00011055, 0x00000014 }, + { 0x00000055, 0x00000012 }, + { 0x0400250f, 0x00000002 }, + { 0x0000504a, 0x00000004 }, + { 0x00007565, 0x00000002 }, + { 0x00007566, 0x00000002 }, + { 0x00000051, 0x00000004 }, + { 0x01e655b4, 0x00000002 }, + { 0x4401b0dc, 0x00000002 }, + { 0x01c110dc, 0x00000002 }, + { 0x2666705d, 0x00000018 }, + { 0x040c2565, 0x00000002 }, + { 0x0000005d, 0x00000018 }, + { 0x04002564, 0x00000002 }, + { 0x00007566, 0x00000002 }, + { 0x00000054, 0x00000004 }, + { 0x00401060, 0x00000008 }, + { 0x00101000, 0x00000002 }, + { 0x000d80ff, 0x00000002 }, + { 0x00800063, 0x00000008 }, + { 0x000f9000, 0x00000002 }, + { 0x000e00ff, 0x00000002 }, + { 0000000000, 0x00000006 }, + { 0x00000080, 0x00000018 }, + { 0x00000054, 0x00000004 }, + { 0x00007576, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x00009000, 0x00000002 }, + { 0x00041000, 0x00000002 }, + { 0x0c00350e, 0x00000002 }, + { 0x00049000, 0x00000002 }, + { 0x00051000, 0x00000002 }, + { 0x01e785f8, 0x00000002 }, + { 0x00200000, 0x00000002 }, + { 0x00600073, 0x0000000c }, + { 0x00007563, 0x00000002 }, + { 0x006075f0, 0x00000021 }, + { 0x20007068, 0x00000004 }, + { 0x00005068, 0x00000004 }, + { 0x00007576, 0x00000002 }, + { 0x00007577, 0x00000002 }, + { 0x0000750e, 0x00000002 }, + { 0x0000750f, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00600076, 0x0000000c }, + { 0x006075f0, 0x00000021 }, + { 0x000075f8, 0x00000002 }, + { 0x00000076, 0x00000004 }, + { 0x000a750e, 0x00000002 }, + { 0x0020750f, 0x00000002 }, + { 0x00600079, 0x00000004 }, + { 0x00007570, 0x00000002 }, + { 0x00007571, 0x00000002 }, + { 0x00007572, 0x00000006 }, + { 0x00005000, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00007568, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x00000084, 0x0000000c }, + { 0x00058000, 0x00000002 }, + { 0x0c607562, 0x00000002 }, + { 0x00000086, 0x00000004 }, + { 0x00600085, 0x00000004 }, + { 0x400070dd, 0000000000 }, + { 0x000380dd, 0x00000002 }, + { 0x00000093, 0x0000001c }, + { 0x00065095, 0x00000018 }, + { 0x040025bb, 0x00000002 }, + { 0x00061096, 0x00000018 }, + { 0x040075bc, 0000000000 }, + { 0x000075bb, 0x00000002 }, + { 0x000075bc, 0000000000 }, + { 0x00090000, 0x00000006 }, + { 0x00090000, 0x00000002 }, + { 0x000d8002, 0x00000006 }, + { 0x00005000, 0x00000002 }, + { 0x00007821, 0x00000002 }, + { 0x00007800, 0000000000 }, + { 0x00007821, 0x00000002 }, + { 0x00007800, 0000000000 }, + { 0x01665000, 0x00000002 }, + { 0x000a0000, 0x00000002 }, + { 0x000671cc, 0x00000002 }, + { 0x0286f1cd, 0x00000002 }, + { 0x000000a3, 0x00000010 }, + { 0x21007000, 0000000000 }, + { 0x000000aa, 0x0000001c }, + { 0x00065000, 0x00000002 }, + { 0x000a0000, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x000b0000, 0x00000002 }, + { 0x38067000, 0x00000002 }, + { 0x000a00a6, 0x00000004 }, + { 0x20007000, 0000000000 }, + { 0x01200000, 0x00000002 }, + { 0x20077000, 0x00000002 }, + { 0x01200000, 0x00000002 }, + { 0x20007000, 0000000000 }, + { 0x00061000, 0x00000002 }, + { 0x0120751b, 0x00000002 }, + { 0x8040750a, 0x00000002 }, + { 0x8040750b, 0x00000002 }, + { 0x00110000, 0x00000002 }, + { 0x000380dd, 0x00000002 }, + { 0x000000bd, 0x0000001c }, + { 0x00061096, 0x00000018 }, + { 0x844075bd, 0x00000002 }, + { 0x00061095, 0x00000018 }, + { 0x840075bb, 0x00000002 }, + { 0x00061096, 0x00000018 }, + { 0x844075bc, 0x00000002 }, + { 0x000000c0, 0x00000004 }, + { 0x804075bd, 0x00000002 }, + { 0x800075bb, 0x00000002 }, + { 0x804075bc, 0x00000002 }, + { 0x00108000, 0x00000002 }, + { 0x01400000, 0x00000002 }, + { 0x006000c4, 0x0000000c }, + { 0x20c07000, 0x00000020 }, + { 0x000000c6, 0x00000012 }, + { 0x00800000, 0x00000006 }, + { 0x0080751d, 0x00000006 }, + { 0x000025bb, 0x00000002 }, + { 0x000040c0, 0x00000004 }, + { 0x0000775c, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00661000, 0x00000002 }, + { 0x0460275d, 0x00000020 }, + { 0x00004000, 0000000000 }, + { 0x00007999, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00661000, 0x00000002 }, + { 0x0460299b, 0x00000020 }, + { 0x00004000, 0000000000 }, + { 0x01e00830, 0x00000002 }, + { 0x21007000, 0000000000 }, + { 0x00005000, 0x00000002 }, + { 0x00038042, 0x00000002 }, + { 0x040025e0, 0x00000002 }, + { 0x000075e1, 0000000000 }, + { 0x00000001, 0000000000 }, + { 0x000380d9, 0x00000002 }, + { 0x04007394, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, +}; + + static u32 radeon_cp_microcode[][2] = { { 0x21007000, 0000000000 }, { 0x20007000, 0000000000 }, @@ -345,6 +605,8 @@ static int radeon_do_pixcache_flush( drm_radeon_private_t *dev_priv ) u32 tmp; int i; + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + tmp = RADEON_READ( RADEON_RB2D_DSTCACHE_CTLSTAT ); tmp |= RADEON_RB2D_DC_FLUSH_ALL; RADEON_WRITE( RADEON_RB2D_DSTCACHE_CTLSTAT, tmp ); @@ -369,6 +631,8 @@ static int radeon_do_wait_for_fifo( drm_radeon_private_t *dev_priv, { int i; + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { int slots = ( RADEON_READ( RADEON_RBBM_STATUS ) & RADEON_RBBM_FIFOCNT_MASK ); @@ -387,6 +651,8 @@ static int radeon_do_wait_for_idle( drm_radeon_private_t *dev_priv ) { int i, ret; + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + ret = radeon_do_wait_for_fifo( dev_priv, 64 ); if ( ret ) return ret; @@ -420,11 +686,26 @@ static void radeon_cp_load_microcode( drm_radeon_private_t *dev_priv ) radeon_do_wait_for_idle( dev_priv ); RADEON_WRITE( RADEON_CP_ME_RAM_ADDR, 0 ); - for ( i = 0 ; i < 256 ; i++ ) { - RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, - radeon_cp_microcode[i][1] ); - RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, - radeon_cp_microcode[i][0] ); + + if (dev_priv->is_r200) + { + DRM_INFO("Loading R200 Microcode\n"); + for ( i = 0 ; i < 256 ; i++ ) + { + RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, + R200_cp_microcode[i][1] ); + RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, + R200_cp_microcode[i][0] ); + } + } + else + { + for ( i = 0 ; i < 256 ; i++ ) { + RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, + radeon_cp_microcode[i][1] ); + RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, + radeon_cp_microcode[i][0] ); + } } } @@ -736,12 +1017,10 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init ) return DRM_ERR(EINVAL); } + dev_priv->is_r200 = (init->func == RADEON_INIT_R200_CP); + dev_priv->do_boxes = 1; dev_priv->cp_mode = init->cp_mode; - /* Simple idle check. - */ - atomic_set( &dev_priv->idle_count, 0 ); - /* We don't support anything other than bus-mastering ring mode, * but the ring can be in either AGP or PCI space for the ring * read pointer. @@ -1028,6 +1307,7 @@ int radeon_cp_init( DRM_IOCTL_ARGS ) switch ( init.func ) { case RADEON_INIT_CP: + case RADEON_INIT_R200_CP: return radeon_do_init_cp( dev, &init ); case RADEON_CLEANUP_CP: return radeon_do_cleanup_cp( dev ); @@ -1169,6 +1449,14 @@ int radeon_fullscreen( DRM_IOCTL_ARGS ) * completed rendering. * * KW: It's also a good way to find free buffers quickly. + * + * KW: Ideally this loop wouldn't exist, and freelist_get wouldn't + * sleep. However, bugs in older versions of radeon_accel.c mean that + * we essentially have to do this, else old clients will break. + * + * However, it does leave open a potential deadlock where all the + * buffers are held by other clients, which can't release them because + * they can't get the lock. */ drm_buf_t *radeon_freelist_get( drm_device_t *dev ) @@ -1193,17 +1481,56 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev ) buf_priv = buf->dev_private; if ( buf->pid == 0 || (buf->pending && buf_priv->age <= done_age) ) { + dev_priv->stats.requested_bufs++; buf->pending = 0; return buf; } start = 0; } - DRM_UDELAY( 1 ); + + if (t) { + DRM_UDELAY( 1 ); + dev_priv->stats.freelist_loops++; + } } DRM_ERROR( "returning NULL!\n" ); return NULL; } +#if 0 +drm_buf_t *radeon_freelist_get( drm_device_t *dev ) +{ + drm_device_dma_t *dma = dev->dma; + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_buf_priv_t *buf_priv; + drm_buf_t *buf; + int i, t; + int start; + u32 done_age = DRM_READ32(&dev_priv->scratch[1]); + + if ( ++dev_priv->last_buf >= dma->buf_count ) + dev_priv->last_buf = 0; + + start = dev_priv->last_buf; + dev_priv->stats.freelist_loops++; + + for ( t = 0 ; t < 2 ; t++ ) { + for ( i = start ; i < dma->buf_count ; i++ ) { + buf = dma->buflist[i]; + buf_priv = buf->dev_private; + if ( buf->pid == 0 || (buf->pending && + buf_priv->age <= done_age) ) { + dev_priv->stats.requested_bufs++; + buf->pending = 0; + return buf; + } + } + start = 0; + } + + return NULL; +} +#endif void radeon_freelist_reset( drm_device_t *dev ) { @@ -1228,11 +1555,23 @@ int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n ) { drm_radeon_ring_buffer_t *ring = &dev_priv->ring; int i; + u32 last_head = GET_RING_HEAD(ring); for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { - radeon_update_ring_snapshot( ring ); + u32 head = GET_RING_HEAD(ring); + + ring->space = (head - ring->tail) * sizeof(u32); + if ( ring->space <= 0 ) + ring->space += ring->size; if ( ring->space > n ) return 0; + + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + + if (head != last_head) + i = 0; + last_head = head; + DRM_UDELAY( 1 ); } @@ -1251,7 +1590,7 @@ static int radeon_cp_get_buffers( drm_device_t *dev, drm_dma_t *d ) for ( i = d->granted_count ; i < d->request_count ; i++ ) { buf = radeon_freelist_get( dev ); - if ( !buf ) return DRM_ERR(EAGAIN); + if ( !buf ) return DRM_ERR(EBUSY); /* NOTE: broken client */ buf->pid = DRM_CURRENTPID; diff --git a/shared-core/radeon_drm.h b/shared-core/radeon_drm.h index 3802e46c..6469bfb8 100644 --- a/shared-core/radeon_drm.h +++ b/shared-core/radeon_drm.h @@ -89,7 +89,47 @@ #define RADEON_EMIT_SE_ZBIAS_FACTOR 18 /* zbias/2 */ #define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT 19 /* tcl/11 */ #define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED 20 /* material/17 */ -#define RADEON_MAX_STATE_PACKETS 21 +#define R200_EMIT_PP_TXCBLEND_0 21 /* tex0/4 */ +#define R200_EMIT_PP_TXCBLEND_1 22 /* tex1/4 */ +#define R200_EMIT_PP_TXCBLEND_2 23 /* tex2/4 */ +#define R200_EMIT_PP_TXCBLEND_3 24 /* tex3/4 */ +#define R200_EMIT_PP_TXCBLEND_4 25 /* tex4/4 */ +#define R200_EMIT_PP_TXCBLEND_5 26 /* tex5/4 */ +#define R200_EMIT_PP_TXCBLEND_6 27 /* /4 */ +#define R200_EMIT_PP_TXCBLEND_7 28 /* /4 */ +#define R200_EMIT_TCL_LIGHT_MODEL_CTL_0 29 /* tcl/7 */ +#define R200_EMIT_TFACTOR_0 30 /* tf/7 */ +#define R200_EMIT_VTX_FMT_0 31 /* vtx/5 */ +#define R200_EMIT_VAP_CTL 32 /* vap/1 */ +#define R200_EMIT_MATRIX_SELECT_0 33 /* msl/5 */ +#define R200_EMIT_TEX_PROC_CTL_2 34 /* tcg/5 */ +#define R200_EMIT_TCL_UCP_VERT_BLEND_CTL 35 /* tcl/1 */ +#define R200_EMIT_PP_TXFILTER_0 36 /* tex0/6 */ +#define R200_EMIT_PP_TXFILTER_1 37 /* tex1/6 */ +#define R200_EMIT_PP_TXFILTER_2 38 /* tex2/6 */ +#define R200_EMIT_PP_TXFILTER_3 39 /* tex3/6 */ +#define R200_EMIT_PP_TXFILTER_4 40 /* tex4/6 */ +#define R200_EMIT_PP_TXFILTER_5 41 /* tex5/6 */ +#define R200_EMIT_PP_TXOFFSET_0 42 /* tex0/1 */ +#define R200_EMIT_PP_TXOFFSET_1 43 /* tex1/1 */ +#define R200_EMIT_PP_TXOFFSET_2 44 /* tex2/1 */ +#define R200_EMIT_PP_TXOFFSET_3 45 /* tex3/1 */ +#define R200_EMIT_PP_TXOFFSET_4 46 /* tex4/1 */ +#define R200_EMIT_PP_TXOFFSET_5 47 /* tex5/1 */ +#define R200_EMIT_VTE_CNTL 48 /* vte/1 */ +#define R200_EMIT_OUTPUT_VTX_COMP_SEL 49 /* vtx/1 */ +#define R200_EMIT_PP_TAM_DEBUG3 50 /* tam/1 */ +#define R200_EMIT_PP_CNTL_X 51 /* cst/1 */ +#define R200_EMIT_RB3D_DEPTHXY_OFFSET 52 /* cst/1 */ +#define R200_EMIT_RE_AUX_SCISSOR_CNTL 53 /* cst/1 */ +#define R200_EMIT_RE_SCISSOR_TL_0 54 /* cst/2 */ +#define R200_EMIT_RE_SCISSOR_TL_1 55 /* cst/2 */ +#define R200_EMIT_RE_SCISSOR_TL_2 56 /* cst/2 */ +#define R200_EMIT_SE_VAP_CNTL_STATUS 57 /* cst/1 */ +#define R200_EMIT_SE_VTX_STATE_CNTL 58 /* cst/1 */ +#define R200_EMIT_RE_POINTSIZE 59 /* cst/1 */ +#define R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0 60 /* cst/4 */ +#define RADEON_MAX_STATE_PACKETS 61 /* Commands understood by cmd_buffer ioctl. More can be added but @@ -101,24 +141,25 @@ #define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */ #define RADEON_CMD_PACKET3 5 /* emit hw packet */ #define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */ +#define RADEON_CMD_SCALARS2 7 /* r200 stopgap */ typedef union { int i; struct { - char cmd_type, pad0, pad1, pad2; + unsigned char cmd_type, pad0, pad1, pad2; } header; struct { - char cmd_type, packet_id, pad0, pad1; + unsigned char cmd_type, packet_id, pad0, pad1; } packet; struct { - char cmd_type, offset, stride, count; + unsigned char cmd_type, offset, stride, count; } scalars; struct { - char cmd_type, offset, stride, count; + unsigned char cmd_type, offset, stride, count; } vectors; struct { - char cmd_type, buf_idx, pad0, pad1; + unsigned char cmd_type, buf_idx, pad0, pad1; } dma; } drm_radeon_cmd_header_t; @@ -327,7 +368,8 @@ typedef struct { typedef struct drm_radeon_init { enum { RADEON_INIT_CP = 0x01, - RADEON_CLEANUP_CP = 0x02 + RADEON_CLEANUP_CP = 0x02, + RADEON_INIT_R200_CP = 0x03, } func; unsigned long sarea_priv_offset; int is_pci; diff --git a/shared-core/radeon_drv.h b/shared-core/radeon_drv.h index 15c0d4dd..7c341b39 100644 --- a/shared-core/radeon_drv.h +++ b/shared-core/radeon_drv.h @@ -79,12 +79,25 @@ typedef struct drm_radeon_private { int writeback_works; int usec_timeout; + + int is_r200; + int is_pci; unsigned long phys_pci_gart; dma_addr_t bus_pci_gart; - atomic_t idle_count; - + struct { + u32 boxes; + int freelist_timeouts; + int freelist_loops; + int requested_bufs; + int last_frame_reads; + int last_clear_reads; + int clears; + int texture_uploads; + } stats; + + int do_boxes; int page_flipping; int current_page; u32 crtc_offset; @@ -134,14 +147,6 @@ extern drm_buf_t *radeon_freelist_get( drm_device_t *dev ); extern int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n ); -static __inline__ void -radeon_update_ring_snapshot( drm_radeon_ring_buffer_t *ring ) -{ - ring->space = (GET_RING_HEAD(ring) - ring->tail) * sizeof(u32); - if ( ring->space <= 0 ) - ring->space += ring->size; -} - extern int radeon_do_cp_idle( drm_radeon_private_t *dev_priv ); extern int radeon_do_cleanup_cp( drm_device_t *dev ); extern int radeon_do_cleanup_pageflip( drm_device_t *dev ); @@ -159,6 +164,14 @@ extern int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ); extern int radeon_cp_getparam( DRM_IOCTL_ARGS ); extern int radeon_cp_flip( DRM_IOCTL_ARGS ); +/* Flags for stats.boxes + */ +#define RADEON_BOX_DMA_IDLE 0x1 +#define RADEON_BOX_RING_FULL 0x2 +#define RADEON_BOX_FLIP 0x4 +#define RADEON_BOX_WAIT_IDLE 0x8 +#define RADEON_BOX_TEXTURE_LOAD 0x10 + /* Register definitions, register access macros and drmAddMap constants @@ -282,6 +295,7 @@ extern int radeon_cp_flip( DRM_IOCTL_ARGS ); # define RADEON_STENCIL_ENABLE (1 << 7) # define RADEON_Z_ENABLE (1 << 8) #define RADEON_RB3D_DEPTHOFFSET 0x1c24 +#define RADEON_RB3D_DEPTHPITCH 0x1c28 #define RADEON_RB3D_PLANEMASK 0x1d84 #define RADEON_RB3D_STENCILREFMASK 0x1d7c #define RADEON_RB3D_ZCACHE_MODE 0x3250 @@ -513,6 +527,62 @@ extern int radeon_cp_flip( DRM_IOCTL_ARGS ); #define RADEON_TXFORMAT_ARGB8888 6 #define RADEON_TXFORMAT_RGBA8888 7 +#define R200_PP_TXCBLEND_0 0x2f00 +#define R200_PP_TXCBLEND_1 0x2f10 +#define R200_PP_TXCBLEND_2 0x2f20 +#define R200_PP_TXCBLEND_3 0x2f30 +#define R200_PP_TXCBLEND_4 0x2f40 +#define R200_PP_TXCBLEND_5 0x2f50 +#define R200_PP_TXCBLEND_6 0x2f60 +#define R200_PP_TXCBLEND_7 0x2f70 +#define R200_SE_TCL_LIGHT_MODEL_CTL_0 0x2268 +#define R200_PP_TFACTOR_0 0x2ee0 +#define R200_SE_VTX_FMT_0 0x2088 +#define R200_SE_VAP_CNTL 0x2080 +#define R200_SE_TCL_MATRIX_SEL_0 0x2230 +#define R200_SE_TCL_TEX_PROC_CTL_2 0x22a8 +#define R200_SE_TCL_UCP_VERT_BLEND_CTL 0x22c0 +#define R200_PP_TXFILTER_5 0x2ca0 +#define R200_PP_TXFILTER_4 0x2c80 +#define R200_PP_TXFILTER_3 0x2c60 +#define R200_PP_TXFILTER_2 0x2c40 +#define R200_PP_TXFILTER_1 0x2c20 +#define R200_PP_TXFILTER_0 0x2c00 +#define R200_PP_TXOFFSET_5 0x2d78 +#define R200_PP_TXOFFSET_4 0x2d60 +#define R200_PP_TXOFFSET_3 0x2d48 +#define R200_PP_TXOFFSET_2 0x2d30 +#define R200_PP_TXOFFSET_1 0x2d18 +#define R200_PP_TXOFFSET_0 0x2d00 +#define R200_RE_AUX_SCISSOR_CNTL 0x26f0 +#define R200_SE_VTE_CNTL 0x20b0 +#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL 0x2250 +#define R200_PP_TAM_DEBUG3 0x2d9c +#define R200_PP_CNTL_X 0x2cc4 +#define R200_SE_VAP_CNTL_STATUS 0x2140 +#define R200_RE_SCISSOR_TL_0 0x1cd8 +#define R200_RE_SCISSOR_TL_1 0x1ce0 +#define R200_RE_SCISSOR_TL_2 0x1ce8 +#define R200_RB3D_DEPTHXY_OFFSET 0x1d60 +#define R200_RE_AUX_SCISSOR_CNTL 0x26f0 +#define R200_SE_VTX_STATE_CNTL 0x2180 +#define R200_RE_POINTSIZE 0x2648 +#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0 0x2254 + + +#define SE_VAP_CNTL__TCL_ENA_MASK 0x00000001 +#define SE_VAP_CNTL__FORCE_W_TO_ONE_MASK 0x00010000 +#define SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT 0x00000012 +#define SE_VTE_CNTL__VTX_XY_FMT_MASK 0x00000100 +#define SE_VTE_CNTL__VTX_Z_FMT_MASK 0x00000200 +#define SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK 0x00000001 +#define SE_VTX_FMT_0__VTX_W0_PRESENT_MASK 0x00000002 +#define SE_VTX_FMT_0__VTX_COLOR_0_FMT__SHIFT 0x0000000b +#define R200_3D_DRAW_IMMD_2 0xC0003500 +#define R200_SE_VTX_FMT_1 0x208c +#define R200_RE_CNTL 0x1c50 + + /* Constants */ #define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ @@ -620,30 +690,16 @@ do { \ } \ } while (0) + +/* Perfbox functionality only. + */ #define RING_SPACE_TEST_WITH_RETURN( dev_priv ) \ do { \ - drm_radeon_ring_buffer_t *ring = &dev_priv->ring; int i; \ - if ( ring->space < ring->high_mark ) { \ - for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { \ - radeon_update_ring_snapshot( ring ); \ - if ( ring->space >= ring->high_mark ) \ - goto __ring_space_done; \ - DRM_UDELAY( 1 ); \ - } \ - DRM_ERROR( "ring space check from memory failed, reading register...\n" ); \ - /* If ring space check fails from RAM, try reading the \ - register directly */ \ - ring->space = 4 * ( RADEON_READ( RADEON_CP_RB_RPTR ) - ring->tail ); \ - if ( ring->space <= 0 ) \ - ring->space += ring->size; \ - if ( ring->space >= ring->high_mark ) \ - goto __ring_space_done; \ - \ - DRM_ERROR( "ring space check failed!\n" ); \ - return DRM_ERR(EBUSY); \ + if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE)) { \ + u32 head = GET_RING_HEAD(&dev_priv->ring); \ + if (head == dev_priv->ring.tail) \ + dev_priv->stats.boxes |= RADEON_BOX_DMA_IDLE; \ } \ - __ring_space_done: \ - ; \ } while (0) #define VB_AGE_TEST_WITH_RETURN( dev_priv ) \ @@ -710,16 +766,15 @@ do { \ } \ if (((dev_priv->ring.tail + _nr) & mask) != write) { \ DRM_ERROR( \ - "ADVANCE_RING(): mismatch: nr: %x write: %x\n", \ + "ADVANCE_RING(): mismatch: nr: %x write: %x line: %d\n", \ ((dev_priv->ring.tail + _nr) & mask), \ - write); \ + write, __LINE__); \ } else \ dev_priv->ring.tail = write; \ } while (0) #define COMMIT_RING() do { \ - radeon_flush_write_combine(); \ - RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail ); \ + RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail ); \ } while (0) #define OUT_RING( x ) do { \ @@ -760,6 +815,4 @@ do { \ } while (0) -#define RADEON_PERFORMANCE_BOXES 0 - #endif /* __RADEON_DRV_H__ */ diff --git a/shared-core/radeon_state.c b/shared-core/radeon_state.c index 1cc6bde8..7f84e739 100644 --- a/shared-core/radeon_state.c +++ b/shared-core/radeon_state.c @@ -239,18 +239,50 @@ static struct { { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" }, { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" }, { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" }, + { R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" }, + { R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" }, + { R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" }, + { R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" }, + { R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" }, + { R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" }, + { R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" }, + { R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" }, + { R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" }, + { R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" }, + { R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" }, + { R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" }, + { R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" }, + { R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" }, + { R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" }, + { R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" }, + { R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" }, + { R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" }, + { R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" }, + { R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" }, + { R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" }, + { R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" }, + { R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" }, + { R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" }, + { R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" }, + { R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" }, + { R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" }, + { R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" }, + { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" }, + { R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" }, + { R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" }, + { R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" }, + { R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" }, + { R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" }, + { R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" }, + { R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" }, + { R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" }, + { R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" }, + { R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" }, + { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" }, }; - - - - - - - -#if RADEON_PERFORMANCE_BOXES /* ================================================================ * Performance monitoring functions */ @@ -259,10 +291,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv, int x, int y, int w, int h, int r, int g, int b ) { - u32 pitch, offset; u32 color; RING_LOCALS; + x += dev_priv->sarea_priv->boxes[0].x1; + y += dev_priv->sarea_priv->boxes[0].y1; + switch ( dev_priv->color_fmt ) { case RADEON_COLOR_FORMAT_RGB565: color = (((r & 0xf8) << 8) | @@ -275,8 +309,11 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv, break; } - offset = dev_priv->back_offset; - pitch = dev_priv->back_pitch >> 3; + BEGIN_RING( 4 ); + RADEON_WAIT_UNTIL_3D_IDLE(); + OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); + OUT_RING( 0xffffffff ); + ADVANCE_RING(); BEGIN_RING( 6 ); @@ -288,7 +325,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv, RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS ); - OUT_RING( (pitch << 22) | (offset >> 5) ); + if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) { + OUT_RING( dev_priv->front_pitch_offset ); + } else { + OUT_RING( dev_priv->back_pitch_offset ); + } + OUT_RING( color ); OUT_RING( (x << 16) | y ); @@ -299,16 +341,57 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv, static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv ) { - if ( atomic_read( &dev_priv->idle_count ) == 0 ) { - radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 ); - } else { - atomic_set( &dev_priv->idle_count, 0 ); + /* Collapse various things into a wait flag -- trying to + * guess if userspase slept -- better just to have them tell us. + */ + if (dev_priv->stats.last_frame_reads > 1 || + dev_priv->stats.last_clear_reads > dev_priv->stats.clears) { + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; } -} -#endif + if (dev_priv->stats.freelist_loops) { + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + } + /* Purple box for page flipping + */ + if ( dev_priv->stats.boxes & RADEON_BOX_FLIP ) + radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 ); + + /* Red box if we have to wait for idle at any point + */ + if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE ) + radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 ); + + /* Blue box: lost context? + */ + /* Yellow box for texture swaps + */ + if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD ) + radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 ); + + /* Green box if hardware never idles (as far as we can tell) + */ + if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) ) + radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 ); + + + /* Draw bars indicating number of buffers allocated + * (not a great measure, easily confused) + */ + if (dev_priv->stats.requested_bufs) { + if (dev_priv->stats.requested_bufs > 100) + dev_priv->stats.requested_bufs = 100; + + radeon_clear_box( dev_priv, 4, 16, + dev_priv->stats.requested_bufs, 4, + 196, 128, 128 ); + } + + memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) ); + +} /* ================================================================ * CP command dispatch functions */ @@ -328,6 +411,8 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, RING_LOCALS; DRM_DEBUG( "flags = 0x%x\n", flags ); + dev_priv->stats.clears++; + if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) { unsigned int tmp = flags; @@ -336,120 +421,251 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, if ( tmp & RADEON_BACK ) flags |= RADEON_FRONT; } + if ( flags & (RADEON_FRONT | RADEON_BACK) ) { + + BEGIN_RING( 4 ); + + /* Ensure the 3D stream is idle before doing a + * 2D fill to clear the front or back buffer. + */ + RADEON_WAIT_UNTIL_3D_IDLE(); + + OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); + OUT_RING( clear->color_mask ); + + ADVANCE_RING(); + + /* Make sure we restore the 3D state next time. + */ + dev_priv->sarea_priv->ctx_owner = 0; + + for ( i = 0 ; i < nbox ; i++ ) { + int x = pbox[i].x1; + int y = pbox[i].y1; + int w = pbox[i].x2 - x; + int h = pbox[i].y2 - y; + + DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n", + x, y, w, h, flags ); + + if ( flags & RADEON_FRONT ) { + BEGIN_RING( 6 ); + + OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); + OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_SOLID_COLOR | + (dev_priv->color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_P | + RADEON_GMC_CLR_CMP_CNTL_DIS ); + + OUT_RING( dev_priv->front_pitch_offset ); + OUT_RING( clear->clear_color ); + + OUT_RING( (x << 16) | y ); + OUT_RING( (w << 16) | h ); + + ADVANCE_RING(); + } + + if ( flags & RADEON_BACK ) { + BEGIN_RING( 6 ); + + OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); + OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_SOLID_COLOR | + (dev_priv->color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_P | + RADEON_GMC_CLR_CMP_CNTL_DIS ); + + OUT_RING( dev_priv->back_pitch_offset ); + OUT_RING( clear->clear_color ); + + OUT_RING( (x << 16) | y ); + OUT_RING( (w << 16) | h ); + + ADVANCE_RING(); + } + } + } + /* We have to clear the depth and/or stencil buffers by * rendering a quad into just those buffers. Thus, we have to * make sure the 3D engine is configured correctly. */ - if ( flags & (RADEON_DEPTH | RADEON_STENCIL) ) { - rb3d_cntl = depth_clear->rb3d_cntl; + if ( dev_priv->is_r200 && + (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { - if ( flags & RADEON_DEPTH ) { - rb3d_cntl |= RADEON_Z_ENABLE; - } else { - rb3d_cntl &= ~RADEON_Z_ENABLE; - } + int tempPP_CNTL; + int tempRE_CNTL; + int tempRB3D_CNTL; + int tempRB3D_ZSTENCILCNTL; + int tempRB3D_STENCILREFMASK; + int tempRB3D_PLANEMASK; + int tempSE_CNTL; + int tempSE_VTE_CNTL; + int tempSE_VTX_FMT_0; + int tempSE_VTX_FMT_1; + int tempSE_VAP_CNTL; + int tempRE_AUX_SCISSOR_CNTL; - if ( flags & RADEON_STENCIL ) { - rb3d_cntl |= RADEON_STENCIL_ENABLE; - rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */ - } else { - rb3d_cntl &= ~RADEON_STENCIL_ENABLE; - rb3d_stencilrefmask = 0x00000000; - } - } + tempPP_CNTL = 0; + tempRE_CNTL = 0; - for ( i = 0 ; i < nbox ; i++ ) { - int x = pbox[i].x1; - int y = pbox[i].y1; - int w = pbox[i].x2 - x; - int h = pbox[i].y2 - y; + tempRB3D_CNTL = depth_clear->rb3d_cntl; + tempRB3D_CNTL &= ~(1<<15); /* unset radeon magic flag */ - DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n", - x, y, w, h, flags ); + tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl; + tempRB3D_STENCILREFMASK = 0x0; - if ( flags & (RADEON_FRONT | RADEON_BACK) ) { - BEGIN_RING( 4 ); + tempSE_CNTL = depth_clear->se_cntl; - /* Ensure the 3D stream is idle before doing a - * 2D fill to clear the front or back buffer. - */ - RADEON_WAIT_UNTIL_3D_IDLE(); - OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); - OUT_RING( clear->color_mask ); - ADVANCE_RING(); + /* Disable TCL */ - /* Make sure we restore the 3D state next time. - */ - dev_priv->sarea_priv->ctx_owner = 0; - } + tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */ + (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT)); - if ( flags & RADEON_FRONT ) { - BEGIN_RING( 6 ); + tempRB3D_PLANEMASK = 0x0; - OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); - OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_SOLID_COLOR | - (dev_priv->color_fmt << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP3_P | - RADEON_GMC_CLR_CMP_CNTL_DIS ); + tempRE_AUX_SCISSOR_CNTL = 0x0; - OUT_RING( dev_priv->front_pitch_offset ); - OUT_RING( clear->clear_color ); + tempSE_VTE_CNTL = + SE_VTE_CNTL__VTX_XY_FMT_MASK | + SE_VTE_CNTL__VTX_Z_FMT_MASK; - OUT_RING( (x << 16) | y ); - OUT_RING( (w << 16) | h ); + /* Vertex format (X, Y, Z, W)*/ + tempSE_VTX_FMT_0 = + SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK | + SE_VTX_FMT_0__VTX_W0_PRESENT_MASK; + tempSE_VTX_FMT_1 = 0x0; - ADVANCE_RING(); + + /* + * Depth buffer specific enables + */ + if (flags & RADEON_DEPTH) { + /* Enable depth buffer */ + tempRB3D_CNTL |= RADEON_Z_ENABLE; + } else { + /* Disable depth buffer */ + tempRB3D_CNTL &= ~RADEON_Z_ENABLE; } - if ( flags & RADEON_BACK ) { - BEGIN_RING( 6 ); + /* + * Stencil buffer specific enables + */ + if ( flags & RADEON_STENCIL ) { + tempRB3D_CNTL |= RADEON_STENCIL_ENABLE; + tempRB3D_STENCILREFMASK = clear->depth_mask; + } else { + tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE; + tempRB3D_STENCILREFMASK = 0x00000000; + } - OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); - OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_SOLID_COLOR | - (dev_priv->color_fmt << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP3_P | - RADEON_GMC_CLR_CMP_CNTL_DIS ); + BEGIN_RING( 26 ); + RADEON_WAIT_UNTIL_2D_IDLE(); + + OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL ); + OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL ); + OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL ); + OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, + tempRB3D_ZSTENCILCNTL ); + OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, + tempRB3D_STENCILREFMASK ); + OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK ); + OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL ); + OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL ); + OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 ); + OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 ); + OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL ); + OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL, + tempRE_AUX_SCISSOR_CNTL ); + ADVANCE_RING(); - OUT_RING( dev_priv->back_pitch_offset ); - OUT_RING( clear->clear_color ); + /* Make sure we restore the 3D state next time. + */ + dev_priv->sarea_priv->ctx_owner = 0; - OUT_RING( (x << 16) | y ); - OUT_RING( (w << 16) | h ); + for ( i = 0 ; i < nbox ; i++ ) { + + /* Funny that this should be required -- + * sets top-left? + */ + radeon_emit_clip_rect( dev_priv, + &sarea_priv->boxes[i] ); + BEGIN_RING( 14 ); + OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) ); + OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST | + RADEON_PRIM_WALK_RING | + (3 << RADEON_NUM_VERTICES_SHIFT)) ); + OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); + OUT_RING( depth_boxes[i].ui[CLEAR_Y1] ); + OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); + OUT_RING( 0x3f800000 ); + OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); + OUT_RING( depth_boxes[i].ui[CLEAR_Y2] ); + OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); + OUT_RING( 0x3f800000 ); + OUT_RING( depth_boxes[i].ui[CLEAR_X2] ); + OUT_RING( depth_boxes[i].ui[CLEAR_Y2] ); + OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); + OUT_RING( 0x3f800000 ); ADVANCE_RING(); } + } + else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { - if ( flags & (RADEON_DEPTH | RADEON_STENCIL) ) { + rb3d_cntl = depth_clear->rb3d_cntl; - radeon_emit_clip_rect( dev_priv, - &sarea_priv->boxes[i] ); + if ( flags & RADEON_DEPTH ) { + rb3d_cntl |= RADEON_Z_ENABLE; + } else { + rb3d_cntl &= ~RADEON_Z_ENABLE; + } - BEGIN_RING( 28 ); + if ( flags & RADEON_STENCIL ) { + rb3d_cntl |= RADEON_STENCIL_ENABLE; + rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */ + } else { + rb3d_cntl &= ~RADEON_STENCIL_ENABLE; + rb3d_stencilrefmask = 0x00000000; + } - RADEON_WAIT_UNTIL_2D_IDLE(); + BEGIN_RING( 13 ); + RADEON_WAIT_UNTIL_2D_IDLE(); - OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) ); - OUT_RING( 0x00000000 ); - OUT_RING( rb3d_cntl ); + OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) ); + OUT_RING( 0x00000000 ); + OUT_RING( rb3d_cntl ); + + OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, + depth_clear->rb3d_zstencilcntl ); + OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, + rb3d_stencilrefmask ); + OUT_RING_REG( RADEON_RB3D_PLANEMASK, + 0x00000000 ); + OUT_RING_REG( RADEON_SE_CNTL, + depth_clear->se_cntl ); + ADVANCE_RING(); - OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, - depth_clear->rb3d_zstencilcntl ); - OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, - rb3d_stencilrefmask ); - OUT_RING_REG( RADEON_RB3D_PLANEMASK, - 0x00000000 ); - OUT_RING_REG( RADEON_SE_CNTL, - depth_clear->se_cntl ); + /* Make sure we restore the 3D state next time. + */ + dev_priv->sarea_priv->ctx_owner = 0; - /* Radeon 7500 doesn't like vertices without - * color. + for ( i = 0 ; i < nbox ; i++ ) { + + /* Funny that this should be required -- + * sets top-left? */ + radeon_emit_clip_rect( dev_priv, + &sarea_priv->boxes[i] ); + + BEGIN_RING( 15 ); + OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) ); OUT_RING( RADEON_VTX_Z_PRESENT | RADEON_VTX_PKCOLOR_PRESENT); @@ -459,6 +675,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, RADEON_VTX_FMT_RADEON_MODE | (3 << RADEON_NUM_VERTICES_SHIFT)) ); + OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); OUT_RING( depth_boxes[i].ui[CLEAR_Y1] ); OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); @@ -475,10 +692,6 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, OUT_RING( 0x0 ); ADVANCE_RING(); - - /* Make sure we restore the 3D state next time. - */ - dev_priv->sarea_priv->ctx_owner = 0; } } @@ -506,11 +719,12 @@ static void radeon_cp_dispatch_swap( drm_device_t *dev ) RING_LOCALS; DRM_DEBUG( "\n" ); -#if RADEON_PERFORMANCE_BOXES + /* Do some trivial performance monitoring... */ - radeon_cp_performance_boxes( dev_priv ); -#endif + if (dev_priv->do_boxes) + radeon_cp_performance_boxes( dev_priv ); + /* Wait for the 3D stream to idle before dispatching the bitblt. * This will prevent data corruption between the two streams. @@ -579,20 +793,21 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev ) { drm_radeon_private_t *dev_priv = dev->dev_private; RING_LOCALS; - DRM_DEBUG( "page=%d\n", dev_priv->current_page ); + DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n", + __FUNCTION__, + dev_priv->current_page, + dev_priv->sarea_priv->pfCurrentPage); -#if RADEON_PERFORMANCE_BOXES /* Do some trivial performance monitoring... */ - radeon_cp_performance_boxes( dev_priv ); -#endif + if (dev_priv->do_boxes) { + dev_priv->stats.boxes |= RADEON_BOX_FLIP; + radeon_cp_performance_boxes( dev_priv ); + } BEGIN_RING( 4 ); RADEON_WAIT_UNTIL_3D_IDLE(); -/* - RADEON_WAIT_UNTIL_PAGE_FLIPPED(); -*/ OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET, 0 ) ); if ( dev_priv->current_page == 0 ) { @@ -847,6 +1062,8 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev, int ret = 0, i; RING_LOCALS; + dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD; + /* FIXME: Be smarter about this... */ buf = radeon_freelist_get( dev ); @@ -1611,6 +1828,30 @@ static __inline__ int radeon_emit_scalars( return 0; } +/* God this is ugly + */ +static __inline__ int radeon_emit_scalars2( + drm_radeon_private_t *dev_priv, + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.scalars.count; + int *data = (int *)cmdbuf->buf; + int start = ((unsigned int)header.scalars.offset) + 0x100; + int stride = header.scalars.stride; + RING_LOCALS; + + BEGIN_RING( 3+sz ); + OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) ); + OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); + OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) ); + OUT_RING_USER_TABLE( data, sz ); + ADVANCE_RING(); + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + static __inline__ int radeon_emit_vectors( drm_radeon_private_t *dev_priv, drm_radeon_cmd_header_t header, @@ -1775,6 +2016,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) switch (header.header.cmd_type) { case RADEON_CMD_PACKET: + DRM_DEBUG("RADEON_CMD_PACKET\n"); if (radeon_emit_packets( dev_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_packets failed\n"); return DRM_ERR(EINVAL); @@ -1782,6 +2024,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) break; case RADEON_CMD_SCALARS: + DRM_DEBUG("RADEON_CMD_SCALARS\n"); if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_scalars failed\n"); return DRM_ERR(EINVAL); @@ -1789,6 +2032,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) break; case RADEON_CMD_VECTORS: + DRM_DEBUG("RADEON_CMD_VECTORS\n"); if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_vectors failed\n"); return DRM_ERR(EINVAL); @@ -1796,6 +2040,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) break; case RADEON_CMD_DMA_DISCARD: + DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n"); idx = header.dma.buf_idx; if ( idx < 0 || idx >= dma->buf_count ) { DRM_ERROR( "buffer index %d (of %d max)\n", @@ -1813,6 +2058,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) break; case RADEON_CMD_PACKET3: + DRM_DEBUG("RADEON_CMD_PACKET3\n"); if (radeon_emit_packet3( dev, &cmdbuf )) { DRM_ERROR("radeon_emit_packet3 failed\n"); return DRM_ERR(EINVAL); @@ -1820,12 +2066,20 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) break; case RADEON_CMD_PACKET3_CLIP: + DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n"); if (radeon_emit_packet3_cliprect( dev, &cmdbuf, orig_nbox )) { DRM_ERROR("radeon_emit_packet3_clip failed\n"); return DRM_ERR(EINVAL); } break; + case RADEON_CMD_SCALARS2: + DRM_DEBUG("RADEON_CMD_SCALARS2\n"); + if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) { + DRM_ERROR("radeon_emit_scalars2 failed\n"); + return DRM_ERR(EINVAL); + } + break; default: DRM_ERROR("bad cmd_type %d at %p\n", header.header.cmd_type, @@ -1835,6 +2089,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) } + DRM_DEBUG("DONE\n"); COMMIT_RING(); return 0; } @@ -1863,12 +2118,14 @@ int radeon_cp_getparam( DRM_IOCTL_ARGS ) value = dev_priv->agp_buffers_offset; break; case RADEON_PARAM_LAST_FRAME: + dev_priv->stats.last_frame_reads++; value = GET_SCRATCH( 0 ); break; case RADEON_PARAM_LAST_DISPATCH: value = GET_SCRATCH( 1 ); break; case RADEON_PARAM_LAST_CLEAR: + dev_priv->stats.last_clear_reads++; value = GET_SCRATCH( 2 ); break; default: diff --git a/shared/radeon.h b/shared/radeon.h index ad36c868..885e4297 100644 --- a/shared/radeon.h +++ b/shared/radeon.h @@ -25,6 +25,7 @@ * * Authors: * Gareth Hughes + * Keith Whitwell */ #ifndef __RADEON_H__ @@ -43,14 +44,14 @@ #define __HAVE_SG 1 #define __HAVE_PCI_DMA 1 -#define DRIVER_AUTHOR "Gareth Hughes, VA Linux Systems Inc." +#define DRIVER_AUTHOR "Gareth Hughes, Keith Whitwell, others." #define DRIVER_NAME "radeon" #define DRIVER_DESC "ATI Radeon" -#define DRIVER_DATE "20020714" +#define DRIVER_DATE "20020611" #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 4 +#define DRIVER_MINOR 5 #define DRIVER_PATCHLEVEL 0 /* Interface history: @@ -63,6 +64,10 @@ * - Add support for new radeon packets (keith) * - Add getparam ioctl (keith) * - Add flip-buffers ioctl, deprecate fullscreen foo (keith). + * 1.4 - Add scratch registers to get_param ioctl. + * 1.5 - Add r200 packets to cmdbuf ioctl + * - Add r200 function to init ioctl + * - Add 'scalar2' instruction to cmdbuf */ #define DRIVER_IOCTLS \ [DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { radeon_cp_buffers, 1, 0 }, \ @@ -104,15 +109,6 @@ */ #define __HAVE_DMA 1 -#if 0 -/* GH: Remove this for now... */ -#define __HAVE_DMA_QUIESCENT 1 -#define DRIVER_DMA_QUIESCENT() do { \ - drm_radeon_private_t *dev_priv = dev->dev_private; \ - return radeon_do_cp_idle( dev_priv ); \ -} while (0) -#endif - /* Buffer customization: */ #define DRIVER_BUF_PRIV_T drm_radeon_buf_priv_t diff --git a/shared/radeon_cp.c b/shared/radeon_cp.c index 8250c09b..01069e49 100644 --- a/shared/radeon_cp.c +++ b/shared/radeon_cp.c @@ -44,6 +44,266 @@ /* CP microcode (from ATI) */ +static u32 R200_cp_microcode[][2] = { + { 0x21007000, 0000000000 }, + { 0x20007000, 0000000000 }, + { 0x000000ab, 0x00000004 }, + { 0x000000af, 0x00000004 }, + { 0x66544a49, 0000000000 }, + { 0x49494174, 0000000000 }, + { 0x54517d83, 0000000000 }, + { 0x498d8b64, 0000000000 }, + { 0x49494949, 0000000000 }, + { 0x49da493c, 0000000000 }, + { 0x49989898, 0000000000 }, + { 0xd34949d5, 0000000000 }, + { 0x9dc90e11, 0000000000 }, + { 0xce9b9b9b, 0000000000 }, + { 0x000f0000, 0x00000016 }, + { 0x352e232c, 0000000000 }, + { 0x00000013, 0x00000004 }, + { 0x000f0000, 0x00000016 }, + { 0x352e272c, 0000000000 }, + { 0x000f0001, 0x00000016 }, + { 0x3239362f, 0000000000 }, + { 0x000077ef, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x00000020, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00061000, 0x00000002 }, + { 0x00000020, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00061000, 0x00000002 }, + { 0x00000020, 0x0000001a }, + { 0x00004000, 0x0000001e }, + { 0x00000016, 0x00000004 }, + { 0x0003802a, 0x00000002 }, + { 0x040067e0, 0x00000002 }, + { 0x00000016, 0x00000004 }, + { 0x000077e0, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x000037e1, 0x00000002 }, + { 0x040067e1, 0x00000006 }, + { 0x000077e0, 0x00000002 }, + { 0x000077e1, 0x00000002 }, + { 0x000077e1, 0x00000006 }, + { 0xffffffff, 0000000000 }, + { 0x10000000, 0000000000 }, + { 0x0003802a, 0x00000002 }, + { 0x040067e0, 0x00000006 }, + { 0x00007675, 0x00000002 }, + { 0x00007676, 0x00000002 }, + { 0x00007677, 0x00000002 }, + { 0x00007678, 0x00000006 }, + { 0x0003802b, 0x00000002 }, + { 0x04002676, 0x00000002 }, + { 0x00007677, 0x00000002 }, + { 0x00007678, 0x00000006 }, + { 0x0000002e, 0x00000018 }, + { 0x0000002e, 0x00000018 }, + { 0000000000, 0x00000006 }, + { 0x0000002f, 0x00000018 }, + { 0x0000002f, 0x00000018 }, + { 0000000000, 0x00000006 }, + { 0x01605000, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x00098000, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x64c0603d, 0x00000004 }, + { 0x00080000, 0x00000016 }, + { 0000000000, 0000000000 }, + { 0x0400251d, 0x00000002 }, + { 0x00007580, 0x00000002 }, + { 0x00067581, 0x00000002 }, + { 0x04002580, 0x00000002 }, + { 0x00067581, 0x00000002 }, + { 0x00000046, 0x00000004 }, + { 0x00005000, 0000000000 }, + { 0x00061000, 0x00000002 }, + { 0x0000750e, 0x00000002 }, + { 0x00019000, 0x00000002 }, + { 0x00011055, 0x00000014 }, + { 0x00000055, 0x00000012 }, + { 0x0400250f, 0x00000002 }, + { 0x0000504a, 0x00000004 }, + { 0x00007565, 0x00000002 }, + { 0x00007566, 0x00000002 }, + { 0x00000051, 0x00000004 }, + { 0x01e655b4, 0x00000002 }, + { 0x4401b0dc, 0x00000002 }, + { 0x01c110dc, 0x00000002 }, + { 0x2666705d, 0x00000018 }, + { 0x040c2565, 0x00000002 }, + { 0x0000005d, 0x00000018 }, + { 0x04002564, 0x00000002 }, + { 0x00007566, 0x00000002 }, + { 0x00000054, 0x00000004 }, + { 0x00401060, 0x00000008 }, + { 0x00101000, 0x00000002 }, + { 0x000d80ff, 0x00000002 }, + { 0x00800063, 0x00000008 }, + { 0x000f9000, 0x00000002 }, + { 0x000e00ff, 0x00000002 }, + { 0000000000, 0x00000006 }, + { 0x00000080, 0x00000018 }, + { 0x00000054, 0x00000004 }, + { 0x00007576, 0x00000002 }, + { 0x00065000, 0x00000002 }, + { 0x00009000, 0x00000002 }, + { 0x00041000, 0x00000002 }, + { 0x0c00350e, 0x00000002 }, + { 0x00049000, 0x00000002 }, + { 0x00051000, 0x00000002 }, + { 0x01e785f8, 0x00000002 }, + { 0x00200000, 0x00000002 }, + { 0x00600073, 0x0000000c }, + { 0x00007563, 0x00000002 }, + { 0x006075f0, 0x00000021 }, + { 0x20007068, 0x00000004 }, + { 0x00005068, 0x00000004 }, + { 0x00007576, 0x00000002 }, + { 0x00007577, 0x00000002 }, + { 0x0000750e, 0x00000002 }, + { 0x0000750f, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00600076, 0x0000000c }, + { 0x006075f0, 0x00000021 }, + { 0x000075f8, 0x00000002 }, + { 0x00000076, 0x00000004 }, + { 0x000a750e, 0x00000002 }, + { 0x0020750f, 0x00000002 }, + { 0x00600079, 0x00000004 }, + { 0x00007570, 0x00000002 }, + { 0x00007571, 0x00000002 }, + { 0x00007572, 0x00000006 }, + { 0x00005000, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00007568, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x00000084, 0x0000000c }, + { 0x00058000, 0x00000002 }, + { 0x0c607562, 0x00000002 }, + { 0x00000086, 0x00000004 }, + { 0x00600085, 0x00000004 }, + { 0x400070dd, 0000000000 }, + { 0x000380dd, 0x00000002 }, + { 0x00000093, 0x0000001c }, + { 0x00065095, 0x00000018 }, + { 0x040025bb, 0x00000002 }, + { 0x00061096, 0x00000018 }, + { 0x040075bc, 0000000000 }, + { 0x000075bb, 0x00000002 }, + { 0x000075bc, 0000000000 }, + { 0x00090000, 0x00000006 }, + { 0x00090000, 0x00000002 }, + { 0x000d8002, 0x00000006 }, + { 0x00005000, 0x00000002 }, + { 0x00007821, 0x00000002 }, + { 0x00007800, 0000000000 }, + { 0x00007821, 0x00000002 }, + { 0x00007800, 0000000000 }, + { 0x01665000, 0x00000002 }, + { 0x000a0000, 0x00000002 }, + { 0x000671cc, 0x00000002 }, + { 0x0286f1cd, 0x00000002 }, + { 0x000000a3, 0x00000010 }, + { 0x21007000, 0000000000 }, + { 0x000000aa, 0x0000001c }, + { 0x00065000, 0x00000002 }, + { 0x000a0000, 0x00000002 }, + { 0x00061000, 0x00000002 }, + { 0x000b0000, 0x00000002 }, + { 0x38067000, 0x00000002 }, + { 0x000a00a6, 0x00000004 }, + { 0x20007000, 0000000000 }, + { 0x01200000, 0x00000002 }, + { 0x20077000, 0x00000002 }, + { 0x01200000, 0x00000002 }, + { 0x20007000, 0000000000 }, + { 0x00061000, 0x00000002 }, + { 0x0120751b, 0x00000002 }, + { 0x8040750a, 0x00000002 }, + { 0x8040750b, 0x00000002 }, + { 0x00110000, 0x00000002 }, + { 0x000380dd, 0x00000002 }, + { 0x000000bd, 0x0000001c }, + { 0x00061096, 0x00000018 }, + { 0x844075bd, 0x00000002 }, + { 0x00061095, 0x00000018 }, + { 0x840075bb, 0x00000002 }, + { 0x00061096, 0x00000018 }, + { 0x844075bc, 0x00000002 }, + { 0x000000c0, 0x00000004 }, + { 0x804075bd, 0x00000002 }, + { 0x800075bb, 0x00000002 }, + { 0x804075bc, 0x00000002 }, + { 0x00108000, 0x00000002 }, + { 0x01400000, 0x00000002 }, + { 0x006000c4, 0x0000000c }, + { 0x20c07000, 0x00000020 }, + { 0x000000c6, 0x00000012 }, + { 0x00800000, 0x00000006 }, + { 0x0080751d, 0x00000006 }, + { 0x000025bb, 0x00000002 }, + { 0x000040c0, 0x00000004 }, + { 0x0000775c, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00661000, 0x00000002 }, + { 0x0460275d, 0x00000020 }, + { 0x00004000, 0000000000 }, + { 0x00007999, 0x00000002 }, + { 0x00a05000, 0x00000002 }, + { 0x00661000, 0x00000002 }, + { 0x0460299b, 0x00000020 }, + { 0x00004000, 0000000000 }, + { 0x01e00830, 0x00000002 }, + { 0x21007000, 0000000000 }, + { 0x00005000, 0x00000002 }, + { 0x00038042, 0x00000002 }, + { 0x040025e0, 0x00000002 }, + { 0x000075e1, 0000000000 }, + { 0x00000001, 0000000000 }, + { 0x000380d9, 0x00000002 }, + { 0x04007394, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, + { 0000000000, 0000000000 }, +}; + + static u32 radeon_cp_microcode[][2] = { { 0x21007000, 0000000000 }, { 0x20007000, 0000000000 }, @@ -345,6 +605,8 @@ static int radeon_do_pixcache_flush( drm_radeon_private_t *dev_priv ) u32 tmp; int i; + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + tmp = RADEON_READ( RADEON_RB2D_DSTCACHE_CTLSTAT ); tmp |= RADEON_RB2D_DC_FLUSH_ALL; RADEON_WRITE( RADEON_RB2D_DSTCACHE_CTLSTAT, tmp ); @@ -369,6 +631,8 @@ static int radeon_do_wait_for_fifo( drm_radeon_private_t *dev_priv, { int i; + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { int slots = ( RADEON_READ( RADEON_RBBM_STATUS ) & RADEON_RBBM_FIFOCNT_MASK ); @@ -387,6 +651,8 @@ static int radeon_do_wait_for_idle( drm_radeon_private_t *dev_priv ) { int i, ret; + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + ret = radeon_do_wait_for_fifo( dev_priv, 64 ); if ( ret ) return ret; @@ -420,11 +686,26 @@ static void radeon_cp_load_microcode( drm_radeon_private_t *dev_priv ) radeon_do_wait_for_idle( dev_priv ); RADEON_WRITE( RADEON_CP_ME_RAM_ADDR, 0 ); - for ( i = 0 ; i < 256 ; i++ ) { - RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, - radeon_cp_microcode[i][1] ); - RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, - radeon_cp_microcode[i][0] ); + + if (dev_priv->is_r200) + { + DRM_INFO("Loading R200 Microcode\n"); + for ( i = 0 ; i < 256 ; i++ ) + { + RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, + R200_cp_microcode[i][1] ); + RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, + R200_cp_microcode[i][0] ); + } + } + else + { + for ( i = 0 ; i < 256 ; i++ ) { + RADEON_WRITE( RADEON_CP_ME_RAM_DATAH, + radeon_cp_microcode[i][1] ); + RADEON_WRITE( RADEON_CP_ME_RAM_DATAL, + radeon_cp_microcode[i][0] ); + } } } @@ -736,12 +1017,10 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init ) return DRM_ERR(EINVAL); } + dev_priv->is_r200 = (init->func == RADEON_INIT_R200_CP); + dev_priv->do_boxes = 1; dev_priv->cp_mode = init->cp_mode; - /* Simple idle check. - */ - atomic_set( &dev_priv->idle_count, 0 ); - /* We don't support anything other than bus-mastering ring mode, * but the ring can be in either AGP or PCI space for the ring * read pointer. @@ -1028,6 +1307,7 @@ int radeon_cp_init( DRM_IOCTL_ARGS ) switch ( init.func ) { case RADEON_INIT_CP: + case RADEON_INIT_R200_CP: return radeon_do_init_cp( dev, &init ); case RADEON_CLEANUP_CP: return radeon_do_cleanup_cp( dev ); @@ -1169,6 +1449,14 @@ int radeon_fullscreen( DRM_IOCTL_ARGS ) * completed rendering. * * KW: It's also a good way to find free buffers quickly. + * + * KW: Ideally this loop wouldn't exist, and freelist_get wouldn't + * sleep. However, bugs in older versions of radeon_accel.c mean that + * we essentially have to do this, else old clients will break. + * + * However, it does leave open a potential deadlock where all the + * buffers are held by other clients, which can't release them because + * they can't get the lock. */ drm_buf_t *radeon_freelist_get( drm_device_t *dev ) @@ -1193,17 +1481,56 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev ) buf_priv = buf->dev_private; if ( buf->pid == 0 || (buf->pending && buf_priv->age <= done_age) ) { + dev_priv->stats.requested_bufs++; buf->pending = 0; return buf; } start = 0; } - DRM_UDELAY( 1 ); + + if (t) { + DRM_UDELAY( 1 ); + dev_priv->stats.freelist_loops++; + } } DRM_ERROR( "returning NULL!\n" ); return NULL; } +#if 0 +drm_buf_t *radeon_freelist_get( drm_device_t *dev ) +{ + drm_device_dma_t *dma = dev->dma; + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_buf_priv_t *buf_priv; + drm_buf_t *buf; + int i, t; + int start; + u32 done_age = DRM_READ32(&dev_priv->scratch[1]); + + if ( ++dev_priv->last_buf >= dma->buf_count ) + dev_priv->last_buf = 0; + + start = dev_priv->last_buf; + dev_priv->stats.freelist_loops++; + + for ( t = 0 ; t < 2 ; t++ ) { + for ( i = start ; i < dma->buf_count ; i++ ) { + buf = dma->buflist[i]; + buf_priv = buf->dev_private; + if ( buf->pid == 0 || (buf->pending && + buf_priv->age <= done_age) ) { + dev_priv->stats.requested_bufs++; + buf->pending = 0; + return buf; + } + } + start = 0; + } + + return NULL; +} +#endif void radeon_freelist_reset( drm_device_t *dev ) { @@ -1228,11 +1555,23 @@ int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n ) { drm_radeon_ring_buffer_t *ring = &dev_priv->ring; int i; + u32 last_head = GET_RING_HEAD(ring); for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { - radeon_update_ring_snapshot( ring ); + u32 head = GET_RING_HEAD(ring); + + ring->space = (head - ring->tail) * sizeof(u32); + if ( ring->space <= 0 ) + ring->space += ring->size; if ( ring->space > n ) return 0; + + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + + if (head != last_head) + i = 0; + last_head = head; + DRM_UDELAY( 1 ); } @@ -1251,7 +1590,7 @@ static int radeon_cp_get_buffers( drm_device_t *dev, drm_dma_t *d ) for ( i = d->granted_count ; i < d->request_count ; i++ ) { buf = radeon_freelist_get( dev ); - if ( !buf ) return DRM_ERR(EAGAIN); + if ( !buf ) return DRM_ERR(EBUSY); /* NOTE: broken client */ buf->pid = DRM_CURRENTPID; diff --git a/shared/radeon_drm.h b/shared/radeon_drm.h index 3802e46c..6469bfb8 100644 --- a/shared/radeon_drm.h +++ b/shared/radeon_drm.h @@ -89,7 +89,47 @@ #define RADEON_EMIT_SE_ZBIAS_FACTOR 18 /* zbias/2 */ #define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT 19 /* tcl/11 */ #define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED 20 /* material/17 */ -#define RADEON_MAX_STATE_PACKETS 21 +#define R200_EMIT_PP_TXCBLEND_0 21 /* tex0/4 */ +#define R200_EMIT_PP_TXCBLEND_1 22 /* tex1/4 */ +#define R200_EMIT_PP_TXCBLEND_2 23 /* tex2/4 */ +#define R200_EMIT_PP_TXCBLEND_3 24 /* tex3/4 */ +#define R200_EMIT_PP_TXCBLEND_4 25 /* tex4/4 */ +#define R200_EMIT_PP_TXCBLEND_5 26 /* tex5/4 */ +#define R200_EMIT_PP_TXCBLEND_6 27 /* /4 */ +#define R200_EMIT_PP_TXCBLEND_7 28 /* /4 */ +#define R200_EMIT_TCL_LIGHT_MODEL_CTL_0 29 /* tcl/7 */ +#define R200_EMIT_TFACTOR_0 30 /* tf/7 */ +#define R200_EMIT_VTX_FMT_0 31 /* vtx/5 */ +#define R200_EMIT_VAP_CTL 32 /* vap/1 */ +#define R200_EMIT_MATRIX_SELECT_0 33 /* msl/5 */ +#define R200_EMIT_TEX_PROC_CTL_2 34 /* tcg/5 */ +#define R200_EMIT_TCL_UCP_VERT_BLEND_CTL 35 /* tcl/1 */ +#define R200_EMIT_PP_TXFILTER_0 36 /* tex0/6 */ +#define R200_EMIT_PP_TXFILTER_1 37 /* tex1/6 */ +#define R200_EMIT_PP_TXFILTER_2 38 /* tex2/6 */ +#define R200_EMIT_PP_TXFILTER_3 39 /* tex3/6 */ +#define R200_EMIT_PP_TXFILTER_4 40 /* tex4/6 */ +#define R200_EMIT_PP_TXFILTER_5 41 /* tex5/6 */ +#define R200_EMIT_PP_TXOFFSET_0 42 /* tex0/1 */ +#define R200_EMIT_PP_TXOFFSET_1 43 /* tex1/1 */ +#define R200_EMIT_PP_TXOFFSET_2 44 /* tex2/1 */ +#define R200_EMIT_PP_TXOFFSET_3 45 /* tex3/1 */ +#define R200_EMIT_PP_TXOFFSET_4 46 /* tex4/1 */ +#define R200_EMIT_PP_TXOFFSET_5 47 /* tex5/1 */ +#define R200_EMIT_VTE_CNTL 48 /* vte/1 */ +#define R200_EMIT_OUTPUT_VTX_COMP_SEL 49 /* vtx/1 */ +#define R200_EMIT_PP_TAM_DEBUG3 50 /* tam/1 */ +#define R200_EMIT_PP_CNTL_X 51 /* cst/1 */ +#define R200_EMIT_RB3D_DEPTHXY_OFFSET 52 /* cst/1 */ +#define R200_EMIT_RE_AUX_SCISSOR_CNTL 53 /* cst/1 */ +#define R200_EMIT_RE_SCISSOR_TL_0 54 /* cst/2 */ +#define R200_EMIT_RE_SCISSOR_TL_1 55 /* cst/2 */ +#define R200_EMIT_RE_SCISSOR_TL_2 56 /* cst/2 */ +#define R200_EMIT_SE_VAP_CNTL_STATUS 57 /* cst/1 */ +#define R200_EMIT_SE_VTX_STATE_CNTL 58 /* cst/1 */ +#define R200_EMIT_RE_POINTSIZE 59 /* cst/1 */ +#define R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0 60 /* cst/4 */ +#define RADEON_MAX_STATE_PACKETS 61 /* Commands understood by cmd_buffer ioctl. More can be added but @@ -101,24 +141,25 @@ #define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */ #define RADEON_CMD_PACKET3 5 /* emit hw packet */ #define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */ +#define RADEON_CMD_SCALARS2 7 /* r200 stopgap */ typedef union { int i; struct { - char cmd_type, pad0, pad1, pad2; + unsigned char cmd_type, pad0, pad1, pad2; } header; struct { - char cmd_type, packet_id, pad0, pad1; + unsigned char cmd_type, packet_id, pad0, pad1; } packet; struct { - char cmd_type, offset, stride, count; + unsigned char cmd_type, offset, stride, count; } scalars; struct { - char cmd_type, offset, stride, count; + unsigned char cmd_type, offset, stride, count; } vectors; struct { - char cmd_type, buf_idx, pad0, pad1; + unsigned char cmd_type, buf_idx, pad0, pad1; } dma; } drm_radeon_cmd_header_t; @@ -327,7 +368,8 @@ typedef struct { typedef struct drm_radeon_init { enum { RADEON_INIT_CP = 0x01, - RADEON_CLEANUP_CP = 0x02 + RADEON_CLEANUP_CP = 0x02, + RADEON_INIT_R200_CP = 0x03, } func; unsigned long sarea_priv_offset; int is_pci; diff --git a/shared/radeon_drv.h b/shared/radeon_drv.h index 15c0d4dd..7c341b39 100644 --- a/shared/radeon_drv.h +++ b/shared/radeon_drv.h @@ -79,12 +79,25 @@ typedef struct drm_radeon_private { int writeback_works; int usec_timeout; + + int is_r200; + int is_pci; unsigned long phys_pci_gart; dma_addr_t bus_pci_gart; - atomic_t idle_count; - + struct { + u32 boxes; + int freelist_timeouts; + int freelist_loops; + int requested_bufs; + int last_frame_reads; + int last_clear_reads; + int clears; + int texture_uploads; + } stats; + + int do_boxes; int page_flipping; int current_page; u32 crtc_offset; @@ -134,14 +147,6 @@ extern drm_buf_t *radeon_freelist_get( drm_device_t *dev ); extern int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n ); -static __inline__ void -radeon_update_ring_snapshot( drm_radeon_ring_buffer_t *ring ) -{ - ring->space = (GET_RING_HEAD(ring) - ring->tail) * sizeof(u32); - if ( ring->space <= 0 ) - ring->space += ring->size; -} - extern int radeon_do_cp_idle( drm_radeon_private_t *dev_priv ); extern int radeon_do_cleanup_cp( drm_device_t *dev ); extern int radeon_do_cleanup_pageflip( drm_device_t *dev ); @@ -159,6 +164,14 @@ extern int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ); extern int radeon_cp_getparam( DRM_IOCTL_ARGS ); extern int radeon_cp_flip( DRM_IOCTL_ARGS ); +/* Flags for stats.boxes + */ +#define RADEON_BOX_DMA_IDLE 0x1 +#define RADEON_BOX_RING_FULL 0x2 +#define RADEON_BOX_FLIP 0x4 +#define RADEON_BOX_WAIT_IDLE 0x8 +#define RADEON_BOX_TEXTURE_LOAD 0x10 + /* Register definitions, register access macros and drmAddMap constants @@ -282,6 +295,7 @@ extern int radeon_cp_flip( DRM_IOCTL_ARGS ); # define RADEON_STENCIL_ENABLE (1 << 7) # define RADEON_Z_ENABLE (1 << 8) #define RADEON_RB3D_DEPTHOFFSET 0x1c24 +#define RADEON_RB3D_DEPTHPITCH 0x1c28 #define RADEON_RB3D_PLANEMASK 0x1d84 #define RADEON_RB3D_STENCILREFMASK 0x1d7c #define RADEON_RB3D_ZCACHE_MODE 0x3250 @@ -513,6 +527,62 @@ extern int radeon_cp_flip( DRM_IOCTL_ARGS ); #define RADEON_TXFORMAT_ARGB8888 6 #define RADEON_TXFORMAT_RGBA8888 7 +#define R200_PP_TXCBLEND_0 0x2f00 +#define R200_PP_TXCBLEND_1 0x2f10 +#define R200_PP_TXCBLEND_2 0x2f20 +#define R200_PP_TXCBLEND_3 0x2f30 +#define R200_PP_TXCBLEND_4 0x2f40 +#define R200_PP_TXCBLEND_5 0x2f50 +#define R200_PP_TXCBLEND_6 0x2f60 +#define R200_PP_TXCBLEND_7 0x2f70 +#define R200_SE_TCL_LIGHT_MODEL_CTL_0 0x2268 +#define R200_PP_TFACTOR_0 0x2ee0 +#define R200_SE_VTX_FMT_0 0x2088 +#define R200_SE_VAP_CNTL 0x2080 +#define R200_SE_TCL_MATRIX_SEL_0 0x2230 +#define R200_SE_TCL_TEX_PROC_CTL_2 0x22a8 +#define R200_SE_TCL_UCP_VERT_BLEND_CTL 0x22c0 +#define R200_PP_TXFILTER_5 0x2ca0 +#define R200_PP_TXFILTER_4 0x2c80 +#define R200_PP_TXFILTER_3 0x2c60 +#define R200_PP_TXFILTER_2 0x2c40 +#define R200_PP_TXFILTER_1 0x2c20 +#define R200_PP_TXFILTER_0 0x2c00 +#define R200_PP_TXOFFSET_5 0x2d78 +#define R200_PP_TXOFFSET_4 0x2d60 +#define R200_PP_TXOFFSET_3 0x2d48 +#define R200_PP_TXOFFSET_2 0x2d30 +#define R200_PP_TXOFFSET_1 0x2d18 +#define R200_PP_TXOFFSET_0 0x2d00 +#define R200_RE_AUX_SCISSOR_CNTL 0x26f0 +#define R200_SE_VTE_CNTL 0x20b0 +#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL 0x2250 +#define R200_PP_TAM_DEBUG3 0x2d9c +#define R200_PP_CNTL_X 0x2cc4 +#define R200_SE_VAP_CNTL_STATUS 0x2140 +#define R200_RE_SCISSOR_TL_0 0x1cd8 +#define R200_RE_SCISSOR_TL_1 0x1ce0 +#define R200_RE_SCISSOR_TL_2 0x1ce8 +#define R200_RB3D_DEPTHXY_OFFSET 0x1d60 +#define R200_RE_AUX_SCISSOR_CNTL 0x26f0 +#define R200_SE_VTX_STATE_CNTL 0x2180 +#define R200_RE_POINTSIZE 0x2648 +#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0 0x2254 + + +#define SE_VAP_CNTL__TCL_ENA_MASK 0x00000001 +#define SE_VAP_CNTL__FORCE_W_TO_ONE_MASK 0x00010000 +#define SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT 0x00000012 +#define SE_VTE_CNTL__VTX_XY_FMT_MASK 0x00000100 +#define SE_VTE_CNTL__VTX_Z_FMT_MASK 0x00000200 +#define SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK 0x00000001 +#define SE_VTX_FMT_0__VTX_W0_PRESENT_MASK 0x00000002 +#define SE_VTX_FMT_0__VTX_COLOR_0_FMT__SHIFT 0x0000000b +#define R200_3D_DRAW_IMMD_2 0xC0003500 +#define R200_SE_VTX_FMT_1 0x208c +#define R200_RE_CNTL 0x1c50 + + /* Constants */ #define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */ @@ -620,30 +690,16 @@ do { \ } \ } while (0) + +/* Perfbox functionality only. + */ #define RING_SPACE_TEST_WITH_RETURN( dev_priv ) \ do { \ - drm_radeon_ring_buffer_t *ring = &dev_priv->ring; int i; \ - if ( ring->space < ring->high_mark ) { \ - for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { \ - radeon_update_ring_snapshot( ring ); \ - if ( ring->space >= ring->high_mark ) \ - goto __ring_space_done; \ - DRM_UDELAY( 1 ); \ - } \ - DRM_ERROR( "ring space check from memory failed, reading register...\n" ); \ - /* If ring space check fails from RAM, try reading the \ - register directly */ \ - ring->space = 4 * ( RADEON_READ( RADEON_CP_RB_RPTR ) - ring->tail ); \ - if ( ring->space <= 0 ) \ - ring->space += ring->size; \ - if ( ring->space >= ring->high_mark ) \ - goto __ring_space_done; \ - \ - DRM_ERROR( "ring space check failed!\n" ); \ - return DRM_ERR(EBUSY); \ + if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE)) { \ + u32 head = GET_RING_HEAD(&dev_priv->ring); \ + if (head == dev_priv->ring.tail) \ + dev_priv->stats.boxes |= RADEON_BOX_DMA_IDLE; \ } \ - __ring_space_done: \ - ; \ } while (0) #define VB_AGE_TEST_WITH_RETURN( dev_priv ) \ @@ -710,16 +766,15 @@ do { \ } \ if (((dev_priv->ring.tail + _nr) & mask) != write) { \ DRM_ERROR( \ - "ADVANCE_RING(): mismatch: nr: %x write: %x\n", \ + "ADVANCE_RING(): mismatch: nr: %x write: %x line: %d\n", \ ((dev_priv->ring.tail + _nr) & mask), \ - write); \ + write, __LINE__); \ } else \ dev_priv->ring.tail = write; \ } while (0) #define COMMIT_RING() do { \ - radeon_flush_write_combine(); \ - RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail ); \ + RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail ); \ } while (0) #define OUT_RING( x ) do { \ @@ -760,6 +815,4 @@ do { \ } while (0) -#define RADEON_PERFORMANCE_BOXES 0 - #endif /* __RADEON_DRV_H__ */ diff --git a/shared/radeon_state.c b/shared/radeon_state.c index 1cc6bde8..7f84e739 100644 --- a/shared/radeon_state.c +++ b/shared/radeon_state.c @@ -239,18 +239,50 @@ static struct { { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" }, { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" }, { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" }, + { R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" }, + { R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" }, + { R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" }, + { R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" }, + { R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" }, + { R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" }, + { R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" }, + { R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" }, + { R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" }, + { R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" }, + { R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" }, + { R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" }, + { R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" }, + { R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" }, + { R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" }, + { R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" }, + { R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" }, + { R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" }, + { R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" }, + { R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" }, + { R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" }, + { R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" }, + { R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" }, + { R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" }, + { R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" }, + { R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" }, + { R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" }, + { R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" }, + { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" }, + { R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" }, + { R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" }, + { R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" }, + { R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" }, + { R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" }, + { R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" }, + { R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" }, + { R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" }, + { R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" }, + { R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" }, + { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" }, }; - - - - - - - -#if RADEON_PERFORMANCE_BOXES /* ================================================================ * Performance monitoring functions */ @@ -259,10 +291,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv, int x, int y, int w, int h, int r, int g, int b ) { - u32 pitch, offset; u32 color; RING_LOCALS; + x += dev_priv->sarea_priv->boxes[0].x1; + y += dev_priv->sarea_priv->boxes[0].y1; + switch ( dev_priv->color_fmt ) { case RADEON_COLOR_FORMAT_RGB565: color = (((r & 0xf8) << 8) | @@ -275,8 +309,11 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv, break; } - offset = dev_priv->back_offset; - pitch = dev_priv->back_pitch >> 3; + BEGIN_RING( 4 ); + RADEON_WAIT_UNTIL_3D_IDLE(); + OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); + OUT_RING( 0xffffffff ); + ADVANCE_RING(); BEGIN_RING( 6 ); @@ -288,7 +325,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv, RADEON_ROP3_P | RADEON_GMC_CLR_CMP_CNTL_DIS ); - OUT_RING( (pitch << 22) | (offset >> 5) ); + if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) { + OUT_RING( dev_priv->front_pitch_offset ); + } else { + OUT_RING( dev_priv->back_pitch_offset ); + } + OUT_RING( color ); OUT_RING( (x << 16) | y ); @@ -299,16 +341,57 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv, static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv ) { - if ( atomic_read( &dev_priv->idle_count ) == 0 ) { - radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 ); - } else { - atomic_set( &dev_priv->idle_count, 0 ); + /* Collapse various things into a wait flag -- trying to + * guess if userspase slept -- better just to have them tell us. + */ + if (dev_priv->stats.last_frame_reads > 1 || + dev_priv->stats.last_clear_reads > dev_priv->stats.clears) { + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; } -} -#endif + if (dev_priv->stats.freelist_loops) { + dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE; + } + /* Purple box for page flipping + */ + if ( dev_priv->stats.boxes & RADEON_BOX_FLIP ) + radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 ); + + /* Red box if we have to wait for idle at any point + */ + if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE ) + radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 ); + + /* Blue box: lost context? + */ + /* Yellow box for texture swaps + */ + if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD ) + radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 ); + + /* Green box if hardware never idles (as far as we can tell) + */ + if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) ) + radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 ); + + + /* Draw bars indicating number of buffers allocated + * (not a great measure, easily confused) + */ + if (dev_priv->stats.requested_bufs) { + if (dev_priv->stats.requested_bufs > 100) + dev_priv->stats.requested_bufs = 100; + + radeon_clear_box( dev_priv, 4, 16, + dev_priv->stats.requested_bufs, 4, + 196, 128, 128 ); + } + + memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) ); + +} /* ================================================================ * CP command dispatch functions */ @@ -328,6 +411,8 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, RING_LOCALS; DRM_DEBUG( "flags = 0x%x\n", flags ); + dev_priv->stats.clears++; + if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) { unsigned int tmp = flags; @@ -336,120 +421,251 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, if ( tmp & RADEON_BACK ) flags |= RADEON_FRONT; } + if ( flags & (RADEON_FRONT | RADEON_BACK) ) { + + BEGIN_RING( 4 ); + + /* Ensure the 3D stream is idle before doing a + * 2D fill to clear the front or back buffer. + */ + RADEON_WAIT_UNTIL_3D_IDLE(); + + OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); + OUT_RING( clear->color_mask ); + + ADVANCE_RING(); + + /* Make sure we restore the 3D state next time. + */ + dev_priv->sarea_priv->ctx_owner = 0; + + for ( i = 0 ; i < nbox ; i++ ) { + int x = pbox[i].x1; + int y = pbox[i].y1; + int w = pbox[i].x2 - x; + int h = pbox[i].y2 - y; + + DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n", + x, y, w, h, flags ); + + if ( flags & RADEON_FRONT ) { + BEGIN_RING( 6 ); + + OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); + OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_SOLID_COLOR | + (dev_priv->color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_P | + RADEON_GMC_CLR_CMP_CNTL_DIS ); + + OUT_RING( dev_priv->front_pitch_offset ); + OUT_RING( clear->clear_color ); + + OUT_RING( (x << 16) | y ); + OUT_RING( (w << 16) | h ); + + ADVANCE_RING(); + } + + if ( flags & RADEON_BACK ) { + BEGIN_RING( 6 ); + + OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); + OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_SOLID_COLOR | + (dev_priv->color_fmt << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP3_P | + RADEON_GMC_CLR_CMP_CNTL_DIS ); + + OUT_RING( dev_priv->back_pitch_offset ); + OUT_RING( clear->clear_color ); + + OUT_RING( (x << 16) | y ); + OUT_RING( (w << 16) | h ); + + ADVANCE_RING(); + } + } + } + /* We have to clear the depth and/or stencil buffers by * rendering a quad into just those buffers. Thus, we have to * make sure the 3D engine is configured correctly. */ - if ( flags & (RADEON_DEPTH | RADEON_STENCIL) ) { - rb3d_cntl = depth_clear->rb3d_cntl; + if ( dev_priv->is_r200 && + (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { - if ( flags & RADEON_DEPTH ) { - rb3d_cntl |= RADEON_Z_ENABLE; - } else { - rb3d_cntl &= ~RADEON_Z_ENABLE; - } + int tempPP_CNTL; + int tempRE_CNTL; + int tempRB3D_CNTL; + int tempRB3D_ZSTENCILCNTL; + int tempRB3D_STENCILREFMASK; + int tempRB3D_PLANEMASK; + int tempSE_CNTL; + int tempSE_VTE_CNTL; + int tempSE_VTX_FMT_0; + int tempSE_VTX_FMT_1; + int tempSE_VAP_CNTL; + int tempRE_AUX_SCISSOR_CNTL; - if ( flags & RADEON_STENCIL ) { - rb3d_cntl |= RADEON_STENCIL_ENABLE; - rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */ - } else { - rb3d_cntl &= ~RADEON_STENCIL_ENABLE; - rb3d_stencilrefmask = 0x00000000; - } - } + tempPP_CNTL = 0; + tempRE_CNTL = 0; - for ( i = 0 ; i < nbox ; i++ ) { - int x = pbox[i].x1; - int y = pbox[i].y1; - int w = pbox[i].x2 - x; - int h = pbox[i].y2 - y; + tempRB3D_CNTL = depth_clear->rb3d_cntl; + tempRB3D_CNTL &= ~(1<<15); /* unset radeon magic flag */ - DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n", - x, y, w, h, flags ); + tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl; + tempRB3D_STENCILREFMASK = 0x0; - if ( flags & (RADEON_FRONT | RADEON_BACK) ) { - BEGIN_RING( 4 ); + tempSE_CNTL = depth_clear->se_cntl; - /* Ensure the 3D stream is idle before doing a - * 2D fill to clear the front or back buffer. - */ - RADEON_WAIT_UNTIL_3D_IDLE(); - OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) ); - OUT_RING( clear->color_mask ); - ADVANCE_RING(); + /* Disable TCL */ - /* Make sure we restore the 3D state next time. - */ - dev_priv->sarea_priv->ctx_owner = 0; - } + tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */ + (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT)); - if ( flags & RADEON_FRONT ) { - BEGIN_RING( 6 ); + tempRB3D_PLANEMASK = 0x0; - OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); - OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_SOLID_COLOR | - (dev_priv->color_fmt << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP3_P | - RADEON_GMC_CLR_CMP_CNTL_DIS ); + tempRE_AUX_SCISSOR_CNTL = 0x0; - OUT_RING( dev_priv->front_pitch_offset ); - OUT_RING( clear->clear_color ); + tempSE_VTE_CNTL = + SE_VTE_CNTL__VTX_XY_FMT_MASK | + SE_VTE_CNTL__VTX_Z_FMT_MASK; - OUT_RING( (x << 16) | y ); - OUT_RING( (w << 16) | h ); + /* Vertex format (X, Y, Z, W)*/ + tempSE_VTX_FMT_0 = + SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK | + SE_VTX_FMT_0__VTX_W0_PRESENT_MASK; + tempSE_VTX_FMT_1 = 0x0; - ADVANCE_RING(); + + /* + * Depth buffer specific enables + */ + if (flags & RADEON_DEPTH) { + /* Enable depth buffer */ + tempRB3D_CNTL |= RADEON_Z_ENABLE; + } else { + /* Disable depth buffer */ + tempRB3D_CNTL &= ~RADEON_Z_ENABLE; } - if ( flags & RADEON_BACK ) { - BEGIN_RING( 6 ); + /* + * Stencil buffer specific enables + */ + if ( flags & RADEON_STENCIL ) { + tempRB3D_CNTL |= RADEON_STENCIL_ENABLE; + tempRB3D_STENCILREFMASK = clear->depth_mask; + } else { + tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE; + tempRB3D_STENCILREFMASK = 0x00000000; + } - OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) ); - OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL | - RADEON_GMC_BRUSH_SOLID_COLOR | - (dev_priv->color_fmt << 8) | - RADEON_GMC_SRC_DATATYPE_COLOR | - RADEON_ROP3_P | - RADEON_GMC_CLR_CMP_CNTL_DIS ); + BEGIN_RING( 26 ); + RADEON_WAIT_UNTIL_2D_IDLE(); + + OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL ); + OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL ); + OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL ); + OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, + tempRB3D_ZSTENCILCNTL ); + OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, + tempRB3D_STENCILREFMASK ); + OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK ); + OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL ); + OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL ); + OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 ); + OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 ); + OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL ); + OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL, + tempRE_AUX_SCISSOR_CNTL ); + ADVANCE_RING(); - OUT_RING( dev_priv->back_pitch_offset ); - OUT_RING( clear->clear_color ); + /* Make sure we restore the 3D state next time. + */ + dev_priv->sarea_priv->ctx_owner = 0; - OUT_RING( (x << 16) | y ); - OUT_RING( (w << 16) | h ); + for ( i = 0 ; i < nbox ; i++ ) { + + /* Funny that this should be required -- + * sets top-left? + */ + radeon_emit_clip_rect( dev_priv, + &sarea_priv->boxes[i] ); + BEGIN_RING( 14 ); + OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) ); + OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST | + RADEON_PRIM_WALK_RING | + (3 << RADEON_NUM_VERTICES_SHIFT)) ); + OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); + OUT_RING( depth_boxes[i].ui[CLEAR_Y1] ); + OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); + OUT_RING( 0x3f800000 ); + OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); + OUT_RING( depth_boxes[i].ui[CLEAR_Y2] ); + OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); + OUT_RING( 0x3f800000 ); + OUT_RING( depth_boxes[i].ui[CLEAR_X2] ); + OUT_RING( depth_boxes[i].ui[CLEAR_Y2] ); + OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); + OUT_RING( 0x3f800000 ); ADVANCE_RING(); } + } + else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) { - if ( flags & (RADEON_DEPTH | RADEON_STENCIL) ) { + rb3d_cntl = depth_clear->rb3d_cntl; - radeon_emit_clip_rect( dev_priv, - &sarea_priv->boxes[i] ); + if ( flags & RADEON_DEPTH ) { + rb3d_cntl |= RADEON_Z_ENABLE; + } else { + rb3d_cntl &= ~RADEON_Z_ENABLE; + } - BEGIN_RING( 28 ); + if ( flags & RADEON_STENCIL ) { + rb3d_cntl |= RADEON_STENCIL_ENABLE; + rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */ + } else { + rb3d_cntl &= ~RADEON_STENCIL_ENABLE; + rb3d_stencilrefmask = 0x00000000; + } - RADEON_WAIT_UNTIL_2D_IDLE(); + BEGIN_RING( 13 ); + RADEON_WAIT_UNTIL_2D_IDLE(); - OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) ); - OUT_RING( 0x00000000 ); - OUT_RING( rb3d_cntl ); + OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) ); + OUT_RING( 0x00000000 ); + OUT_RING( rb3d_cntl ); + + OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, + depth_clear->rb3d_zstencilcntl ); + OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, + rb3d_stencilrefmask ); + OUT_RING_REG( RADEON_RB3D_PLANEMASK, + 0x00000000 ); + OUT_RING_REG( RADEON_SE_CNTL, + depth_clear->se_cntl ); + ADVANCE_RING(); - OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL, - depth_clear->rb3d_zstencilcntl ); - OUT_RING_REG( RADEON_RB3D_STENCILREFMASK, - rb3d_stencilrefmask ); - OUT_RING_REG( RADEON_RB3D_PLANEMASK, - 0x00000000 ); - OUT_RING_REG( RADEON_SE_CNTL, - depth_clear->se_cntl ); + /* Make sure we restore the 3D state next time. + */ + dev_priv->sarea_priv->ctx_owner = 0; - /* Radeon 7500 doesn't like vertices without - * color. + for ( i = 0 ; i < nbox ; i++ ) { + + /* Funny that this should be required -- + * sets top-left? */ + radeon_emit_clip_rect( dev_priv, + &sarea_priv->boxes[i] ); + + BEGIN_RING( 15 ); + OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) ); OUT_RING( RADEON_VTX_Z_PRESENT | RADEON_VTX_PKCOLOR_PRESENT); @@ -459,6 +675,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, RADEON_VTX_FMT_RADEON_MODE | (3 << RADEON_NUM_VERTICES_SHIFT)) ); + OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); OUT_RING( depth_boxes[i].ui[CLEAR_Y1] ); OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); @@ -475,10 +692,6 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, OUT_RING( 0x0 ); ADVANCE_RING(); - - /* Make sure we restore the 3D state next time. - */ - dev_priv->sarea_priv->ctx_owner = 0; } } @@ -506,11 +719,12 @@ static void radeon_cp_dispatch_swap( drm_device_t *dev ) RING_LOCALS; DRM_DEBUG( "\n" ); -#if RADEON_PERFORMANCE_BOXES + /* Do some trivial performance monitoring... */ - radeon_cp_performance_boxes( dev_priv ); -#endif + if (dev_priv->do_boxes) + radeon_cp_performance_boxes( dev_priv ); + /* Wait for the 3D stream to idle before dispatching the bitblt. * This will prevent data corruption between the two streams. @@ -579,20 +793,21 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev ) { drm_radeon_private_t *dev_priv = dev->dev_private; RING_LOCALS; - DRM_DEBUG( "page=%d\n", dev_priv->current_page ); + DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n", + __FUNCTION__, + dev_priv->current_page, + dev_priv->sarea_priv->pfCurrentPage); -#if RADEON_PERFORMANCE_BOXES /* Do some trivial performance monitoring... */ - radeon_cp_performance_boxes( dev_priv ); -#endif + if (dev_priv->do_boxes) { + dev_priv->stats.boxes |= RADEON_BOX_FLIP; + radeon_cp_performance_boxes( dev_priv ); + } BEGIN_RING( 4 ); RADEON_WAIT_UNTIL_3D_IDLE(); -/* - RADEON_WAIT_UNTIL_PAGE_FLIPPED(); -*/ OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET, 0 ) ); if ( dev_priv->current_page == 0 ) { @@ -847,6 +1062,8 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev, int ret = 0, i; RING_LOCALS; + dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD; + /* FIXME: Be smarter about this... */ buf = radeon_freelist_get( dev ); @@ -1611,6 +1828,30 @@ static __inline__ int radeon_emit_scalars( return 0; } +/* God this is ugly + */ +static __inline__ int radeon_emit_scalars2( + drm_radeon_private_t *dev_priv, + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.scalars.count; + int *data = (int *)cmdbuf->buf; + int start = ((unsigned int)header.scalars.offset) + 0x100; + int stride = header.scalars.stride; + RING_LOCALS; + + BEGIN_RING( 3+sz ); + OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) ); + OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); + OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) ); + OUT_RING_USER_TABLE( data, sz ); + ADVANCE_RING(); + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + static __inline__ int radeon_emit_vectors( drm_radeon_private_t *dev_priv, drm_radeon_cmd_header_t header, @@ -1775,6 +2016,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) switch (header.header.cmd_type) { case RADEON_CMD_PACKET: + DRM_DEBUG("RADEON_CMD_PACKET\n"); if (radeon_emit_packets( dev_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_packets failed\n"); return DRM_ERR(EINVAL); @@ -1782,6 +2024,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) break; case RADEON_CMD_SCALARS: + DRM_DEBUG("RADEON_CMD_SCALARS\n"); if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_scalars failed\n"); return DRM_ERR(EINVAL); @@ -1789,6 +2032,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) break; case RADEON_CMD_VECTORS: + DRM_DEBUG("RADEON_CMD_VECTORS\n"); if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) { DRM_ERROR("radeon_emit_vectors failed\n"); return DRM_ERR(EINVAL); @@ -1796,6 +2040,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) break; case RADEON_CMD_DMA_DISCARD: + DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n"); idx = header.dma.buf_idx; if ( idx < 0 || idx >= dma->buf_count ) { DRM_ERROR( "buffer index %d (of %d max)\n", @@ -1813,6 +2058,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) break; case RADEON_CMD_PACKET3: + DRM_DEBUG("RADEON_CMD_PACKET3\n"); if (radeon_emit_packet3( dev, &cmdbuf )) { DRM_ERROR("radeon_emit_packet3 failed\n"); return DRM_ERR(EINVAL); @@ -1820,12 +2066,20 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) break; case RADEON_CMD_PACKET3_CLIP: + DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n"); if (radeon_emit_packet3_cliprect( dev, &cmdbuf, orig_nbox )) { DRM_ERROR("radeon_emit_packet3_clip failed\n"); return DRM_ERR(EINVAL); } break; + case RADEON_CMD_SCALARS2: + DRM_DEBUG("RADEON_CMD_SCALARS2\n"); + if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) { + DRM_ERROR("radeon_emit_scalars2 failed\n"); + return DRM_ERR(EINVAL); + } + break; default: DRM_ERROR("bad cmd_type %d at %p\n", header.header.cmd_type, @@ -1835,6 +2089,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS ) } + DRM_DEBUG("DONE\n"); COMMIT_RING(); return 0; } @@ -1863,12 +2118,14 @@ int radeon_cp_getparam( DRM_IOCTL_ARGS ) value = dev_priv->agp_buffers_offset; break; case RADEON_PARAM_LAST_FRAME: + dev_priv->stats.last_frame_reads++; value = GET_SCRATCH( 0 ); break; case RADEON_PARAM_LAST_DISPATCH: value = GET_SCRATCH( 1 ); break; case RADEON_PARAM_LAST_CLEAR: + dev_priv->stats.last_clear_reads++; value = GET_SCRATCH( 2 ); break; default: -- cgit v1.2.3