summaryrefslogtreecommitdiff
path: root/shared
diff options
context:
space:
mode:
Diffstat (limited to 'shared')
-rw-r--r--shared/radeon.h20
-rw-r--r--shared/radeon_cp.c363
-rw-r--r--shared/radeon_drm.h56
-rw-r--r--shared/radeon_drv.h127
-rw-r--r--shared/radeon_state.c481
5 files changed, 867 insertions, 180 deletions
diff --git a/shared/radeon.h b/shared/radeon.h
index ad36c868..885e4297 100644
--- a/shared/radeon.h
+++ b/shared/radeon.h
@@ -25,6 +25,7 @@
*
* Authors:
* Gareth Hughes <gareth@valinux.com>
+ * Keith Whitwell <keith@tungstengraphics.com>
*/
#ifndef __RADEON_H__
@@ -43,14 +44,14 @@
#define __HAVE_SG 1
#define __HAVE_PCI_DMA 1
-#define DRIVER_AUTHOR "Gareth Hughes, VA Linux Systems Inc."
+#define DRIVER_AUTHOR "Gareth Hughes, Keith Whitwell, others."
#define DRIVER_NAME "radeon"
#define DRIVER_DESC "ATI Radeon"
-#define DRIVER_DATE "20020714"
+#define DRIVER_DATE "20020611"
#define DRIVER_MAJOR 1
-#define DRIVER_MINOR 4
+#define DRIVER_MINOR 5
#define DRIVER_PATCHLEVEL 0
/* Interface history:
@@ -63,6 +64,10 @@
* - Add support for new radeon packets (keith)
* - Add getparam ioctl (keith)
* - Add flip-buffers ioctl, deprecate fullscreen foo (keith).
+ * 1.4 - Add scratch registers to get_param ioctl.
+ * 1.5 - Add r200 packets to cmdbuf ioctl
+ * - Add r200 function to init ioctl
+ * - Add 'scalar2' instruction to cmdbuf
*/
#define DRIVER_IOCTLS \
[DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { radeon_cp_buffers, 1, 0 }, \
@@ -104,15 +109,6 @@
*/
#define __HAVE_DMA 1
-#if 0
-/* GH: Remove this for now... */
-#define __HAVE_DMA_QUIESCENT 1
-#define DRIVER_DMA_QUIESCENT() do { \
- drm_radeon_private_t *dev_priv = dev->dev_private; \
- return radeon_do_cp_idle( dev_priv ); \
-} while (0)
-#endif
-
/* Buffer customization:
*/
#define DRIVER_BUF_PRIV_T drm_radeon_buf_priv_t
diff --git a/shared/radeon_cp.c b/shared/radeon_cp.c
index 8250c09b..01069e49 100644
--- a/shared/radeon_cp.c
+++ b/shared/radeon_cp.c
@@ -44,6 +44,266 @@
/* CP microcode (from ATI) */
+static u32 R200_cp_microcode[][2] = {
+ { 0x21007000, 0000000000 },
+ { 0x20007000, 0000000000 },
+ { 0x000000ab, 0x00000004 },
+ { 0x000000af, 0x00000004 },
+ { 0x66544a49, 0000000000 },
+ { 0x49494174, 0000000000 },
+ { 0x54517d83, 0000000000 },
+ { 0x498d8b64, 0000000000 },
+ { 0x49494949, 0000000000 },
+ { 0x49da493c, 0000000000 },
+ { 0x49989898, 0000000000 },
+ { 0xd34949d5, 0000000000 },
+ { 0x9dc90e11, 0000000000 },
+ { 0xce9b9b9b, 0000000000 },
+ { 0x000f0000, 0x00000016 },
+ { 0x352e232c, 0000000000 },
+ { 0x00000013, 0x00000004 },
+ { 0x000f0000, 0x00000016 },
+ { 0x352e272c, 0000000000 },
+ { 0x000f0001, 0x00000016 },
+ { 0x3239362f, 0000000000 },
+ { 0x000077ef, 0x00000002 },
+ { 0x00061000, 0x00000002 },
+ { 0x00000020, 0x0000001a },
+ { 0x00004000, 0x0000001e },
+ { 0x00061000, 0x00000002 },
+ { 0x00000020, 0x0000001a },
+ { 0x00004000, 0x0000001e },
+ { 0x00061000, 0x00000002 },
+ { 0x00000020, 0x0000001a },
+ { 0x00004000, 0x0000001e },
+ { 0x00000016, 0x00000004 },
+ { 0x0003802a, 0x00000002 },
+ { 0x040067e0, 0x00000002 },
+ { 0x00000016, 0x00000004 },
+ { 0x000077e0, 0x00000002 },
+ { 0x00065000, 0x00000002 },
+ { 0x000037e1, 0x00000002 },
+ { 0x040067e1, 0x00000006 },
+ { 0x000077e0, 0x00000002 },
+ { 0x000077e1, 0x00000002 },
+ { 0x000077e1, 0x00000006 },
+ { 0xffffffff, 0000000000 },
+ { 0x10000000, 0000000000 },
+ { 0x0003802a, 0x00000002 },
+ { 0x040067e0, 0x00000006 },
+ { 0x00007675, 0x00000002 },
+ { 0x00007676, 0x00000002 },
+ { 0x00007677, 0x00000002 },
+ { 0x00007678, 0x00000006 },
+ { 0x0003802b, 0x00000002 },
+ { 0x04002676, 0x00000002 },
+ { 0x00007677, 0x00000002 },
+ { 0x00007678, 0x00000006 },
+ { 0x0000002e, 0x00000018 },
+ { 0x0000002e, 0x00000018 },
+ { 0000000000, 0x00000006 },
+ { 0x0000002f, 0x00000018 },
+ { 0x0000002f, 0x00000018 },
+ { 0000000000, 0x00000006 },
+ { 0x01605000, 0x00000002 },
+ { 0x00065000, 0x00000002 },
+ { 0x00098000, 0x00000002 },
+ { 0x00061000, 0x00000002 },
+ { 0x64c0603d, 0x00000004 },
+ { 0x00080000, 0x00000016 },
+ { 0000000000, 0000000000 },
+ { 0x0400251d, 0x00000002 },
+ { 0x00007580, 0x00000002 },
+ { 0x00067581, 0x00000002 },
+ { 0x04002580, 0x00000002 },
+ { 0x00067581, 0x00000002 },
+ { 0x00000046, 0x00000004 },
+ { 0x00005000, 0000000000 },
+ { 0x00061000, 0x00000002 },
+ { 0x0000750e, 0x00000002 },
+ { 0x00019000, 0x00000002 },
+ { 0x00011055, 0x00000014 },
+ { 0x00000055, 0x00000012 },
+ { 0x0400250f, 0x00000002 },
+ { 0x0000504a, 0x00000004 },
+ { 0x00007565, 0x00000002 },
+ { 0x00007566, 0x00000002 },
+ { 0x00000051, 0x00000004 },
+ { 0x01e655b4, 0x00000002 },
+ { 0x4401b0dc, 0x00000002 },
+ { 0x01c110dc, 0x00000002 },
+ { 0x2666705d, 0x00000018 },
+ { 0x040c2565, 0x00000002 },
+ { 0x0000005d, 0x00000018 },
+ { 0x04002564, 0x00000002 },
+ { 0x00007566, 0x00000002 },
+ { 0x00000054, 0x00000004 },
+ { 0x00401060, 0x00000008 },
+ { 0x00101000, 0x00000002 },
+ { 0x000d80ff, 0x00000002 },
+ { 0x00800063, 0x00000008 },
+ { 0x000f9000, 0x00000002 },
+ { 0x000e00ff, 0x00000002 },
+ { 0000000000, 0x00000006 },
+ { 0x00000080, 0x00000018 },
+ { 0x00000054, 0x00000004 },
+ { 0x00007576, 0x00000002 },
+ { 0x00065000, 0x00000002 },
+ { 0x00009000, 0x00000002 },
+ { 0x00041000, 0x00000002 },
+ { 0x0c00350e, 0x00000002 },
+ { 0x00049000, 0x00000002 },
+ { 0x00051000, 0x00000002 },
+ { 0x01e785f8, 0x00000002 },
+ { 0x00200000, 0x00000002 },
+ { 0x00600073, 0x0000000c },
+ { 0x00007563, 0x00000002 },
+ { 0x006075f0, 0x00000021 },
+ { 0x20007068, 0x00000004 },
+ { 0x00005068, 0x00000004 },
+ { 0x00007576, 0x00000002 },
+ { 0x00007577, 0x00000002 },
+ { 0x0000750e, 0x00000002 },
+ { 0x0000750f, 0x00000002 },
+ { 0x00a05000, 0x00000002 },
+ { 0x00600076, 0x0000000c },
+ { 0x006075f0, 0x00000021 },
+ { 0x000075f8, 0x00000002 },
+ { 0x00000076, 0x00000004 },
+ { 0x000a750e, 0x00000002 },
+ { 0x0020750f, 0x00000002 },
+ { 0x00600079, 0x00000004 },
+ { 0x00007570, 0x00000002 },
+ { 0x00007571, 0x00000002 },
+ { 0x00007572, 0x00000006 },
+ { 0x00005000, 0x00000002 },
+ { 0x00a05000, 0x00000002 },
+ { 0x00007568, 0x00000002 },
+ { 0x00061000, 0x00000002 },
+ { 0x00000084, 0x0000000c },
+ { 0x00058000, 0x00000002 },
+ { 0x0c607562, 0x00000002 },
+ { 0x00000086, 0x00000004 },
+ { 0x00600085, 0x00000004 },
+ { 0x400070dd, 0000000000 },
+ { 0x000380dd, 0x00000002 },
+ { 0x00000093, 0x0000001c },
+ { 0x00065095, 0x00000018 },
+ { 0x040025bb, 0x00000002 },
+ { 0x00061096, 0x00000018 },
+ { 0x040075bc, 0000000000 },
+ { 0x000075bb, 0x00000002 },
+ { 0x000075bc, 0000000000 },
+ { 0x00090000, 0x00000006 },
+ { 0x00090000, 0x00000002 },
+ { 0x000d8002, 0x00000006 },
+ { 0x00005000, 0x00000002 },
+ { 0x00007821, 0x00000002 },
+ { 0x00007800, 0000000000 },
+ { 0x00007821, 0x00000002 },
+ { 0x00007800, 0000000000 },
+ { 0x01665000, 0x00000002 },
+ { 0x000a0000, 0x00000002 },
+ { 0x000671cc, 0x00000002 },
+ { 0x0286f1cd, 0x00000002 },
+ { 0x000000a3, 0x00000010 },
+ { 0x21007000, 0000000000 },
+ { 0x000000aa, 0x0000001c },
+ { 0x00065000, 0x00000002 },
+ { 0x000a0000, 0x00000002 },
+ { 0x00061000, 0x00000002 },
+ { 0x000b0000, 0x00000002 },
+ { 0x38067000, 0x00000002 },
+ { 0x000a00a6, 0x00000004 },
+ { 0x20007000, 0000000000 },
+ { 0x01200000, 0x00000002 },
+ { 0x20077000, 0x00000002 },
+ { 0x01200000, 0x00000002 },
+ { 0x20007000, 0000000000 },
+ { 0x00061000, 0x00000002 },
+ { 0x0120751b, 0x00000002 },
+ { 0x8040750a, 0x00000002 },
+ { 0x8040750b, 0x00000002 },
+ { 0x00110000, 0x00000002 },
+ { 0x000380dd, 0x00000002 },
+ { 0x000000bd, 0x0000001c },
+ { 0x00061096, 0x00000018 },
+ { 0x844075bd, 0x00000002 },
+ { 0x00061095, 0x00000018 },
+ { 0x840075bb, 0x00000002 },
+ { 0x00061096, 0x00000018 },
+ { 0x844075bc, 0x00000002 },
+ { 0x000000c0, 0x00000004 },
+ { 0x804075bd, 0x00000002 },
+ { 0x800075bb, 0x00000002 },
+ { 0x804075bc, 0x00000002 },
+ { 0x00108000, 0x00000002 },
+ { 0x01400000, 0x00000002 },
+ { 0x006000c4, 0x0000000c },
+ { 0x20c07000, 0x00000020 },
+ { 0x000000c6, 0x00000012 },
+ { 0x00800000, 0x00000006 },
+ { 0x0080751d, 0x00000006 },
+ { 0x000025bb, 0x00000002 },
+ { 0x000040c0, 0x00000004 },
+ { 0x0000775c, 0x00000002 },
+ { 0x00a05000, 0x00000002 },
+ { 0x00661000, 0x00000002 },
+ { 0x0460275d, 0x00000020 },
+ { 0x00004000, 0000000000 },
+ { 0x00007999, 0x00000002 },
+ { 0x00a05000, 0x00000002 },
+ { 0x00661000, 0x00000002 },
+ { 0x0460299b, 0x00000020 },
+ { 0x00004000, 0000000000 },
+ { 0x01e00830, 0x00000002 },
+ { 0x21007000, 0000000000 },
+ { 0x00005000, 0x00000002 },
+ { 0x00038042, 0x00000002 },
+ { 0x040025e0, 0x00000002 },
+ { 0x000075e1, 0000000000 },
+ { 0x00000001, 0000000000 },
+ { 0x000380d9, 0x00000002 },
+ { 0x04007394, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+ { 0000000000, 0000000000 },
+};
+
+
static u32 radeon_cp_microcode[][2] = {
{ 0x21007000, 0000000000 },
{ 0x20007000, 0000000000 },
@@ -345,6 +605,8 @@ static int radeon_do_pixcache_flush( drm_radeon_private_t *dev_priv )
u32 tmp;
int i;
+ dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
+
tmp = RADEON_READ( RADEON_RB2D_DSTCACHE_CTLSTAT );
tmp |= RADEON_RB2D_DC_FLUSH_ALL;
RADEON_WRITE( RADEON_RB2D_DSTCACHE_CTLSTAT, tmp );
@@ -369,6 +631,8 @@ static int radeon_do_wait_for_fifo( drm_radeon_private_t *dev_priv,
{
int i;
+ dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
+
for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) {
int slots = ( RADEON_READ( RADEON_RBBM_STATUS )
& RADEON_RBBM_FIFOCNT_MASK );
@@ -387,6 +651,8 @@ static int radeon_do_wait_for_idle( drm_radeon_private_t *dev_priv )
{
int i, ret;
+ dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
+
ret = radeon_do_wait_for_fifo( dev_priv, 64 );
if ( ret ) return ret;
@@ -420,11 +686,26 @@ static void radeon_cp_load_microcode( drm_radeon_private_t *dev_priv )
radeon_do_wait_for_idle( dev_priv );
RADEON_WRITE( RADEON_CP_ME_RAM_ADDR, 0 );
- for ( i = 0 ; i < 256 ; i++ ) {
- RADEON_WRITE( RADEON_CP_ME_RAM_DATAH,
- radeon_cp_microcode[i][1] );
- RADEON_WRITE( RADEON_CP_ME_RAM_DATAL,
- radeon_cp_microcode[i][0] );
+
+ if (dev_priv->is_r200)
+ {
+ DRM_INFO("Loading R200 Microcode\n");
+ for ( i = 0 ; i < 256 ; i++ )
+ {
+ RADEON_WRITE( RADEON_CP_ME_RAM_DATAH,
+ R200_cp_microcode[i][1] );
+ RADEON_WRITE( RADEON_CP_ME_RAM_DATAL,
+ R200_cp_microcode[i][0] );
+ }
+ }
+ else
+ {
+ for ( i = 0 ; i < 256 ; i++ ) {
+ RADEON_WRITE( RADEON_CP_ME_RAM_DATAH,
+ radeon_cp_microcode[i][1] );
+ RADEON_WRITE( RADEON_CP_ME_RAM_DATAL,
+ radeon_cp_microcode[i][0] );
+ }
}
}
@@ -736,12 +1017,10 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init )
return DRM_ERR(EINVAL);
}
+ dev_priv->is_r200 = (init->func == RADEON_INIT_R200_CP);
+ dev_priv->do_boxes = 1;
dev_priv->cp_mode = init->cp_mode;
- /* Simple idle check.
- */
- atomic_set( &dev_priv->idle_count, 0 );
-
/* We don't support anything other than bus-mastering ring mode,
* but the ring can be in either AGP or PCI space for the ring
* read pointer.
@@ -1028,6 +1307,7 @@ int radeon_cp_init( DRM_IOCTL_ARGS )
switch ( init.func ) {
case RADEON_INIT_CP:
+ case RADEON_INIT_R200_CP:
return radeon_do_init_cp( dev, &init );
case RADEON_CLEANUP_CP:
return radeon_do_cleanup_cp( dev );
@@ -1169,6 +1449,14 @@ int radeon_fullscreen( DRM_IOCTL_ARGS )
* completed rendering.
*
* KW: It's also a good way to find free buffers quickly.
+ *
+ * KW: Ideally this loop wouldn't exist, and freelist_get wouldn't
+ * sleep. However, bugs in older versions of radeon_accel.c mean that
+ * we essentially have to do this, else old clients will break.
+ *
+ * However, it does leave open a potential deadlock where all the
+ * buffers are held by other clients, which can't release them because
+ * they can't get the lock.
*/
drm_buf_t *radeon_freelist_get( drm_device_t *dev )
@@ -1193,17 +1481,56 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev )
buf_priv = buf->dev_private;
if ( buf->pid == 0 || (buf->pending &&
buf_priv->age <= done_age) ) {
+ dev_priv->stats.requested_bufs++;
buf->pending = 0;
return buf;
}
start = 0;
}
- DRM_UDELAY( 1 );
+
+ if (t) {
+ DRM_UDELAY( 1 );
+ dev_priv->stats.freelist_loops++;
+ }
}
DRM_ERROR( "returning NULL!\n" );
return NULL;
}
+#if 0
+drm_buf_t *radeon_freelist_get( drm_device_t *dev )
+{
+ drm_device_dma_t *dma = dev->dma;
+ drm_radeon_private_t *dev_priv = dev->dev_private;
+ drm_radeon_buf_priv_t *buf_priv;
+ drm_buf_t *buf;
+ int i, t;
+ int start;
+ u32 done_age = DRM_READ32(&dev_priv->scratch[1]);
+
+ if ( ++dev_priv->last_buf >= dma->buf_count )
+ dev_priv->last_buf = 0;
+
+ start = dev_priv->last_buf;
+ dev_priv->stats.freelist_loops++;
+
+ for ( t = 0 ; t < 2 ; t++ ) {
+ for ( i = start ; i < dma->buf_count ; i++ ) {
+ buf = dma->buflist[i];
+ buf_priv = buf->dev_private;
+ if ( buf->pid == 0 || (buf->pending &&
+ buf_priv->age <= done_age) ) {
+ dev_priv->stats.requested_bufs++;
+ buf->pending = 0;
+ return buf;
+ }
+ }
+ start = 0;
+ }
+
+ return NULL;
+}
+#endif
void radeon_freelist_reset( drm_device_t *dev )
{
@@ -1228,11 +1555,23 @@ int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n )
{
drm_radeon_ring_buffer_t *ring = &dev_priv->ring;
int i;
+ u32 last_head = GET_RING_HEAD(ring);
for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) {
- radeon_update_ring_snapshot( ring );
+ u32 head = GET_RING_HEAD(ring);
+
+ ring->space = (head - ring->tail) * sizeof(u32);
+ if ( ring->space <= 0 )
+ ring->space += ring->size;
if ( ring->space > n )
return 0;
+
+ dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
+
+ if (head != last_head)
+ i = 0;
+ last_head = head;
+
DRM_UDELAY( 1 );
}
@@ -1251,7 +1590,7 @@ static int radeon_cp_get_buffers( drm_device_t *dev, drm_dma_t *d )
for ( i = d->granted_count ; i < d->request_count ; i++ ) {
buf = radeon_freelist_get( dev );
- if ( !buf ) return DRM_ERR(EAGAIN);
+ if ( !buf ) return DRM_ERR(EBUSY); /* NOTE: broken client */
buf->pid = DRM_CURRENTPID;
diff --git a/shared/radeon_drm.h b/shared/radeon_drm.h
index 3802e46c..6469bfb8 100644
--- a/shared/radeon_drm.h
+++ b/shared/radeon_drm.h
@@ -89,7 +89,47 @@
#define RADEON_EMIT_SE_ZBIAS_FACTOR 18 /* zbias/2 */
#define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT 19 /* tcl/11 */
#define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED 20 /* material/17 */
-#define RADEON_MAX_STATE_PACKETS 21
+#define R200_EMIT_PP_TXCBLEND_0 21 /* tex0/4 */
+#define R200_EMIT_PP_TXCBLEND_1 22 /* tex1/4 */
+#define R200_EMIT_PP_TXCBLEND_2 23 /* tex2/4 */
+#define R200_EMIT_PP_TXCBLEND_3 24 /* tex3/4 */
+#define R200_EMIT_PP_TXCBLEND_4 25 /* tex4/4 */
+#define R200_EMIT_PP_TXCBLEND_5 26 /* tex5/4 */
+#define R200_EMIT_PP_TXCBLEND_6 27 /* /4 */
+#define R200_EMIT_PP_TXCBLEND_7 28 /* /4 */
+#define R200_EMIT_TCL_LIGHT_MODEL_CTL_0 29 /* tcl/7 */
+#define R200_EMIT_TFACTOR_0 30 /* tf/7 */
+#define R200_EMIT_VTX_FMT_0 31 /* vtx/5 */
+#define R200_EMIT_VAP_CTL 32 /* vap/1 */
+#define R200_EMIT_MATRIX_SELECT_0 33 /* msl/5 */
+#define R200_EMIT_TEX_PROC_CTL_2 34 /* tcg/5 */
+#define R200_EMIT_TCL_UCP_VERT_BLEND_CTL 35 /* tcl/1 */
+#define R200_EMIT_PP_TXFILTER_0 36 /* tex0/6 */
+#define R200_EMIT_PP_TXFILTER_1 37 /* tex1/6 */
+#define R200_EMIT_PP_TXFILTER_2 38 /* tex2/6 */
+#define R200_EMIT_PP_TXFILTER_3 39 /* tex3/6 */
+#define R200_EMIT_PP_TXFILTER_4 40 /* tex4/6 */
+#define R200_EMIT_PP_TXFILTER_5 41 /* tex5/6 */
+#define R200_EMIT_PP_TXOFFSET_0 42 /* tex0/1 */
+#define R200_EMIT_PP_TXOFFSET_1 43 /* tex1/1 */
+#define R200_EMIT_PP_TXOFFSET_2 44 /* tex2/1 */
+#define R200_EMIT_PP_TXOFFSET_3 45 /* tex3/1 */
+#define R200_EMIT_PP_TXOFFSET_4 46 /* tex4/1 */
+#define R200_EMIT_PP_TXOFFSET_5 47 /* tex5/1 */
+#define R200_EMIT_VTE_CNTL 48 /* vte/1 */
+#define R200_EMIT_OUTPUT_VTX_COMP_SEL 49 /* vtx/1 */
+#define R200_EMIT_PP_TAM_DEBUG3 50 /* tam/1 */
+#define R200_EMIT_PP_CNTL_X 51 /* cst/1 */
+#define R200_EMIT_RB3D_DEPTHXY_OFFSET 52 /* cst/1 */
+#define R200_EMIT_RE_AUX_SCISSOR_CNTL 53 /* cst/1 */
+#define R200_EMIT_RE_SCISSOR_TL_0 54 /* cst/2 */
+#define R200_EMIT_RE_SCISSOR_TL_1 55 /* cst/2 */
+#define R200_EMIT_RE_SCISSOR_TL_2 56 /* cst/2 */
+#define R200_EMIT_SE_VAP_CNTL_STATUS 57 /* cst/1 */
+#define R200_EMIT_SE_VTX_STATE_CNTL 58 /* cst/1 */
+#define R200_EMIT_RE_POINTSIZE 59 /* cst/1 */
+#define R200_EMIT_TCL_INPUT_VTX_VECTOR_ADDR_0 60 /* cst/4 */
+#define RADEON_MAX_STATE_PACKETS 61
/* Commands understood by cmd_buffer ioctl. More can be added but
@@ -101,24 +141,25 @@
#define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */
#define RADEON_CMD_PACKET3 5 /* emit hw packet */
#define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */
+#define RADEON_CMD_SCALARS2 7 /* r200 stopgap */
typedef union {
int i;
struct {
- char cmd_type, pad0, pad1, pad2;
+ unsigned char cmd_type, pad0, pad1, pad2;
} header;
struct {
- char cmd_type, packet_id, pad0, pad1;
+ unsigned char cmd_type, packet_id, pad0, pad1;
} packet;
struct {
- char cmd_type, offset, stride, count;
+ unsigned char cmd_type, offset, stride, count;
} scalars;
struct {
- char cmd_type, offset, stride, count;
+ unsigned char cmd_type, offset, stride, count;
} vectors;
struct {
- char cmd_type, buf_idx, pad0, pad1;
+ unsigned char cmd_type, buf_idx, pad0, pad1;
} dma;
} drm_radeon_cmd_header_t;
@@ -327,7 +368,8 @@ typedef struct {
typedef struct drm_radeon_init {
enum {
RADEON_INIT_CP = 0x01,
- RADEON_CLEANUP_CP = 0x02
+ RADEON_CLEANUP_CP = 0x02,
+ RADEON_INIT_R200_CP = 0x03,
} func;
unsigned long sarea_priv_offset;
int is_pci;
diff --git a/shared/radeon_drv.h b/shared/radeon_drv.h
index 15c0d4dd..7c341b39 100644
--- a/shared/radeon_drv.h
+++ b/shared/radeon_drv.h
@@ -79,12 +79,25 @@ typedef struct drm_radeon_private {
int writeback_works;
int usec_timeout;
+
+ int is_r200;
+
int is_pci;
unsigned long phys_pci_gart;
dma_addr_t bus_pci_gart;
- atomic_t idle_count;
-
+ struct {
+ u32 boxes;
+ int freelist_timeouts;
+ int freelist_loops;
+ int requested_bufs;
+ int last_frame_reads;
+ int last_clear_reads;
+ int clears;
+ int texture_uploads;
+ } stats;
+
+ int do_boxes;
int page_flipping;
int current_page;
u32 crtc_offset;
@@ -134,14 +147,6 @@ extern drm_buf_t *radeon_freelist_get( drm_device_t *dev );
extern int radeon_wait_ring( drm_radeon_private_t *dev_priv, int n );
-static __inline__ void
-radeon_update_ring_snapshot( drm_radeon_ring_buffer_t *ring )
-{
- ring->space = (GET_RING_HEAD(ring) - ring->tail) * sizeof(u32);
- if ( ring->space <= 0 )
- ring->space += ring->size;
-}
-
extern int radeon_do_cp_idle( drm_radeon_private_t *dev_priv );
extern int radeon_do_cleanup_cp( drm_device_t *dev );
extern int radeon_do_cleanup_pageflip( drm_device_t *dev );
@@ -159,6 +164,14 @@ extern int radeon_cp_cmdbuf( DRM_IOCTL_ARGS );
extern int radeon_cp_getparam( DRM_IOCTL_ARGS );
extern int radeon_cp_flip( DRM_IOCTL_ARGS );
+/* Flags for stats.boxes
+ */
+#define RADEON_BOX_DMA_IDLE 0x1
+#define RADEON_BOX_RING_FULL 0x2
+#define RADEON_BOX_FLIP 0x4
+#define RADEON_BOX_WAIT_IDLE 0x8
+#define RADEON_BOX_TEXTURE_LOAD 0x10
+
/* Register definitions, register access macros and drmAddMap constants
@@ -282,6 +295,7 @@ extern int radeon_cp_flip( DRM_IOCTL_ARGS );
# define RADEON_STENCIL_ENABLE (1 << 7)
# define RADEON_Z_ENABLE (1 << 8)
#define RADEON_RB3D_DEPTHOFFSET 0x1c24
+#define RADEON_RB3D_DEPTHPITCH 0x1c28
#define RADEON_RB3D_PLANEMASK 0x1d84
#define RADEON_RB3D_STENCILREFMASK 0x1d7c
#define RADEON_RB3D_ZCACHE_MODE 0x3250
@@ -513,6 +527,62 @@ extern int radeon_cp_flip( DRM_IOCTL_ARGS );
#define RADEON_TXFORMAT_ARGB8888 6
#define RADEON_TXFORMAT_RGBA8888 7
+#define R200_PP_TXCBLEND_0 0x2f00
+#define R200_PP_TXCBLEND_1 0x2f10
+#define R200_PP_TXCBLEND_2 0x2f20
+#define R200_PP_TXCBLEND_3 0x2f30
+#define R200_PP_TXCBLEND_4 0x2f40
+#define R200_PP_TXCBLEND_5 0x2f50
+#define R200_PP_TXCBLEND_6 0x2f60
+#define R200_PP_TXCBLEND_7 0x2f70
+#define R200_SE_TCL_LIGHT_MODEL_CTL_0 0x2268
+#define R200_PP_TFACTOR_0 0x2ee0
+#define R200_SE_VTX_FMT_0 0x2088
+#define R200_SE_VAP_CNTL 0x2080
+#define R200_SE_TCL_MATRIX_SEL_0 0x2230
+#define R200_SE_TCL_TEX_PROC_CTL_2 0x22a8
+#define R200_SE_TCL_UCP_VERT_BLEND_CTL 0x22c0
+#define R200_PP_TXFILTER_5 0x2ca0
+#define R200_PP_TXFILTER_4 0x2c80
+#define R200_PP_TXFILTER_3 0x2c60
+#define R200_PP_TXFILTER_2 0x2c40
+#define R200_PP_TXFILTER_1 0x2c20
+#define R200_PP_TXFILTER_0 0x2c00
+#define R200_PP_TXOFFSET_5 0x2d78
+#define R200_PP_TXOFFSET_4 0x2d60
+#define R200_PP_TXOFFSET_3 0x2d48
+#define R200_PP_TXOFFSET_2 0x2d30
+#define R200_PP_TXOFFSET_1 0x2d18
+#define R200_PP_TXOFFSET_0 0x2d00
+#define R200_RE_AUX_SCISSOR_CNTL 0x26f0
+#define R200_SE_VTE_CNTL 0x20b0
+#define R200_SE_TCL_OUTPUT_VTX_COMP_SEL 0x2250
+#define R200_PP_TAM_DEBUG3 0x2d9c
+#define R200_PP_CNTL_X 0x2cc4
+#define R200_SE_VAP_CNTL_STATUS 0x2140
+#define R200_RE_SCISSOR_TL_0 0x1cd8
+#define R200_RE_SCISSOR_TL_1 0x1ce0
+#define R200_RE_SCISSOR_TL_2 0x1ce8
+#define R200_RB3D_DEPTHXY_OFFSET 0x1d60
+#define R200_RE_AUX_SCISSOR_CNTL 0x26f0
+#define R200_SE_VTX_STATE_CNTL 0x2180
+#define R200_RE_POINTSIZE 0x2648
+#define R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0 0x2254
+
+
+#define SE_VAP_CNTL__TCL_ENA_MASK 0x00000001
+#define SE_VAP_CNTL__FORCE_W_TO_ONE_MASK 0x00010000
+#define SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT 0x00000012
+#define SE_VTE_CNTL__VTX_XY_FMT_MASK 0x00000100
+#define SE_VTE_CNTL__VTX_Z_FMT_MASK 0x00000200
+#define SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK 0x00000001
+#define SE_VTX_FMT_0__VTX_W0_PRESENT_MASK 0x00000002
+#define SE_VTX_FMT_0__VTX_COLOR_0_FMT__SHIFT 0x0000000b
+#define R200_3D_DRAW_IMMD_2 0xC0003500
+#define R200_SE_VTX_FMT_1 0x208c
+#define R200_RE_CNTL 0x1c50
+
+
/* Constants */
#define RADEON_MAX_USEC_TIMEOUT 100000 /* 100 ms */
@@ -620,30 +690,16 @@ do { \
} \
} while (0)
+
+/* Perfbox functionality only.
+ */
#define RING_SPACE_TEST_WITH_RETURN( dev_priv ) \
do { \
- drm_radeon_ring_buffer_t *ring = &dev_priv->ring; int i; \
- if ( ring->space < ring->high_mark ) { \
- for ( i = 0 ; i < dev_priv->usec_timeout ; i++ ) { \
- radeon_update_ring_snapshot( ring ); \
- if ( ring->space >= ring->high_mark ) \
- goto __ring_space_done; \
- DRM_UDELAY( 1 ); \
- } \
- DRM_ERROR( "ring space check from memory failed, reading register...\n" ); \
- /* If ring space check fails from RAM, try reading the \
- register directly */ \
- ring->space = 4 * ( RADEON_READ( RADEON_CP_RB_RPTR ) - ring->tail ); \
- if ( ring->space <= 0 ) \
- ring->space += ring->size; \
- if ( ring->space >= ring->high_mark ) \
- goto __ring_space_done; \
- \
- DRM_ERROR( "ring space check failed!\n" ); \
- return DRM_ERR(EBUSY); \
+ if (!(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE)) { \
+ u32 head = GET_RING_HEAD(&dev_priv->ring); \
+ if (head == dev_priv->ring.tail) \
+ dev_priv->stats.boxes |= RADEON_BOX_DMA_IDLE; \
} \
- __ring_space_done: \
- ; \
} while (0)
#define VB_AGE_TEST_WITH_RETURN( dev_priv ) \
@@ -710,16 +766,15 @@ do { \
} \
if (((dev_priv->ring.tail + _nr) & mask) != write) { \
DRM_ERROR( \
- "ADVANCE_RING(): mismatch: nr: %x write: %x\n", \
+ "ADVANCE_RING(): mismatch: nr: %x write: %x line: %d\n", \
((dev_priv->ring.tail + _nr) & mask), \
- write); \
+ write, __LINE__); \
} else \
dev_priv->ring.tail = write; \
} while (0)
#define COMMIT_RING() do { \
- radeon_flush_write_combine(); \
- RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail ); \
+ RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail ); \
} while (0)
#define OUT_RING( x ) do { \
@@ -760,6 +815,4 @@ do { \
} while (0)
-#define RADEON_PERFORMANCE_BOXES 0
-
#endif /* __RADEON_DRV_H__ */
diff --git a/shared/radeon_state.c b/shared/radeon_state.c
index 1cc6bde8..7f84e739 100644
--- a/shared/radeon_state.c
+++ b/shared/radeon_state.c
@@ -239,18 +239,50 @@ static struct {
{ RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" },
{ RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" },
{ RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" },
+ { R200_PP_TXCBLEND_0, 4, "R200_PP_TXCBLEND_0" },
+ { R200_PP_TXCBLEND_1, 4, "R200_PP_TXCBLEND_1" },
+ { R200_PP_TXCBLEND_2, 4, "R200_PP_TXCBLEND_2" },
+ { R200_PP_TXCBLEND_3, 4, "R200_PP_TXCBLEND_3" },
+ { R200_PP_TXCBLEND_4, 4, "R200_PP_TXCBLEND_4" },
+ { R200_PP_TXCBLEND_5, 4, "R200_PP_TXCBLEND_5" },
+ { R200_PP_TXCBLEND_6, 4, "R200_PP_TXCBLEND_6" },
+ { R200_PP_TXCBLEND_7, 4, "R200_PP_TXCBLEND_7" },
+ { R200_SE_TCL_LIGHT_MODEL_CTL_0, 6, "R200_SE_TCL_LIGHT_MODEL_CTL_0" },
+ { R200_PP_TFACTOR_0, 6, "R200_PP_TFACTOR_0" },
+ { R200_SE_VTX_FMT_0, 4, "R200_SE_VTX_FMT_0" },
+ { R200_SE_VAP_CNTL, 1, "R200_SE_VAP_CNTL" },
+ { R200_SE_TCL_MATRIX_SEL_0, 5, "R200_SE_TCL_MATRIX_SEL_0" },
+ { R200_SE_TCL_TEX_PROC_CTL_2, 5, "R200_SE_TCL_TEX_PROC_CTL_2" },
+ { R200_SE_TCL_UCP_VERT_BLEND_CTL, 1, "R200_SE_TCL_UCP_VERT_BLEND_CTL" },
+ { R200_PP_TXFILTER_0, 6, "R200_PP_TXFILTER_0" },
+ { R200_PP_TXFILTER_1, 6, "R200_PP_TXFILTER_1" },
+ { R200_PP_TXFILTER_2, 6, "R200_PP_TXFILTER_2" },
+ { R200_PP_TXFILTER_3, 6, "R200_PP_TXFILTER_3" },
+ { R200_PP_TXFILTER_4, 6, "R200_PP_TXFILTER_4" },
+ { R200_PP_TXFILTER_5, 6, "R200_PP_TXFILTER_5" },
+ { R200_PP_TXOFFSET_0, 1, "R200_PP_TXOFFSET_0" },
+ { R200_PP_TXOFFSET_1, 1, "R200_PP_TXOFFSET_1" },
+ { R200_PP_TXOFFSET_2, 1, "R200_PP_TXOFFSET_2" },
+ { R200_PP_TXOFFSET_3, 1, "R200_PP_TXOFFSET_3" },
+ { R200_PP_TXOFFSET_4, 1, "R200_PP_TXOFFSET_4" },
+ { R200_PP_TXOFFSET_5, 1, "R200_PP_TXOFFSET_5" },
+ { R200_SE_VTE_CNTL, 1, "R200_SE_VTE_CNTL" },
+ { R200_SE_TCL_OUTPUT_VTX_COMP_SEL, 1, "R200_SE_TCL_OUTPUT_VTX_COMP_SEL" },
+ { R200_PP_TAM_DEBUG3, 1, "R200_PP_TAM_DEBUG3" },
+ { R200_PP_CNTL_X, 1, "R200_PP_CNTL_X" },
+ { R200_RB3D_DEPTHXY_OFFSET, 1, "R200_RB3D_DEPTHXY_OFFSET" },
+ { R200_RE_AUX_SCISSOR_CNTL, 1, "R200_RE_AUX_SCISSOR_CNTL" },
+ { R200_RE_SCISSOR_TL_0, 2, "R200_RE_SCISSOR_TL_0" },
+ { R200_RE_SCISSOR_TL_1, 2, "R200_RE_SCISSOR_TL_1" },
+ { R200_RE_SCISSOR_TL_2, 2, "R200_RE_SCISSOR_TL_2" },
+ { R200_SE_VAP_CNTL_STATUS, 1, "R200_SE_VAP_CNTL_STATUS" },
+ { R200_SE_VTX_STATE_CNTL, 1, "R200_SE_VTX_STATE_CNTL" },
+ { R200_RE_POINTSIZE, 1, "R200_RE_POINTSIZE" },
+ { R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0, 4, "R200_SE_TCL_INPUT_VTX_VECTOR_ADDR_0" },
};
-
-
-
-
-
-
-
-#if RADEON_PERFORMANCE_BOXES
/* ================================================================
* Performance monitoring functions
*/
@@ -259,10 +291,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
int x, int y, int w, int h,
int r, int g, int b )
{
- u32 pitch, offset;
u32 color;
RING_LOCALS;
+ x += dev_priv->sarea_priv->boxes[0].x1;
+ y += dev_priv->sarea_priv->boxes[0].y1;
+
switch ( dev_priv->color_fmt ) {
case RADEON_COLOR_FORMAT_RGB565:
color = (((r & 0xf8) << 8) |
@@ -275,8 +309,11 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
break;
}
- offset = dev_priv->back_offset;
- pitch = dev_priv->back_pitch >> 3;
+ BEGIN_RING( 4 );
+ RADEON_WAIT_UNTIL_3D_IDLE();
+ OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
+ OUT_RING( 0xffffffff );
+ ADVANCE_RING();
BEGIN_RING( 6 );
@@ -288,7 +325,12 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
RADEON_ROP3_P |
RADEON_GMC_CLR_CMP_CNTL_DIS );
- OUT_RING( (pitch << 22) | (offset >> 5) );
+ if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
+ OUT_RING( dev_priv->front_pitch_offset );
+ } else {
+ OUT_RING( dev_priv->back_pitch_offset );
+ }
+
OUT_RING( color );
OUT_RING( (x << 16) | y );
@@ -299,16 +341,57 @@ static void radeon_clear_box( drm_radeon_private_t *dev_priv,
static void radeon_cp_performance_boxes( drm_radeon_private_t *dev_priv )
{
- if ( atomic_read( &dev_priv->idle_count ) == 0 ) {
- radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
- } else {
- atomic_set( &dev_priv->idle_count, 0 );
+ /* Collapse various things into a wait flag -- trying to
+ * guess if userspase slept -- better just to have them tell us.
+ */
+ if (dev_priv->stats.last_frame_reads > 1 ||
+ dev_priv->stats.last_clear_reads > dev_priv->stats.clears) {
+ dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
}
-}
-#endif
+ if (dev_priv->stats.freelist_loops) {
+ dev_priv->stats.boxes |= RADEON_BOX_WAIT_IDLE;
+ }
+ /* Purple box for page flipping
+ */
+ if ( dev_priv->stats.boxes & RADEON_BOX_FLIP )
+ radeon_clear_box( dev_priv, 4, 4, 8, 8, 255, 0, 255 );
+
+ /* Red box if we have to wait for idle at any point
+ */
+ if ( dev_priv->stats.boxes & RADEON_BOX_WAIT_IDLE )
+ radeon_clear_box( dev_priv, 16, 4, 8, 8, 255, 0, 0 );
+
+ /* Blue box: lost context?
+ */
+ /* Yellow box for texture swaps
+ */
+ if ( dev_priv->stats.boxes & RADEON_BOX_TEXTURE_LOAD )
+ radeon_clear_box( dev_priv, 40, 4, 8, 8, 255, 255, 0 );
+
+ /* Green box if hardware never idles (as far as we can tell)
+ */
+ if ( !(dev_priv->stats.boxes & RADEON_BOX_DMA_IDLE) )
+ radeon_clear_box( dev_priv, 64, 4, 8, 8, 0, 255, 0 );
+
+
+ /* Draw bars indicating number of buffers allocated
+ * (not a great measure, easily confused)
+ */
+ if (dev_priv->stats.requested_bufs) {
+ if (dev_priv->stats.requested_bufs > 100)
+ dev_priv->stats.requested_bufs = 100;
+
+ radeon_clear_box( dev_priv, 4, 16,
+ dev_priv->stats.requested_bufs, 4,
+ 196, 128, 128 );
+ }
+
+ memset( &dev_priv->stats, 0, sizeof(dev_priv->stats) );
+
+}
/* ================================================================
* CP command dispatch functions
*/
@@ -328,6 +411,8 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
RING_LOCALS;
DRM_DEBUG( "flags = 0x%x\n", flags );
+ dev_priv->stats.clears++;
+
if ( dev_priv->page_flipping && dev_priv->current_page == 1 ) {
unsigned int tmp = flags;
@@ -336,120 +421,251 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
if ( tmp & RADEON_BACK ) flags |= RADEON_FRONT;
}
+ if ( flags & (RADEON_FRONT | RADEON_BACK) ) {
+
+ BEGIN_RING( 4 );
+
+ /* Ensure the 3D stream is idle before doing a
+ * 2D fill to clear the front or back buffer.
+ */
+ RADEON_WAIT_UNTIL_3D_IDLE();
+
+ OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
+ OUT_RING( clear->color_mask );
+
+ ADVANCE_RING();
+
+ /* Make sure we restore the 3D state next time.
+ */
+ dev_priv->sarea_priv->ctx_owner = 0;
+
+ for ( i = 0 ; i < nbox ; i++ ) {
+ int x = pbox[i].x1;
+ int y = pbox[i].y1;
+ int w = pbox[i].x2 - x;
+ int h = pbox[i].y2 - y;
+
+ DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
+ x, y, w, h, flags );
+
+ if ( flags & RADEON_FRONT ) {
+ BEGIN_RING( 6 );
+
+ OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
+ OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+ RADEON_GMC_BRUSH_SOLID_COLOR |
+ (dev_priv->color_fmt << 8) |
+ RADEON_GMC_SRC_DATATYPE_COLOR |
+ RADEON_ROP3_P |
+ RADEON_GMC_CLR_CMP_CNTL_DIS );
+
+ OUT_RING( dev_priv->front_pitch_offset );
+ OUT_RING( clear->clear_color );
+
+ OUT_RING( (x << 16) | y );
+ OUT_RING( (w << 16) | h );
+
+ ADVANCE_RING();
+ }
+
+ if ( flags & RADEON_BACK ) {
+ BEGIN_RING( 6 );
+
+ OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
+ OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
+ RADEON_GMC_BRUSH_SOLID_COLOR |
+ (dev_priv->color_fmt << 8) |
+ RADEON_GMC_SRC_DATATYPE_COLOR |
+ RADEON_ROP3_P |
+ RADEON_GMC_CLR_CMP_CNTL_DIS );
+
+ OUT_RING( dev_priv->back_pitch_offset );
+ OUT_RING( clear->clear_color );
+
+ OUT_RING( (x << 16) | y );
+ OUT_RING( (w << 16) | h );
+
+ ADVANCE_RING();
+ }
+ }
+ }
+
/* We have to clear the depth and/or stencil buffers by
* rendering a quad into just those buffers. Thus, we have to
* make sure the 3D engine is configured correctly.
*/
- if ( flags & (RADEON_DEPTH | RADEON_STENCIL) ) {
- rb3d_cntl = depth_clear->rb3d_cntl;
+ if ( dev_priv->is_r200 &&
+ (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
- if ( flags & RADEON_DEPTH ) {
- rb3d_cntl |= RADEON_Z_ENABLE;
- } else {
- rb3d_cntl &= ~RADEON_Z_ENABLE;
- }
+ int tempPP_CNTL;
+ int tempRE_CNTL;
+ int tempRB3D_CNTL;
+ int tempRB3D_ZSTENCILCNTL;
+ int tempRB3D_STENCILREFMASK;
+ int tempRB3D_PLANEMASK;
+ int tempSE_CNTL;
+ int tempSE_VTE_CNTL;
+ int tempSE_VTX_FMT_0;
+ int tempSE_VTX_FMT_1;
+ int tempSE_VAP_CNTL;
+ int tempRE_AUX_SCISSOR_CNTL;
- if ( flags & RADEON_STENCIL ) {
- rb3d_cntl |= RADEON_STENCIL_ENABLE;
- rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
- } else {
- rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
- rb3d_stencilrefmask = 0x00000000;
- }
- }
+ tempPP_CNTL = 0;
+ tempRE_CNTL = 0;
- for ( i = 0 ; i < nbox ; i++ ) {
- int x = pbox[i].x1;
- int y = pbox[i].y1;
- int w = pbox[i].x2 - x;
- int h = pbox[i].y2 - y;
+ tempRB3D_CNTL = depth_clear->rb3d_cntl;
+ tempRB3D_CNTL &= ~(1<<15); /* unset radeon magic flag */
- DRM_DEBUG( "dispatch clear %d,%d-%d,%d flags 0x%x\n",
- x, y, w, h, flags );
+ tempRB3D_ZSTENCILCNTL = depth_clear->rb3d_zstencilcntl;
+ tempRB3D_STENCILREFMASK = 0x0;
- if ( flags & (RADEON_FRONT | RADEON_BACK) ) {
- BEGIN_RING( 4 );
+ tempSE_CNTL = depth_clear->se_cntl;
- /* Ensure the 3D stream is idle before doing a
- * 2D fill to clear the front or back buffer.
- */
- RADEON_WAIT_UNTIL_3D_IDLE();
- OUT_RING( CP_PACKET0( RADEON_DP_WRITE_MASK, 0 ) );
- OUT_RING( clear->color_mask );
- ADVANCE_RING();
+ /* Disable TCL */
- /* Make sure we restore the 3D state next time.
- */
- dev_priv->sarea_priv->ctx_owner = 0;
- }
+ tempSE_VAP_CNTL = (/* SE_VAP_CNTL__FORCE_W_TO_ONE_MASK | */
+ (0x9 << SE_VAP_CNTL__VF_MAX_VTX_NUM__SHIFT));
- if ( flags & RADEON_FRONT ) {
- BEGIN_RING( 6 );
+ tempRB3D_PLANEMASK = 0x0;
- OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
- OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
- RADEON_GMC_BRUSH_SOLID_COLOR |
- (dev_priv->color_fmt << 8) |
- RADEON_GMC_SRC_DATATYPE_COLOR |
- RADEON_ROP3_P |
- RADEON_GMC_CLR_CMP_CNTL_DIS );
+ tempRE_AUX_SCISSOR_CNTL = 0x0;
- OUT_RING( dev_priv->front_pitch_offset );
- OUT_RING( clear->clear_color );
+ tempSE_VTE_CNTL =
+ SE_VTE_CNTL__VTX_XY_FMT_MASK |
+ SE_VTE_CNTL__VTX_Z_FMT_MASK;
- OUT_RING( (x << 16) | y );
- OUT_RING( (w << 16) | h );
+ /* Vertex format (X, Y, Z, W)*/
+ tempSE_VTX_FMT_0 =
+ SE_VTX_FMT_0__VTX_Z0_PRESENT_MASK |
+ SE_VTX_FMT_0__VTX_W0_PRESENT_MASK;
+ tempSE_VTX_FMT_1 = 0x0;
- ADVANCE_RING();
+
+ /*
+ * Depth buffer specific enables
+ */
+ if (flags & RADEON_DEPTH) {
+ /* Enable depth buffer */
+ tempRB3D_CNTL |= RADEON_Z_ENABLE;
+ } else {
+ /* Disable depth buffer */
+ tempRB3D_CNTL &= ~RADEON_Z_ENABLE;
}
- if ( flags & RADEON_BACK ) {
- BEGIN_RING( 6 );
+ /*
+ * Stencil buffer specific enables
+ */
+ if ( flags & RADEON_STENCIL ) {
+ tempRB3D_CNTL |= RADEON_STENCIL_ENABLE;
+ tempRB3D_STENCILREFMASK = clear->depth_mask;
+ } else {
+ tempRB3D_CNTL &= ~RADEON_STENCIL_ENABLE;
+ tempRB3D_STENCILREFMASK = 0x00000000;
+ }
- OUT_RING( CP_PACKET3( RADEON_CNTL_PAINT_MULTI, 4 ) );
- OUT_RING( RADEON_GMC_DST_PITCH_OFFSET_CNTL |
- RADEON_GMC_BRUSH_SOLID_COLOR |
- (dev_priv->color_fmt << 8) |
- RADEON_GMC_SRC_DATATYPE_COLOR |
- RADEON_ROP3_P |
- RADEON_GMC_CLR_CMP_CNTL_DIS );
+ BEGIN_RING( 26 );
+ RADEON_WAIT_UNTIL_2D_IDLE();
+
+ OUT_RING_REG( RADEON_PP_CNTL, tempPP_CNTL );
+ OUT_RING_REG( R200_RE_CNTL, tempRE_CNTL );
+ OUT_RING_REG( RADEON_RB3D_CNTL, tempRB3D_CNTL );
+ OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
+ tempRB3D_ZSTENCILCNTL );
+ OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
+ tempRB3D_STENCILREFMASK );
+ OUT_RING_REG( RADEON_RB3D_PLANEMASK, tempRB3D_PLANEMASK );
+ OUT_RING_REG( RADEON_SE_CNTL, tempSE_CNTL );
+ OUT_RING_REG( R200_SE_VTE_CNTL, tempSE_VTE_CNTL );
+ OUT_RING_REG( R200_SE_VTX_FMT_0, tempSE_VTX_FMT_0 );
+ OUT_RING_REG( R200_SE_VTX_FMT_1, tempSE_VTX_FMT_1 );
+ OUT_RING_REG( R200_SE_VAP_CNTL, tempSE_VAP_CNTL );
+ OUT_RING_REG( R200_RE_AUX_SCISSOR_CNTL,
+ tempRE_AUX_SCISSOR_CNTL );
+ ADVANCE_RING();
- OUT_RING( dev_priv->back_pitch_offset );
- OUT_RING( clear->clear_color );
+ /* Make sure we restore the 3D state next time.
+ */
+ dev_priv->sarea_priv->ctx_owner = 0;
- OUT_RING( (x << 16) | y );
- OUT_RING( (w << 16) | h );
+ for ( i = 0 ; i < nbox ; i++ ) {
+
+ /* Funny that this should be required --
+ * sets top-left?
+ */
+ radeon_emit_clip_rect( dev_priv,
+ &sarea_priv->boxes[i] );
+ BEGIN_RING( 14 );
+ OUT_RING( CP_PACKET3( R200_3D_DRAW_IMMD_2, 12 ) );
+ OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST |
+ RADEON_PRIM_WALK_RING |
+ (3 << RADEON_NUM_VERTICES_SHIFT)) );
+ OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
+ OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
+ OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
+ OUT_RING( 0x3f800000 );
+ OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
+ OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
+ OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
+ OUT_RING( 0x3f800000 );
+ OUT_RING( depth_boxes[i].ui[CLEAR_X2] );
+ OUT_RING( depth_boxes[i].ui[CLEAR_Y2] );
+ OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
+ OUT_RING( 0x3f800000 );
ADVANCE_RING();
}
+ }
+ else if ( (flags & (RADEON_DEPTH | RADEON_STENCIL)) ) {
- if ( flags & (RADEON_DEPTH | RADEON_STENCIL) ) {
+ rb3d_cntl = depth_clear->rb3d_cntl;
- radeon_emit_clip_rect( dev_priv,
- &sarea_priv->boxes[i] );
+ if ( flags & RADEON_DEPTH ) {
+ rb3d_cntl |= RADEON_Z_ENABLE;
+ } else {
+ rb3d_cntl &= ~RADEON_Z_ENABLE;
+ }
- BEGIN_RING( 28 );
+ if ( flags & RADEON_STENCIL ) {
+ rb3d_cntl |= RADEON_STENCIL_ENABLE;
+ rb3d_stencilrefmask = clear->depth_mask; /* misnamed field */
+ } else {
+ rb3d_cntl &= ~RADEON_STENCIL_ENABLE;
+ rb3d_stencilrefmask = 0x00000000;
+ }
- RADEON_WAIT_UNTIL_2D_IDLE();
+ BEGIN_RING( 13 );
+ RADEON_WAIT_UNTIL_2D_IDLE();
- OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
- OUT_RING( 0x00000000 );
- OUT_RING( rb3d_cntl );
+ OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 1 ) );
+ OUT_RING( 0x00000000 );
+ OUT_RING( rb3d_cntl );
+
+ OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
+ depth_clear->rb3d_zstencilcntl );
+ OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
+ rb3d_stencilrefmask );
+ OUT_RING_REG( RADEON_RB3D_PLANEMASK,
+ 0x00000000 );
+ OUT_RING_REG( RADEON_SE_CNTL,
+ depth_clear->se_cntl );
+ ADVANCE_RING();
- OUT_RING_REG( RADEON_RB3D_ZSTENCILCNTL,
- depth_clear->rb3d_zstencilcntl );
- OUT_RING_REG( RADEON_RB3D_STENCILREFMASK,
- rb3d_stencilrefmask );
- OUT_RING_REG( RADEON_RB3D_PLANEMASK,
- 0x00000000 );
- OUT_RING_REG( RADEON_SE_CNTL,
- depth_clear->se_cntl );
+ /* Make sure we restore the 3D state next time.
+ */
+ dev_priv->sarea_priv->ctx_owner = 0;
- /* Radeon 7500 doesn't like vertices without
- * color.
+ for ( i = 0 ; i < nbox ; i++ ) {
+
+ /* Funny that this should be required --
+ * sets top-left?
*/
+ radeon_emit_clip_rect( dev_priv,
+ &sarea_priv->boxes[i] );
+
+ BEGIN_RING( 15 );
+
OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) );
OUT_RING( RADEON_VTX_Z_PRESENT |
RADEON_VTX_PKCOLOR_PRESENT);
@@ -459,6 +675,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
RADEON_VTX_FMT_RADEON_MODE |
(3 << RADEON_NUM_VERTICES_SHIFT)) );
+
OUT_RING( depth_boxes[i].ui[CLEAR_X1] );
OUT_RING( depth_boxes[i].ui[CLEAR_Y1] );
OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] );
@@ -475,10 +692,6 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev,
OUT_RING( 0x0 );
ADVANCE_RING();
-
- /* Make sure we restore the 3D state next time.
- */
- dev_priv->sarea_priv->ctx_owner = 0;
}
}
@@ -506,11 +719,12 @@ static void radeon_cp_dispatch_swap( drm_device_t *dev )
RING_LOCALS;
DRM_DEBUG( "\n" );
-#if RADEON_PERFORMANCE_BOXES
+
/* Do some trivial performance monitoring...
*/
- radeon_cp_performance_boxes( dev_priv );
-#endif
+ if (dev_priv->do_boxes)
+ radeon_cp_performance_boxes( dev_priv );
+
/* Wait for the 3D stream to idle before dispatching the bitblt.
* This will prevent data corruption between the two streams.
@@ -579,20 +793,21 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev )
{
drm_radeon_private_t *dev_priv = dev->dev_private;
RING_LOCALS;
- DRM_DEBUG( "page=%d\n", dev_priv->current_page );
+ DRM_DEBUG( "%s: page=%d pfCurrentPage=%d\n",
+ __FUNCTION__,
+ dev_priv->current_page,
+ dev_priv->sarea_priv->pfCurrentPage);
-#if RADEON_PERFORMANCE_BOXES
/* Do some trivial performance monitoring...
*/
- radeon_cp_performance_boxes( dev_priv );
-#endif
+ if (dev_priv->do_boxes) {
+ dev_priv->stats.boxes |= RADEON_BOX_FLIP;
+ radeon_cp_performance_boxes( dev_priv );
+ }
BEGIN_RING( 4 );
RADEON_WAIT_UNTIL_3D_IDLE();
-/*
- RADEON_WAIT_UNTIL_PAGE_FLIPPED();
-*/
OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET, 0 ) );
if ( dev_priv->current_page == 0 ) {
@@ -847,6 +1062,8 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev,
int ret = 0, i;
RING_LOCALS;
+ dev_priv->stats.boxes |= RADEON_BOX_TEXTURE_LOAD;
+
/* FIXME: Be smarter about this...
*/
buf = radeon_freelist_get( dev );
@@ -1611,6 +1828,30 @@ static __inline__ int radeon_emit_scalars(
return 0;
}
+/* God this is ugly
+ */
+static __inline__ int radeon_emit_scalars2(
+ drm_radeon_private_t *dev_priv,
+ drm_radeon_cmd_header_t header,
+ drm_radeon_cmd_buffer_t *cmdbuf )
+{
+ int sz = header.scalars.count;
+ int *data = (int *)cmdbuf->buf;
+ int start = ((unsigned int)header.scalars.offset) + 0x100;
+ int stride = header.scalars.stride;
+ RING_LOCALS;
+
+ BEGIN_RING( 3+sz );
+ OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) );
+ OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT));
+ OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) );
+ OUT_RING_USER_TABLE( data, sz );
+ ADVANCE_RING();
+ cmdbuf->buf += sz * sizeof(int);
+ cmdbuf->bufsz -= sz * sizeof(int);
+ return 0;
+}
+
static __inline__ int radeon_emit_vectors(
drm_radeon_private_t *dev_priv,
drm_radeon_cmd_header_t header,
@@ -1775,6 +2016,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
switch (header.header.cmd_type) {
case RADEON_CMD_PACKET:
+ DRM_DEBUG("RADEON_CMD_PACKET\n");
if (radeon_emit_packets( dev_priv, header, &cmdbuf )) {
DRM_ERROR("radeon_emit_packets failed\n");
return DRM_ERR(EINVAL);
@@ -1782,6 +2024,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
break;
case RADEON_CMD_SCALARS:
+ DRM_DEBUG("RADEON_CMD_SCALARS\n");
if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) {
DRM_ERROR("radeon_emit_scalars failed\n");
return DRM_ERR(EINVAL);
@@ -1789,6 +2032,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
break;
case RADEON_CMD_VECTORS:
+ DRM_DEBUG("RADEON_CMD_VECTORS\n");
if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) {
DRM_ERROR("radeon_emit_vectors failed\n");
return DRM_ERR(EINVAL);
@@ -1796,6 +2040,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
break;
case RADEON_CMD_DMA_DISCARD:
+ DRM_DEBUG("RADEON_CMD_DMA_DISCARD\n");
idx = header.dma.buf_idx;
if ( idx < 0 || idx >= dma->buf_count ) {
DRM_ERROR( "buffer index %d (of %d max)\n",
@@ -1813,6 +2058,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
break;
case RADEON_CMD_PACKET3:
+ DRM_DEBUG("RADEON_CMD_PACKET3\n");
if (radeon_emit_packet3( dev, &cmdbuf )) {
DRM_ERROR("radeon_emit_packet3 failed\n");
return DRM_ERR(EINVAL);
@@ -1820,12 +2066,20 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
break;
case RADEON_CMD_PACKET3_CLIP:
+ DRM_DEBUG("RADEON_CMD_PACKET3_CLIP\n");
if (radeon_emit_packet3_cliprect( dev, &cmdbuf, orig_nbox )) {
DRM_ERROR("radeon_emit_packet3_clip failed\n");
return DRM_ERR(EINVAL);
}
break;
+ case RADEON_CMD_SCALARS2:
+ DRM_DEBUG("RADEON_CMD_SCALARS2\n");
+ if (radeon_emit_scalars2( dev_priv, header, &cmdbuf )) {
+ DRM_ERROR("radeon_emit_scalars2 failed\n");
+ return DRM_ERR(EINVAL);
+ }
+ break;
default:
DRM_ERROR("bad cmd_type %d at %p\n",
header.header.cmd_type,
@@ -1835,6 +2089,7 @@ int radeon_cp_cmdbuf( DRM_IOCTL_ARGS )
}
+ DRM_DEBUG("DONE\n");
COMMIT_RING();
return 0;
}
@@ -1863,12 +2118,14 @@ int radeon_cp_getparam( DRM_IOCTL_ARGS )
value = dev_priv->agp_buffers_offset;
break;
case RADEON_PARAM_LAST_FRAME:
+ dev_priv->stats.last_frame_reads++;
value = GET_SCRATCH( 0 );
break;
case RADEON_PARAM_LAST_DISPATCH:
value = GET_SCRATCH( 1 );
break;
case RADEON_PARAM_LAST_CLEAR:
+ dev_priv->stats.last_clear_reads++;
value = GET_SCRATCH( 2 );
break;
default: