summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--shared-core/r300_cmdbuf.c120
-rw-r--r--shared-core/r300_reg.h5
-rw-r--r--shared-core/radeon_cp.c38
-rw-r--r--shared-core/radeon_drv.h31
4 files changed, 138 insertions, 56 deletions
diff --git a/shared-core/r300_cmdbuf.c b/shared-core/r300_cmdbuf.c
index e9f99c8f..addc075d 100644
--- a/shared-core/r300_cmdbuf.c
+++ b/shared-core/r300_cmdbuf.c
@@ -136,6 +136,18 @@ static int r300_emit_cliprects(drm_radeon_private_t *dev_priv,
ADVANCE_RING();
}
+ /* flush cache and wait idle clean after cliprect change */
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+ OUT_RING(R300_RB3D_DC_FLUSH);
+ ADVANCE_RING();
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+ ADVANCE_RING();
+ /* set flush flag */
+ dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
return 0;
}
@@ -166,13 +178,13 @@ void r300_init_reg_flags(struct drm_device *dev)
ADD_RANGE(0x21DC, 1);
ADD_RANGE(R300_VAP_UNKNOWN_221C, 1);
ADD_RANGE(R300_VAP_CLIP_X_0, 4);
- ADD_RANGE(R300_VAP_PVS_WAITIDLE, 1);
+ ADD_RANGE(R300_VAP_PVS_STATE_FLUSH_REG, 1);
ADD_RANGE(R300_VAP_UNKNOWN_2288, 1);
ADD_RANGE(R300_VAP_OUTPUT_VTX_FMT_0, 2);
ADD_RANGE(R300_VAP_PVS_CNTL_1, 3);
ADD_RANGE(R300_GB_ENABLE, 1);
ADD_RANGE(R300_GB_MSPOS0, 5);
- ADD_RANGE(R300_TX_CNTL, 1);
+ ADD_RANGE(R300_TX_INVALTAGS, 1);
ADD_RANGE(R300_TX_ENABLE, 1);
ADD_RANGE(0x4200, 4);
ADD_RANGE(0x4214, 1);
@@ -389,15 +401,28 @@ static __inline__ int r300_emit_vpu(drm_radeon_private_t *dev_priv,
if (sz * 16 > cmdbuf->bufsz)
return -EINVAL;
- BEGIN_RING(5 + sz * 4);
- /* Wait for VAP to come to senses.. */
- /* there is no need to emit it multiple times, (only once before VAP is programmed,
- but this optimization is for later */
- OUT_RING_REG(R300_VAP_PVS_WAITIDLE, 0);
+ /* VAP is very sensitive so we purge cache before we program it
+ * and we also flush its state before & after */
+ BEGIN_RING(6);
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+ OUT_RING(R300_RB3D_DC_FLUSH);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+ OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
+ OUT_RING(0);
+ ADVANCE_RING();
+ /* set flush flag */
+ dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
+ BEGIN_RING(3 + sz * 4);
OUT_RING_REG(R300_VAP_PVS_UPLOAD_ADDRESS, addr);
OUT_RING(CP_PACKET0_TABLE(R300_VAP_PVS_UPLOAD_DATA, sz * 4 - 1));
OUT_RING_TABLE((int *)cmdbuf->buf, sz * 4);
+ ADVANCE_RING();
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(R300_VAP_PVS_STATE_FLUSH_REG, 0));
+ OUT_RING(0);
ADVANCE_RING();
cmdbuf->buf += sz * 16;
@@ -425,6 +450,15 @@ static __inline__ int r300_emit_clear(drm_radeon_private_t *dev_priv,
OUT_RING_TABLE((int *)cmdbuf->buf, 8);
ADVANCE_RING();
+ BEGIN_RING(4);
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+ OUT_RING(R300_RB3D_DC_FLUSH);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+ ADVANCE_RING();
+ /* set flush flag */
+ dev_priv->track_flush |= RADEON_FLUSH_EMITED;
+
cmdbuf->buf += 8 * 4;
cmdbuf->bufsz -= 8 * 4;
@@ -614,13 +648,23 @@ static __inline__ int r300_emit_raw_packet3(drm_radeon_private_t *dev_priv,
case RADEON_CNTL_BITBLT_MULTI:
return r300_emit_bitblt_multi(dev_priv, cmdbuf);
- case RADEON_CP_INDX_BUFFER: /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
+ case RADEON_CP_INDX_BUFFER:
+ /* a draw is being sent: clear (not toggle) the flush & purge flags */
+ dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
+ RADEON_PURGE_EMITED);
+ /* DRAW_INDX_2 without INDX_BUFFER seems to lock up the gpu */
return r300_emit_indx_buffer(dev_priv, cmdbuf);
- case RADEON_CP_3D_DRAW_IMMD_2: /* triggers drawing using in-packet vertex data */
- case RADEON_CP_3D_DRAW_VBUF_2: /* triggers drawing of vertex buffers setup elsewhere */
- case RADEON_CP_3D_DRAW_INDX_2: /* triggers drawing using indices to vertex buffer */
+ case RADEON_CP_3D_DRAW_IMMD_2:
+ /* triggers drawing using in-packet vertex data */
+ case RADEON_CP_3D_DRAW_VBUF_2:
+ /* triggers drawing of vertex buffers setup elsewhere */
+ case RADEON_CP_3D_DRAW_INDX_2:
+ /* triggers drawing using indices to vertex buffer */
case RADEON_WAIT_FOR_IDLE:
case RADEON_CP_NOP:
+ /* a draw is being sent: clear (not toggle) the flush & purge flags */
+ dev_priv->track_flush &= ~(RADEON_FLUSH_EMITED |
+ RADEON_PURGE_EMITED);
/* these packets are safe */
break;
default:
@@ -715,16 +759,47 @@ static __inline__ int r300_emit_packet3(drm_radeon_private_t *dev_priv,
static __inline__ void r300_pacify(drm_radeon_private_t *dev_priv)
{
RING_LOCALS;
+
+ if ((dev_priv->track_flush & RADEON_PURGE_EMITED)) {
+ /* purge already emitted with no vertex sent in between; don't purge
+ * again or a lockup will likely happen */
+ return;
+ }
- BEGIN_RING(6);
- OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
- OUT_RING(R300_RB3D_DSTCACHE_UNKNOWN_0A);
+ /* flush & purge zbuffer */
+ BEGIN_RING(2);
OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0));
- OUT_RING(R300_ZB_ZCACHE_CTLSTAT_ZC_FLUSH_FLUSH_AND_FREE|
- R300_ZB_ZCACHE_CTLSTAT_ZC_FREE_FREE);
- OUT_RING(CP_PACKET3(RADEON_CP_NOP, 0));
- OUT_RING(0x0);
+ OUT_RING(R300_ZC_FLUSH | R300_ZC_FREE);
+ ADVANCE_RING();
+ /* flush & purge 3d */
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0));
+ OUT_RING(R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE);
+ ADVANCE_RING();
+ /* flush & purge texture */
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(R300_TX_INVALTAGS, 0));
+ OUT_RING(0);
+ ADVANCE_RING();
+ /* FIXME: is this one really needed ? */
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(R300_RB3D_AARESOLVE_CTL, 0));
+ OUT_RING(0);
ADVANCE_RING();
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_3D_IDLECLEAN);
+ ADVANCE_RING();
+ /* flush & purge 2d through E2 as RB2D will trigger lockup */
+ BEGIN_RING(4);
+ OUT_RING(CP_PACKET0(R300_DSTCACHE_CTLSTAT, 0));
+ OUT_RING(R300_RB2D_DC_FLUSH | R300_RB2D_DC_FREE);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_2D_IDLECLEAN |
+ RADEON_WAIT_HOST_IDLECLEAN);
+ ADVANCE_RING();
+ /* set flush & purge flags */
+ dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
}
/**
@@ -902,12 +977,15 @@ int r300_do_cp_cmdbuf(struct drm_device *dev,
struct drm_buf *buf = NULL;
int emit_dispatch_age = 0;
int ret = 0;
+ RING_LOCALS;
DRM_DEBUG("\n");
- /* See the comment above r300_emit_begin3d for why this call must be here,
- * and what the cleanup gotos are for. */
- r300_pacify(dev_priv);
+ /* start by a wait, should be necessary */
+ BEGIN_RING(2);
+ OUT_RING(CP_PACKET0(RADEON_WAIT_UNTIL, 0));
+ OUT_RING(RADEON_WAIT_3D_IDLE | RADEON_WAIT_2D_IDLE);
+ ADVANCE_RING();
if (cmdbuf->nbox <= R300_SIMULTANEOUS_CLIPRECTS) {
ret = r300_emit_cliprects(dev_priv, cmdbuf, 0);
diff --git a/shared-core/r300_reg.h b/shared-core/r300_reg.h
index 1920ab07..d35dd39d 100644
--- a/shared-core/r300_reg.h
+++ b/shared-core/r300_reg.h
@@ -320,7 +320,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
* Therefore, I suspect writing zero to 0x2284 synchronizes the engine and
* avoids bugs caused by still running shaders reading bad data from memory.
*/
-#define R300_VAP_PVS_WAITIDLE 0x2284 /* GUESS */
+#define R300_VAP_PVS_STATE_FLUSH_REG 0x2284
/* Absolutely no clue what this register is about. */
#define R300_VAP_UNKNOWN_2288 0x2288
@@ -516,7 +516,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
/* gap */
/* Zero to flush caches. */
-#define R300_TX_CNTL 0x4100
+#define R300_TX_INVALTAGS 0x4100
#define R300_TX_FLUSH 0x0
/* The upper enable bits are guessed, based on fglrx reported limits. */
@@ -1365,6 +1365,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
#define R300_RB3D_COLORPITCH2 0x4E40 /* GUESS */
#define R300_RB3D_COLORPITCH3 0x4E44 /* GUESS */
+#define R300_RB3D_AARESOLVE_CTL 0x4E88
/* gap */
/* Guess by Vladimir.
diff --git a/shared-core/radeon_cp.c b/shared-core/radeon_cp.c
index 7317d184..ec914df1 100644
--- a/shared-core/radeon_cp.c
+++ b/shared-core/radeon_cp.c
@@ -39,6 +39,7 @@
#define RADEON_FIFO_DEBUG 0
static int radeon_do_cleanup_cp(struct drm_device * dev);
+static void radeon_do_cp_start(drm_radeon_private_t * dev_priv);
static u32 R500_READ_MCIND(drm_radeon_private_t *dev_priv, int addr)
{
@@ -198,23 +199,8 @@ static int radeon_do_pixcache_flush(drm_radeon_private_t * dev_priv)
DRM_UDELAY(1);
}
} else {
- /* 3D */
- tmp = RADEON_READ(R300_RB3D_DSTCACHE_CTLSTAT);
- tmp |= RADEON_RB3D_DC_FLUSH_ALL;
- RADEON_WRITE(R300_RB3D_DSTCACHE_CTLSTAT, tmp);
-
- /* 2D */
- tmp = RADEON_READ(R300_DSTCACHE_CTLSTAT);
- tmp |= RADEON_RB3D_DC_FLUSH_ALL;
- RADEON_WRITE(R300_DSTCACHE_CTLSTAT, tmp);
-
- for (i = 0; i < dev_priv->usec_timeout; i++) {
- if (!(RADEON_READ(R300_DSTCACHE_CTLSTAT)
- & RADEON_RB3D_DC_BUSY)) {
- return 0;
- }
- DRM_UDELAY(1);
- }
+ /* don't flush or purge cache here or lockup */
+ return 0;
}
#if RADEON_FIFO_DEBUG
@@ -237,6 +223,9 @@ static int radeon_do_wait_for_fifo(drm_radeon_private_t * dev_priv, int entries)
return 0;
DRM_UDELAY(1);
}
+ DRM_INFO("wait for fifo failed status : 0x%08X 0x%08X\n",
+ RADEON_READ(RADEON_RBBM_STATUS),
+ RADEON_READ(R300_VAP_CNTL_STATUS));
#if RADEON_FIFO_DEBUG
DRM_ERROR("failed!\n");
@@ -263,6 +252,9 @@ static int radeon_do_wait_for_idle(drm_radeon_private_t * dev_priv)
}
DRM_UDELAY(1);
}
+ DRM_INFO("wait idle failed status : 0x%08X 0x%08X\n",
+ RADEON_READ(RADEON_RBBM_STATUS),
+ RADEON_READ(R300_VAP_CNTL_STATUS));
#if RADEON_FIFO_DEBUG
DRM_ERROR("failed!\n");
@@ -445,14 +437,20 @@ static void radeon_do_cp_start(drm_radeon_private_t * dev_priv)
dev_priv->cp_running = 1;
- BEGIN_RING(6);
-
+ BEGIN_RING(8);
+ /* isync can only be written through cp on r5xx write it here */
+ OUT_RING(CP_PACKET0(RADEON_ISYNC_CNTL, 0));
+ OUT_RING(RADEON_ISYNC_ANY2D_IDLE3D |
+ RADEON_ISYNC_ANY3D_IDLE2D |
+ RADEON_ISYNC_WAIT_IDLEGUI | /* was CPSCRATCH_IDLEGUI OR-ed twice */
+ RADEON_ISYNC_CPSCRATCH_IDLEGUI);
RADEON_PURGE_CACHE();
RADEON_PURGE_ZCACHE();
RADEON_WAIT_UNTIL_IDLE();
-
ADVANCE_RING();
COMMIT_RING();
+
+ dev_priv->track_flush |= RADEON_FLUSH_EMITED | RADEON_PURGE_EMITED;
}
/* Reset the Command Processor. This will not flush any pending
diff --git a/shared-core/radeon_drv.h b/shared-core/radeon_drv.h
index 20f9b956..71669c22 100644
--- a/shared-core/radeon_drv.h
+++ b/shared-core/radeon_drv.h
@@ -222,6 +222,9 @@ struct radeon_virt_surface {
struct drm_file *file_priv;
};
+#define RADEON_FLUSH_EMITED (1 << 0) /* track_flush bit: cache flush emitted since the last draw packet */
+#define RADEON_PURGE_EMITED (1 << 1) /* track_flush bit: cache purge emitted since the last draw packet */
+
typedef struct drm_radeon_private {
drm_radeon_ring_buffer_t ring;
@@ -317,6 +320,7 @@ typedef struct drm_radeon_private {
unsigned long fb_aper_offset;
int num_gb_pipes;
+ int track_flush;
} drm_radeon_private_t;
typedef struct drm_radeon_buf_priv {
@@ -704,7 +708,6 @@ extern int r300_do_cp_cmdbuf(struct drm_device *dev,
#define R300_ZB_ZCACHE_CTLSTAT 0x4f18
# define R300_ZC_FLUSH (1 << 0)
# define R300_ZC_FREE (1 << 1)
-# define R300_ZC_FLUSH_ALL 0x3
# define R300_ZC_BUSY (1 << 31)
#define RADEON_RB3D_DSTCACHE_CTLSTAT 0x325c
# define RADEON_RB3D_DC_FLUSH (3 << 0)
@@ -712,6 +715,8 @@ extern int r300_do_cp_cmdbuf(struct drm_device *dev,
# define RADEON_RB3D_DC_FLUSH_ALL 0xf
# define RADEON_RB3D_DC_BUSY (1 << 31)
#define R300_RB3D_DSTCACHE_CTLSTAT 0x4e4c
+# define R300_RB3D_DC_FLUSH (2 << 0)
+# define R300_RB3D_DC_FREE (2 << 2)
# define R300_RB3D_DC_FINISH (1 << 4)
#define RADEON_RB3D_ZSTENCILCNTL 0x1c2c
# define RADEON_Z_TEST_MASK (7 << 4)
@@ -1281,21 +1286,21 @@ do { \
#define RADEON_FLUSH_CACHE() do { \
if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \
- OUT_RING( CP_PACKET0( RADEON_RB3D_DSTCACHE_CTLSTAT, 0 ) ); \
- OUT_RING( RADEON_RB3D_DC_FLUSH ); \
+ OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \
+ OUT_RING(RADEON_RB3D_DC_FLUSH); \
} else { \
- OUT_RING( CP_PACKET0( R300_RB3D_DSTCACHE_CTLSTAT, 0 ) ); \
- OUT_RING( RADEON_RB3D_DC_FLUSH ); \
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \
+ OUT_RING(R300_RB3D_DC_FLUSH); \
} \
} while (0)
#define RADEON_PURGE_CACHE() do { \
if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \
- OUT_RING( CP_PACKET0( RADEON_RB3D_DSTCACHE_CTLSTAT, 0 ) ); \
- OUT_RING( RADEON_RB3D_DC_FLUSH_ALL ); \
+ OUT_RING(CP_PACKET0( RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \
+ OUT_RING(RADEON_RB3D_DC_FLUSH | RADEON_RB3D_DC_FREE); \
} else { \
- OUT_RING( CP_PACKET0( R300_RB3D_DSTCACHE_CTLSTAT, 0 ) ); \
- OUT_RING( RADEON_RB3D_DC_FLUSH_ALL ); \
+ OUT_RING(CP_PACKET0(R300_RB3D_DSTCACHE_CTLSTAT, 0)); \
+ OUT_RING(R300_RB3D_DC_FLUSH | R300_RB3D_DC_FREE ); \
} \
} while (0)
@@ -1311,11 +1316,11 @@ do { \
#define RADEON_PURGE_ZCACHE() do { \
if ((dev_priv->flags & RADEON_FAMILY_MASK) <= CHIP_RV280) { \
- OUT_RING( CP_PACKET0( RADEON_RB3D_ZCACHE_CTLSTAT, 0 ) ); \
- OUT_RING( RADEON_RB3D_ZC_FLUSH_ALL ); \
+ OUT_RING(CP_PACKET0(RADEON_RB3D_ZCACHE_CTLSTAT, 0)); \
+ OUT_RING(RADEON_RB3D_ZC_FLUSH | RADEON_RB3D_ZC_FREE); \
} else { \
- OUT_RING( CP_PACKET0( R300_RB3D_DSTCACHE_CTLSTAT, 0 ) ); \
- OUT_RING( R300_ZC_FLUSH_ALL ); \
+ OUT_RING(CP_PACKET0(R300_ZB_ZCACHE_CTLSTAT, 0)); /* Z cache register: the R300_ZC_* bits belong here, not in the 3D dst cache */ \
+ OUT_RING(R300_ZC_FLUSH | R300_ZC_FREE); \
} \
} while (0)