diff options
author | Keith Whitwell <keith@tungstengraphics.com> | 2002-06-12 15:50:28 +0000 |
---|---|---|
committer | Keith Whitwell <keith@tungstengraphics.com> | 2002-06-12 15:50:28 +0000 |
commit | 2dcada361db7db00bf0796e399b4188578e3efbe (patch) | |
tree | 562908294d06253cf1c7542192f4a3cc5f777da0 /linux | |
parent | 5676a2a6105afdfc343e7f36f3c87e528a9d14b3 (diff) |
merged tcl-0-0-branch
Diffstat (limited to 'linux')
-rw-r--r-- | linux/drm.h | 4 | ||||
-rw-r--r-- | linux/radeon_cp.c | 162 | ||||
-rw-r--r-- | linux/radeon_drm.h | 123 | ||||
-rw-r--r-- | linux/radeon_drv.c | 23 | ||||
-rw-r--r-- | linux/radeon_drv.h | 80 | ||||
-rw-r--r-- | linux/radeon_state.c | 1202 |
6 files changed, 899 insertions, 695 deletions
diff --git a/linux/drm.h b/linux/drm.h index d116f375..6ab295c4 100644 --- a/linux/drm.h +++ b/linux/drm.h @@ -84,6 +84,10 @@ typedef unsigned int drm_magic_t; /* Warning: If you change this structure, make sure you change * XF86DRIClipRectRec in the server as well */ +/* KW: Actually it's illegal to change either for + * backwards-compatibility reasons. + */ + typedef struct drm_clip_rect { unsigned short x1; unsigned short y1; diff --git a/linux/radeon_cp.c b/linux/radeon_cp.c index 14901f59..5486f1c1 100644 --- a/linux/radeon_cp.c +++ b/linux/radeon_cp.c @@ -461,6 +461,7 @@ int radeon_do_cp_idle( drm_radeon_private_t *dev_priv ) RADEON_WAIT_UNTIL_IDLE(); ADVANCE_RING(); + COMMIT_RING(); return radeon_do_wait_for_idle( dev_priv ); } @@ -485,6 +486,7 @@ static void radeon_do_cp_start( drm_radeon_private_t *dev_priv ) RADEON_WAIT_UNTIL_IDLE(); ADVANCE_RING(); + COMMIT_RING(); } /* Reset the Command Processor. This will not flush any pending @@ -751,7 +753,7 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init ) */ dev_priv->depth_clear.rb3d_cntl = (RADEON_PLANE_MASK_ENABLE | (dev_priv->color_fmt << 10) | - RADEON_ZBLOCK16); + (1<<15)); dev_priv->depth_clear.rb3d_zstencilcntl = (dev_priv->depth_fmt | @@ -970,9 +972,7 @@ static int radeon_do_init_cp( drm_device_t *dev, drm_radeon_init_t *init ) radeon_cp_load_microcode( dev_priv ); radeon_cp_init_ring_buffer( dev, dev_priv ); -#if ROTATE_BUFS dev_priv->last_buf = 0; -#endif dev->dev_private = (void *)dev_priv; @@ -1152,116 +1152,27 @@ int radeon_engine_reset( struct inode *inode, struct file *filp, * Fullscreen mode */ -static int radeon_do_init_pageflip( drm_device_t *dev ) -{ - drm_radeon_private_t *dev_priv = dev->dev_private; - DRM_DEBUG( "%s\n", __FUNCTION__ ); - - dev_priv->crtc_offset = RADEON_READ( RADEON_CRTC_OFFSET ); - dev_priv->crtc_offset_cntl = RADEON_READ( RADEON_CRTC_OFFSET_CNTL ); - - RADEON_WRITE( RADEON_CRTC_OFFSET, dev_priv->front_offset ); - RADEON_WRITE( RADEON_CRTC_OFFSET_CNTL, - dev_priv->crtc_offset_cntl | - RADEON_CRTC_OFFSET_FLIP_CNTL ); - - dev_priv->page_flipping = 1; - dev_priv->current_page = 0; - - return 0; -} - -int radeon_do_cleanup_pageflip( drm_device_t *dev ) -{ - drm_radeon_private_t *dev_priv = dev->dev_private; - DRM_DEBUG( "%s\n", __FUNCTION__ ); - - RADEON_WRITE( RADEON_CRTC_OFFSET, dev_priv->crtc_offset ); - RADEON_WRITE( RADEON_CRTC_OFFSET_CNTL, dev_priv->crtc_offset_cntl ); - - dev_priv->page_flipping = 0; - dev_priv->current_page = 0; - - return 0; -} - +/* KW: Deprecated to say the least: + */ int radeon_fullscreen( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ) { - drm_file_t *priv = filp->private_data; - drm_device_t *dev = priv->dev; - drm_radeon_fullscreen_t fs; - - LOCK_TEST_WITH_RETURN( dev ); - - if ( copy_from_user( &fs, (drm_radeon_fullscreen_t *)arg, - sizeof(fs) ) ) - return -EFAULT; - - switch ( fs.func ) { - case RADEON_INIT_FULLSCREEN: - return radeon_do_init_pageflip( dev ); - case RADEON_CLEANUP_FULLSCREEN: - return radeon_do_cleanup_pageflip( dev ); - } - - return -EINVAL; + return 0; } /* ================================================================ * Freelist management */ -#define RADEON_BUFFER_USED 0xffffffff -#define RADEON_BUFFER_FREE 0 - -#if 0 -static int radeon_freelist_init( drm_device_t *dev ) -{ - drm_device_dma_t *dma = dev->dma; - drm_radeon_private_t *dev_priv = dev->dev_private; - drm_buf_t *buf; - drm_radeon_buf_priv_t *buf_priv; - drm_radeon_freelist_t *entry; - int i; - - dev_priv->head = DRM(alloc)( sizeof(drm_radeon_freelist_t), - DRM_MEM_DRIVER ); - if ( dev_priv->head == NULL ) - return -ENOMEM; - - memset( dev_priv->head, 0, sizeof(drm_radeon_freelist_t) ); - dev_priv->head->age = RADEON_BUFFER_USED; - - for ( i = 0 ; i < dma->buf_count ; i++ ) { - buf = dma->buflist[i]; - buf_priv = buf->dev_private; - - entry = DRM(alloc)( sizeof(drm_radeon_freelist_t), - DRM_MEM_DRIVER ); - if ( !entry ) return -ENOMEM; - - entry->age = RADEON_BUFFER_FREE; - entry->buf = buf; - entry->prev = dev_priv->head; - entry->next = dev_priv->head->next; - if ( !entry->next ) - dev_priv->tail = entry; - buf_priv->discard = 0; - buf_priv->dispatched = 0; - buf_priv->list_entry = entry; - - dev_priv->head->next = entry; - - if ( dev_priv->head->next ) - dev_priv->head->next->prev = entry; - } - - return 0; - -} -#endif +/* Original comment: FIXME: ROTATE_BUFS is a hack to cycle through + * bufs until freelist code is used. Note this hides a problem with + * the scratch register * (used to keep track of last buffer + * completed) being written to before * the last buffer has actually + * completed rendering. + * + * KW: It's also a good way to find free buffers quickly. + */ drm_buf_t *radeon_freelist_get( drm_device_t *dev ) { @@ -1270,57 +1181,24 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev ) drm_radeon_buf_priv_t *buf_priv; drm_buf_t *buf; int i, t; -#if ROTATE_BUFS int start; -#endif - - /* FIXME: Optimize -- use freelist code */ - for ( i = 0 ; i < dma->buf_count ; i++ ) { - buf = dma->buflist[i]; - buf_priv = buf->dev_private; - if ( buf->pid == 0 ) { - DRM_DEBUG( " ret buf=%d last=%d pid=0\n", - buf->idx, dev_priv->last_buf ); - return buf; - } - DRM_DEBUG( " skipping buf=%d pid=%d\n", - buf->idx, buf->pid ); - } - -#if ROTATE_BUFS if ( ++dev_priv->last_buf >= dma->buf_count ) dev_priv->last_buf = 0; + start = dev_priv->last_buf; -#endif + for ( t = 0 ; t < dev_priv->usec_timeout ; t++ ) { -#if 0 - /* FIXME: Disable this for now */ - u32 done_age = dev_priv->scratch[RADEON_LAST_DISPATCH]; -#else u32 done_age = RADEON_READ( RADEON_LAST_DISPATCH_REG ); -#endif -#if ROTATE_BUFS for ( i = start ; i < dma->buf_count ; i++ ) { -#else - for ( i = 0 ; i < dma->buf_count ; i++ ) { -#endif buf = dma->buflist[i]; buf_priv = buf->dev_private; - if ( buf->pending && buf_priv->age <= done_age ) { - /* The buffer has been processed, so it - * can now be used. - */ + if ( buf->pid == 0 || (buf->pending && + buf_priv->age <= done_age) ) { buf->pending = 0; - DRM_DEBUG( " ret buf=%d last=%d age=%d done=%d\n", buf->idx, dev_priv->last_buf, buf_priv->age, done_age ); return buf; } - DRM_DEBUG( " skipping buf=%d age=%d done=%d\n", - buf->idx, buf_priv->age, - done_age ); -#if ROTATE_BUFS start = 0; -#endif } udelay( 1 ); } @@ -1332,14 +1210,10 @@ drm_buf_t *radeon_freelist_get( drm_device_t *dev ) void radeon_freelist_reset( drm_device_t *dev ) { drm_device_dma_t *dma = dev->dma; -#if ROTATE_BUFS drm_radeon_private_t *dev_priv = dev->dev_private; -#endif int i; -#if ROTATE_BUFS dev_priv->last_buf = 0; -#endif for ( i = 0 ; i < dma->buf_count ; i++ ) { drm_buf_t *buf = dma->buflist[i]; drm_radeon_buf_priv_t *buf_priv = buf->dev_private; diff --git a/linux/radeon_drm.h b/linux/radeon_drm.h index 6774b2bc..dd24d429 100644 --- a/linux/radeon_drm.h +++ b/linux/radeon_drm.h @@ -2,6 +2,7 @@ * * Copyright 2000 Precision Insight, Inc., Cedar Park, Texas. * Copyright 2000 VA Linux Systems, Inc., Fremont, California. + * Copyright 2002 Tungsten Graphics, Inc., Cedar Park, Texas. * All rights reserved. * * Permission is hereby granted, free of charge, to any person obtaining a @@ -38,7 +39,8 @@ #ifndef __RADEON_SAREA_DEFINES__ #define __RADEON_SAREA_DEFINES__ -/* What needs to be changed for the current vertex buffer? +/* Old style state flags, required for sarea interface (1.1 and 1.2 + * clears) and 1.2 drm_vertex2 ioctl. */ #define RADEON_UPLOAD_CONTEXT 0x00000001 #define RADEON_UPLOAD_VERTFMT 0x00000002 @@ -58,8 +60,68 @@ #define RADEON_UPLOAD_CLIPRECTS 0x00008000 /* handled client-side */ #define RADEON_REQUIRE_QUIESCENCE 0x00010000 #define RADEON_UPLOAD_ZBIAS 0x00020000 /* version 1.2 and newer */ -#define RADEON_UPLOAD_ALL 0x0002ffff -#define RADEON_UPLOAD_CONTEXT_ALL 0x000201ff +#define RADEON_UPLOAD_ALL 0x003effff +#define RADEON_UPLOAD_CONTEXT_ALL 0x003e01ff + + +/* New style per-packet identifiers for use in cmd_buffer ioctl with + * the RADEON_EMIT_PACKET command. Comments relate new packets to old + * state bits and the packet size: + */ +#define RADEON_EMIT_PP_MISC 0 /* context/7 */ +#define RADEON_EMIT_PP_CNTL 1 /* context/3 */ +#define RADEON_EMIT_RB3D_COLORPITCH 2 /* context/1 */ +#define RADEON_EMIT_RE_LINE_PATTERN 3 /* line/2 */ +#define RADEON_EMIT_SE_LINE_WIDTH 4 /* line/1 */ +#define RADEON_EMIT_PP_LUM_MATRIX 5 /* bumpmap/1 */ +#define RADEON_EMIT_PP_ROT_MATRIX_0 6 /* bumpmap/2 */ +#define RADEON_EMIT_RB3D_STENCILREFMASK 7 /* masks/3 */ +#define RADEON_EMIT_SE_VPORT_XSCALE 8 /* viewport/6 */ +#define RADEON_EMIT_SE_CNTL 9 /* setup/2 */ +#define RADEON_EMIT_SE_CNTL_STATUS 10 /* setup/1 */ +#define RADEON_EMIT_RE_MISC 11 /* misc/1 */ +#define RADEON_EMIT_PP_TXFILTER_0 12 /* tex0/6 */ +#define RADEON_EMIT_PP_BORDER_COLOR_0 13 /* tex0/1 */ +#define RADEON_EMIT_PP_TXFILTER_1 14 /* tex1/6 */ +#define RADEON_EMIT_PP_BORDER_COLOR_1 15 /* tex1/1 */ +#define RADEON_EMIT_PP_TXFILTER_2 16 /* tex2/6 */ +#define RADEON_EMIT_PP_BORDER_COLOR_2 17 /* tex2/1 */ +#define RADEON_EMIT_SE_ZBIAS_FACTOR 18 /* zbias/2 */ +#define RADEON_EMIT_SE_TCL_OUTPUT_VTX_FMT 19 /* tcl/11 */ +#define RADEON_EMIT_SE_TCL_MATERIAL_EMMISSIVE_RED 20 /* material/17 */ +#define RADEON_MAX_STATE_PACKETS 21 + + +/* Commands understood by cmd_buffer ioctl. More can be added but + * obviously these can't be removed or changed: + */ +#define RADEON_CMD_PACKET 1 /* emit one of the register packets above */ +#define RADEON_CMD_SCALARS 2 /* emit scalar data */ +#define RADEON_CMD_VECTORS 3 /* emit vector data */ +#define RADEON_CMD_DMA_DISCARD 4 /* discard current dma buf */ +#define RADEON_CMD_PACKET3 5 /* emit hw packet */ +#define RADEON_CMD_PACKET3_CLIP 6 /* emit hw packet wrapped in cliprects */ + + +typedef union { + int i; + struct { + char cmd_type, pad0, pad1, pad2; + } header; + struct { + char cmd_type, packet_id, pad0, pad1; + } packet; + struct { + char cmd_type, offset, stride, count; + } scalars; + struct { + char cmd_type, offset, stride, count; + } vectors; + struct { + char cmd_type, buf_idx, pad0, pad1; + } dma; +} drm_radeon_cmd_header_t; + #define RADEON_FRONT 0x1 #define RADEON_BACK 0x2 @@ -82,7 +144,6 @@ /* Byte offsets for indirect buffer data */ #define RADEON_INDEX_PRIM_OFFSET 20 -#define RADEON_HOSTDATA_BLIT_OFFSET 32 #define RADEON_SCRATCH_REG_OFFSET 32 @@ -181,8 +242,6 @@ typedef struct { unsigned int pp_border_color; } drm_radeon_texture_regs_t; -/* Space is crucial; there is some redunancy here: - */ typedef struct { unsigned int start; unsigned int finish; @@ -192,6 +251,7 @@ typedef struct { unsigned int vc_format; /* vertex format */ } drm_radeon_prim_t; + typedef struct { drm_radeon_context_regs_t context; drm_radeon_texture_regs_t tex[RADEON_MAX_TEXTURE_UNITS]; @@ -231,6 +291,8 @@ typedef struct { drm_radeon_tex_region_t tex_list[RADEON_NR_TEX_HEAPS][RADEON_NR_TEX_REGIONS+1]; int tex_age[RADEON_NR_TEX_HEAPS]; int ctx_owner; + int pfState; /* number of 3d windows (0,1,2ormore) */ + int pfCurrentPage; /* which buffer is being displayed? */ } drm_radeon_sarea_t; @@ -258,6 +320,9 @@ typedef struct { #define DRM_IOCTL_RADEON_INDIRECT DRM_IOWR(0x4d, drm_radeon_indirect_t) #define DRM_IOCTL_RADEON_TEXTURE DRM_IOWR(0x4e, drm_radeon_texture_t) #define DRM_IOCTL_RADEON_VERTEX2 DRM_IOW( 0x4f, drm_radeon_vertex_t) +#define DRM_IOCTL_RADEON_CMDBUF DRM_IOW( 0x50, drm_radeon_cmd_buffer_t) +#define DRM_IOCTL_RADEON_GETPARAM DRM_IOWR(0x51, drm_radeon_getparam_t) +#define DRM_IOCTL_RADEON_FLIP DRM_IO( 0x52) typedef struct drm_radeon_init { enum { @@ -324,6 +389,18 @@ typedef struct drm_radeon_vertex { int discard; /* Client finished with buffer? */ } drm_radeon_vertex_t; +typedef struct drm_radeon_indices { + int prim; + int idx; + int start; + int end; + int discard; /* Client finished with buffer? */ +} drm_radeon_indices_t; + +/* v1.2 - obsoletes drm_radeon_vertex and drm_radeon_indices + * - allows multiple primitives and state changes in a single ioctl + * - supports driver change to emit native primitives + */ typedef struct drm_radeon_vertex2 { int idx; /* Index of vertex buffer */ int discard; /* Client finished with buffer? */ @@ -333,13 +410,22 @@ typedef struct drm_radeon_vertex2 { drm_radeon_prim_t *prim; } drm_radeon_vertex2_t; -typedef struct drm_radeon_indices { - int prim; - int idx; - int start; - int end; - int discard; /* Client finished with buffer? */ -} drm_radeon_indices_t; +/* v1.3 - obsoletes drm_radeon_vertex2 + * - allows arbitarily large cliprect list + * - allows updating of tcl packet, vector and scalar state + * - allows memory-efficient description of state updates + * - allows state to be emitted without a primitive + * (for clears, ctx switches) + * - allows more than one dma buffer to be referenced per ioctl + * - supports tcl driver + * - may be extended in future versions with new cmd types, packets + */ +typedef struct drm_radeon_cmd_buffer { + int bufsz; + char *buf; + int nbox; + drm_clip_rect_t *boxes; +} drm_radeon_cmd_buffer_t; typedef struct drm_radeon_tex_image { unsigned int x, y; /* Blit coordinates */ @@ -367,4 +453,15 @@ typedef struct drm_radeon_indirect { int discard; } drm_radeon_indirect_t; + +/* 1.3: An ioctl to get parameters that aren't available to the 3d + * client any other way. + */ +#define RADEON_PARAM_AGP_BUFFER_OFFSET 0x1 + +typedef struct drm_radeon_getparam { + int param; + int *value; +} drm_radeon_getparam_t; + #endif diff --git a/linux/radeon_drv.c b/linux/radeon_drv.c index 135dd184..e4af560b 100644 --- a/linux/radeon_drv.c +++ b/linux/radeon_drv.c @@ -39,10 +39,10 @@ #define DRIVER_NAME "radeon" #define DRIVER_DESC "ATI Radeon" -#define DRIVER_DATE "20020602" +#define DRIVER_DATE "20020611" #define DRIVER_MAJOR 1 -#define DRIVER_MINOR 2 +#define DRIVER_MINOR 3 #define DRIVER_PATCHLEVEL 1 /* Interface history: @@ -51,6 +51,10 @@ * 1.2 - Add vertex2 ioctl (keith) * - Add stencil capability to clear ioctl (gareth, keith) * - Increase MAX_TEXTURE_LEVELS (brian) + * 1.3 - Add cmdbuf ioctl (keith) + * - Add support for new radeon packets (keith) + * - Add getparam ioctl (keith) + * - Add flip-buffers ioctl, deprecate fullscreen foo (keith). */ #define DRIVER_IOCTLS \ [DRM_IOCTL_NR(DRM_IOCTL_DMA)] = { radeon_cp_buffers, 1, 0 }, \ @@ -68,17 +72,10 @@ [DRM_IOCTL_NR(DRM_IOCTL_RADEON_TEXTURE)] = { radeon_cp_texture, 1, 0 }, \ [DRM_IOCTL_NR(DRM_IOCTL_RADEON_STIPPLE)] = { radeon_cp_stipple, 1, 0 }, \ [DRM_IOCTL_NR(DRM_IOCTL_RADEON_INDIRECT)] = { radeon_cp_indirect, 1, 1 }, \ - [DRM_IOCTL_NR(DRM_IOCTL_RADEON_VERTEX2)] = { radeon_cp_vertex2, 1, 0 }, - - -#if 0 -/* GH: Count data sent to card via ring or vertex/indirect buffers. - */ -#define __HAVE_COUNTERS 3 -#define __HAVE_COUNTER6 _DRM_STAT_IRQ -#define __HAVE_COUNTER7 _DRM_STAT_PRIMARY -#define __HAVE_COUNTER8 _DRM_STAT_SECONDARY -#endif + [DRM_IOCTL_NR(DRM_IOCTL_RADEON_VERTEX2)] = { radeon_cp_vertex2, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_RADEON_CMDBUF)] = { radeon_cp_cmdbuf, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_RADEON_GETPARAM)] = { radeon_cp_getparam, 1, 0 }, \ + [DRM_IOCTL_NR(DRM_IOCTL_RADEON_FLIP)] = { radeon_cp_flip, 1, 0 }, #include "drm_agpsupport.h" diff --git a/linux/radeon_drv.h b/linux/radeon_drv.h index d6a90078..ba9f8de9 100644 --- a/linux/radeon_drv.h +++ b/linux/radeon_drv.h @@ -74,14 +74,7 @@ typedef struct drm_radeon_private { drm_radeon_freelist_t *head; drm_radeon_freelist_t *tail; -/* FIXME: ROTATE_BUFS is a hask to cycle through bufs until freelist - code is used. Note this hides a problem with the scratch register - (used to keep track of last buffer completed) being written to before - the last buffer has actually completed rendering. */ -#define ROTATE_BUFS 1 -#if ROTATE_BUFS int last_buf; -#endif volatile u32 *scratch; int usec_timeout; @@ -123,10 +116,6 @@ typedef struct drm_radeon_private { typedef struct drm_radeon_buf_priv { u32 age; - int prim; - int discard; - int dispatched; - drm_radeon_freelist_t *list_entry; } drm_radeon_buf_priv_t; /* radeon_cp.c */ @@ -181,6 +170,13 @@ extern int radeon_cp_indirect( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); extern int radeon_cp_vertex2( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ); +extern int radeon_cp_cmdbuf( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ); +extern int radeon_cp_getparam( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ); +extern int radeon_cp_flip( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ); + /* Register definitions, register access macros and drmAddMap constants @@ -209,8 +205,6 @@ extern int radeon_cp_vertex2( struct inode *inode, struct file *filp, # define RADEON_CRTC_OFFSET_FLIP_CNTL (1 << 16) #define RADEON_RB3D_COLORPITCH 0x1c48 -#define RADEON_RB3D_DEPTHCLEARVALUE 0x1c30 -#define RADEON_RB3D_DEPTHXY_OFFSET 0x1c60 #define RADEON_DP_GUI_MASTER_CNTL 0x146c # define RADEON_GMC_SRC_PITCH_OFFSET_CNTL (1 << 0) @@ -301,9 +295,6 @@ extern int radeon_cp_vertex2( struct inode *inode, struct file *filp, # define RADEON_ROP_ENABLE (1 << 6) # define RADEON_STENCIL_ENABLE (1 << 7) # define RADEON_Z_ENABLE (1 << 8) -# define RADEON_DEPTH_XZ_OFFEST_ENABLE (1 << 9) -# define RADEON_ZBLOCK8 (0 << 15) -# define RADEON_ZBLOCK16 (1 << 15) #define RADEON_RB3D_DEPTHOFFSET 0x1c24 #define RADEON_RB3D_PLANEMASK 0x1d84 #define RADEON_RB3D_STENCILREFMASK 0x1d7c @@ -369,6 +360,15 @@ extern int radeon_cp_vertex2( struct inode *inode, struct file *filp, #define RADEON_SE_LINE_WIDTH 0x1db8 #define RADEON_SE_VPORT_XSCALE 0x1d98 #define RADEON_SE_ZBIAS_FACTOR 0x1db0 +#define RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED 0x2210 +#define RADEON_SE_TCL_OUTPUT_VTX_FMT 0x2254 +#define RADEON_SE_TCL_VECTOR_INDX_REG 0x2200 +# define RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT 16 +# define RADEON_VEC_INDX_DWORD_COUNT_SHIFT 28 +#define RADEON_SE_TCL_VECTOR_DATA_REG 0x2204 +#define RADEON_SE_TCL_SCALAR_INDX_REG 0x2208 +# define RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT 16 +#define RADEON_SE_TCL_SCALAR_DATA_REG 0x220C #define RADEON_SURFACE_ACCESS_FLAGS 0x0bf8 #define RADEON_SURFACE_ACCESS_CLR 0x0bfc #define RADEON_SURFACE_CNTL 0x0b00 @@ -470,8 +470,10 @@ extern int radeon_cp_vertex2( struct inode *inode, struct file *filp, #define RADEON_CP_PACKET3 0xC0000000 # define RADEON_3D_RNDR_GEN_INDX_PRIM 0x00002300 # define RADEON_WAIT_FOR_IDLE 0x00002600 +# define RADEON_3D_DRAW_VBUF 0x00002800 # define RADEON_3D_DRAW_IMMD 0x00002900 -# define RADEON_3D_CLEAR_ZMASK 0x00003200 +# define RADEON_3D_DRAW_INDX 0x00002A00 +# define RADEON_3D_LOAD_VBPNTR 0x00002F00 # define RADEON_CNTL_HOSTDATA_BLT 0x00009400 # define RADEON_CNTL_PAINT_MULTI 0x00009A00 # define RADEON_CNTL_BITBLT_MULTI 0x00009B00 @@ -483,6 +485,7 @@ extern int radeon_cp_vertex2( struct inode *inode, struct file *filp, #define RADEON_CP_PACKET1_REG1_MASK 0x003ff800 #define RADEON_VTX_Z_PRESENT (1 << 31) +#define RADEON_VTX_PKCOLOR_PRESENT (1 << 3) #define RADEON_PRIM_TYPE_NONE (0 << 0) #define RADEON_PRIM_TYPE_POINT (1 << 0) @@ -696,7 +699,7 @@ do { \ #define RADEON_VERBOSE 0 -#define RING_LOCALS int write; unsigned int mask; volatile u32 *ring; +#define RING_LOCALS int write, _nr; unsigned int mask; volatile u32 *ring; #define BEGIN_RING( n ) do { \ if ( RADEON_VERBOSE ) { \ @@ -704,9 +707,10 @@ do { \ n, __FUNCTION__ ); \ } \ if ( dev_priv->ring.space <= (n) * sizeof(u32) ) { \ + COMMIT_RING(); \ radeon_wait_ring( dev_priv, (n) * sizeof(u32) ); \ } \ - dev_priv->ring.space -= (n) * sizeof(u32); \ + _nr = n; dev_priv->ring.space -= (n) * sizeof(u32); \ ring = dev_priv->ring.start; \ write = dev_priv->ring.tail; \ mask = dev_priv->ring.tail_mask; \ @@ -717,9 +721,17 @@ do { \ DRM_INFO( "ADVANCE_RING() wr=0x%06x tail=0x%06x\n", \ write, dev_priv->ring.tail ); \ } \ - radeon_flush_write_combine(); \ - dev_priv->ring.tail = write; \ - RADEON_WRITE( RADEON_CP_RB_WPTR, write ); \ + if (((dev_priv->ring.tail + _nr) & mask) != write) { \ + DRM_ERROR( \ + "ADVANCE_RING(): mismatch: nr: %x write: %x\n", \ + ((dev_priv->ring.tail + _nr) & mask), \ + write); \ + } else \ + dev_priv->ring.tail = write; \ +} while (0) + +#define COMMIT_RING() do { \ + RADEON_WRITE( RADEON_CP_RB_WPTR, dev_priv->ring.tail ); \ } while (0) #define OUT_RING( x ) do { \ @@ -736,6 +748,30 @@ do { \ OUT_RING( val ); \ } while (0) + +#define OUT_RING_USER_TABLE( tab, sz ) do { \ + int _size = (sz); \ + int *_tab = (tab); \ + \ + if (write + _size > mask) { \ + int i = (mask+1) - write; \ + if (__copy_from_user( (int *)(ring+write), \ + _tab, i*4 )) \ + return -EFAULT; \ + write = 0; \ + _size -= i; \ + _tab += i; \ + } \ + \ + if (_size && __copy_from_user( (int *)(ring+write), \ + _tab, _size*4 )) \ + return -EFAULT; \ + \ + write += _size; \ + write &= mask; \ +} while (0) + + #define RADEON_PERFORMANCE_BOXES 0 #endif /* __RADEON_DRV_H__ */ diff --git a/linux/radeon_state.c b/linux/radeon_state.c index cc518a0e..08a8f8a8 100644 --- a/linux/radeon_state.c +++ b/linux/radeon_state.c @@ -49,329 +49,210 @@ static inline void radeon_emit_clip_rect( drm_radeon_private_t *dev_priv, box->x1, box->y1, box->x2, box->y2 ); BEGIN_RING( 4 ); - OUT_RING( CP_PACKET0( RADEON_RE_TOP_LEFT, 0 ) ); OUT_RING( (box->y1 << 16) | box->x1 ); - OUT_RING( CP_PACKET0( RADEON_RE_WIDTH_HEIGHT, 0 ) ); - OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) ); - - ADVANCE_RING(); -} - -static inline void radeon_emit_context( drm_radeon_private_t *dev_priv, - drm_radeon_context_regs_t *ctx ) -{ - RING_LOCALS; - DRM_DEBUG( " %s\n", __FUNCTION__ ); - - BEGIN_RING( 14 ); - - OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) ); - OUT_RING( ctx->pp_misc ); - OUT_RING( ctx->pp_fog_color ); - OUT_RING( ctx->re_solid_color ); - OUT_RING( ctx->rb3d_blendcntl ); - OUT_RING( ctx->rb3d_depthoffset ); - OUT_RING( ctx->rb3d_depthpitch ); - OUT_RING( ctx->rb3d_zstencilcntl ); - - OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) ); - OUT_RING( ctx->pp_cntl ); - OUT_RING( ctx->rb3d_cntl ); - OUT_RING( ctx->rb3d_coloroffset ); - - OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) ); - OUT_RING( ctx->rb3d_colorpitch ); - - ADVANCE_RING(); -} - -static inline void radeon_emit_vertfmt( drm_radeon_private_t *dev_priv, - drm_radeon_context_regs_t *ctx ) -{ - RING_LOCALS; - DRM_DEBUG( " %s\n", __FUNCTION__ ); - - BEGIN_RING( 2 ); - - OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) ); - OUT_RING( ctx->se_coord_fmt ); - - ADVANCE_RING(); -} - -static inline void radeon_emit_line( drm_radeon_private_t *dev_priv, - drm_radeon_context_regs_t *ctx ) -{ - RING_LOCALS; -/* printk( " %s %x %x %x\n", __FUNCTION__, */ -/* ctx->re_line_pattern, */ -/* ctx->re_line_state, */ -/* ctx->se_line_width); */ - - BEGIN_RING( 5 ); - - OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) ); - OUT_RING( ctx->re_line_pattern ); - OUT_RING( ctx->re_line_state ); - - OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) ); - OUT_RING( ctx->se_line_width ); - - ADVANCE_RING(); -} - -static inline void radeon_emit_bumpmap( drm_radeon_private_t *dev_priv, - drm_radeon_context_regs_t *ctx ) -{ - RING_LOCALS; - DRM_DEBUG( " %s\n", __FUNCTION__ ); - - BEGIN_RING( 5 ); - - OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) ); - OUT_RING( ctx->pp_lum_matrix ); - - OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) ); - OUT_RING( ctx->pp_rot_matrix_0 ); - OUT_RING( ctx->pp_rot_matrix_1 ); - - ADVANCE_RING(); -} - -static inline void radeon_emit_masks( drm_radeon_private_t *dev_priv, - drm_radeon_context_regs_t *ctx ) -{ - RING_LOCALS; - DRM_DEBUG( " %s\n", __FUNCTION__ ); - - BEGIN_RING( 4 ); - - OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) ); - OUT_RING( ctx->rb3d_stencilrefmask ); - OUT_RING( ctx->rb3d_ropcntl ); - OUT_RING( ctx->rb3d_planemask ); - - ADVANCE_RING(); -} - -static inline void radeon_emit_viewport( drm_radeon_private_t *dev_priv, - drm_radeon_context_regs_t *ctx ) -{ - RING_LOCALS; - DRM_DEBUG( " %s\n", __FUNCTION__ ); - - BEGIN_RING( 7 ); - - OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) ); - OUT_RING( ctx->se_vport_xscale ); - OUT_RING( ctx->se_vport_xoffset ); - OUT_RING( ctx->se_vport_yscale ); - OUT_RING( ctx->se_vport_yoffset ); - OUT_RING( ctx->se_vport_zscale ); - OUT_RING( ctx->se_vport_zoffset ); - ADVANCE_RING(); -} - -static inline void radeon_emit_setup( drm_radeon_private_t *dev_priv, - drm_radeon_context_regs_t *ctx ) -{ - RING_LOCALS; - DRM_DEBUG( " %s\n", __FUNCTION__ ); - - BEGIN_RING( 4 ); - - OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) ); - OUT_RING( ctx->se_cntl ); - OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) ); - OUT_RING( ctx->se_cntl_status ); - - ADVANCE_RING(); -} - - -static inline void radeon_emit_misc( drm_radeon_private_t *dev_priv, - drm_radeon_context_regs_t *ctx ) -{ - RING_LOCALS; - DRM_DEBUG( " %s\n", __FUNCTION__ ); - - BEGIN_RING( 2 ); - - OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) ); - OUT_RING( ctx->re_misc ); - - ADVANCE_RING(); -} - -static inline void radeon_emit_tex0( drm_radeon_private_t *dev_priv, - drm_radeon_texture_regs_t *tex ) -{ - RING_LOCALS; - DRM_DEBUG( " %s: offset=0x%x\n", __FUNCTION__, tex->pp_txoffset ); - - BEGIN_RING( 9 ); - - OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) ); - OUT_RING( tex->pp_txfilter ); - OUT_RING( tex->pp_txformat ); - OUT_RING( tex->pp_txoffset ); - OUT_RING( tex->pp_txcblend ); - OUT_RING( tex->pp_txablend ); - OUT_RING( tex->pp_tfactor ); - - OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) ); - OUT_RING( tex->pp_border_color ); - - ADVANCE_RING(); -} - -static inline void radeon_emit_tex1( drm_radeon_private_t *dev_priv, - drm_radeon_texture_regs_t *tex ) -{ - RING_LOCALS; - DRM_DEBUG( " %s: offset=0x%x\n", __FUNCTION__, tex->pp_txoffset ); - - BEGIN_RING( 9 ); - - OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) ); - OUT_RING( tex->pp_txfilter ); - OUT_RING( tex->pp_txformat ); - OUT_RING( tex->pp_txoffset ); - OUT_RING( tex->pp_txcblend ); - OUT_RING( tex->pp_txablend ); - OUT_RING( tex->pp_tfactor ); - - OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) ); - OUT_RING( tex->pp_border_color ); - +/* OUT_RING( ((box->y2 - 1) << 16) | (box->x2 - 1) );*/ + OUT_RING( (box->y2 << 16) | box->x2 ); ADVANCE_RING(); } -static inline void radeon_emit_tex2( drm_radeon_private_t *dev_priv, - drm_radeon_texture_regs_t *tex ) +/* Emit 1.1 state + */ +static void radeon_emit_state( drm_radeon_private_t *dev_priv, + drm_radeon_context_regs_t *ctx, + drm_radeon_texture_regs_t *tex, + unsigned int dirty ) { RING_LOCALS; - DRM_DEBUG( " %s\n", __FUNCTION__ ); - - BEGIN_RING( 9 ); - - OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) ); - OUT_RING( tex->pp_txfilter ); - OUT_RING( tex->pp_txformat ); - OUT_RING( tex->pp_txoffset ); - OUT_RING( tex->pp_txcblend ); - OUT_RING( tex->pp_txablend ); - OUT_RING( tex->pp_tfactor ); - - OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) ); - OUT_RING( tex->pp_border_color ); - - ADVANCE_RING(); -} - -#if 0 -static void radeon_print_dirty( const char *msg, unsigned int flags ) -{ - DRM_DEBUG( "%s: (0x%x) %s%s%s%s%s%s%s%s%s%s%s%s%s\n", - msg, - flags, - (flags & RADEON_UPLOAD_CONTEXT) ? "context, " : "", - (flags & RADEON_UPLOAD_VERTFMT) ? "vertfmt, " : "", - (flags & RADEON_UPLOAD_LINE) ? "line, " : "", - (flags & RADEON_UPLOAD_BUMPMAP) ? "bumpmap, " : "", - (flags & RADEON_UPLOAD_MASKS) ? "masks, " : "", - (flags & RADEON_UPLOAD_VIEWPORT) ? "viewport, " : "", - (flags & RADEON_UPLOAD_SETUP) ? "setup, " : "", - (flags & RADEON_UPLOAD_MISC) ? "misc, " : "", - (flags & RADEON_UPLOAD_TEX0) ? "tex0, " : "", - (flags & RADEON_UPLOAD_TEX1) ? "tex1, " : "", - (flags & RADEON_UPLOAD_TEX2) ? "tex2, " : "", - (flags & RADEON_UPLOAD_CLIPRECTS) ? "cliprects, " : "", - (flags & RADEON_REQUIRE_QUIESCENCE) ? "quiescence, " : "" ); -} -#endif - -static inline void radeon_emit_state( drm_radeon_private_t *dev_priv, - drm_radeon_context_regs_t *ctx, - drm_radeon_texture_regs_t *tex, - unsigned int dirty ) -{ DRM_DEBUG( "%s: dirty=0x%08x\n", __FUNCTION__, dirty ); if ( dirty & RADEON_UPLOAD_CONTEXT ) { - radeon_emit_context( dev_priv, ctx ); + BEGIN_RING( 14 ); + OUT_RING( CP_PACKET0( RADEON_PP_MISC, 6 ) ); + OUT_RING( ctx->pp_misc ); + OUT_RING( ctx->pp_fog_color ); + OUT_RING( ctx->re_solid_color ); + OUT_RING( ctx->rb3d_blendcntl ); + OUT_RING( ctx->rb3d_depthoffset ); + OUT_RING( ctx->rb3d_depthpitch ); + OUT_RING( ctx->rb3d_zstencilcntl ); + OUT_RING( CP_PACKET0( RADEON_PP_CNTL, 2 ) ); + OUT_RING( ctx->pp_cntl ); + OUT_RING( ctx->rb3d_cntl ); + OUT_RING( ctx->rb3d_coloroffset ); + OUT_RING( CP_PACKET0( RADEON_RB3D_COLORPITCH, 0 ) ); + OUT_RING( ctx->rb3d_colorpitch ); + ADVANCE_RING(); } if ( dirty & RADEON_UPLOAD_VERTFMT ) { - radeon_emit_vertfmt( dev_priv, ctx ); + BEGIN_RING( 2 ); + OUT_RING( CP_PACKET0( RADEON_SE_COORD_FMT, 0 ) ); + OUT_RING( ctx->se_coord_fmt ); + ADVANCE_RING(); } if ( dirty & RADEON_UPLOAD_LINE ) { - radeon_emit_line( dev_priv, ctx ); + BEGIN_RING( 5 ); + OUT_RING( CP_PACKET0( RADEON_RE_LINE_PATTERN, 1 ) ); + OUT_RING( ctx->re_line_pattern ); + OUT_RING( ctx->re_line_state ); + OUT_RING( CP_PACKET0( RADEON_SE_LINE_WIDTH, 0 ) ); + OUT_RING( ctx->se_line_width ); + ADVANCE_RING(); } if ( dirty & RADEON_UPLOAD_BUMPMAP ) { - radeon_emit_bumpmap( dev_priv, ctx ); + BEGIN_RING( 5 ); + OUT_RING( CP_PACKET0( RADEON_PP_LUM_MATRIX, 0 ) ); + OUT_RING( ctx->pp_lum_matrix ); + OUT_RING( CP_PACKET0( RADEON_PP_ROT_MATRIX_0, 1 ) ); + OUT_RING( ctx->pp_rot_matrix_0 ); + OUT_RING( ctx->pp_rot_matrix_1 ); + ADVANCE_RING(); } if ( dirty & RADEON_UPLOAD_MASKS ) { - radeon_emit_masks( dev_priv, ctx ); + BEGIN_RING( 4 ); + OUT_RING( CP_PACKET0( RADEON_RB3D_STENCILREFMASK, 2 ) ); + OUT_RING( ctx->rb3d_stencilrefmask ); + OUT_RING( ctx->rb3d_ropcntl ); + OUT_RING( ctx->rb3d_planemask ); + ADVANCE_RING(); } if ( dirty & RADEON_UPLOAD_VIEWPORT ) { - radeon_emit_viewport( dev_priv, ctx ); + BEGIN_RING( 7 ); + OUT_RING( CP_PACKET0( RADEON_SE_VPORT_XSCALE, 5 ) ); + OUT_RING( ctx->se_vport_xscale ); + OUT_RING( ctx->se_vport_xoffset ); + OUT_RING( ctx->se_vport_yscale ); + OUT_RING( ctx->se_vport_yoffset ); + OUT_RING( ctx->se_vport_zscale ); + OUT_RING( ctx->se_vport_zoffset ); + ADVANCE_RING(); } if ( dirty & RADEON_UPLOAD_SETUP ) { - radeon_emit_setup( dev_priv, ctx ); + BEGIN_RING( 4 ); + OUT_RING( CP_PACKET0( RADEON_SE_CNTL, 0 ) ); + OUT_RING( ctx->se_cntl ); + OUT_RING( CP_PACKET0( RADEON_SE_CNTL_STATUS, 0 ) ); + OUT_RING( ctx->se_cntl_status ); + ADVANCE_RING(); } if ( dirty & RADEON_UPLOAD_MISC ) { - radeon_emit_misc( dev_priv, ctx ); + BEGIN_RING( 2 ); + OUT_RING( CP_PACKET0( RADEON_RE_MISC, 0 ) ); + OUT_RING( ctx->re_misc ); + ADVANCE_RING(); } if ( dirty & RADEON_UPLOAD_TEX0 ) { - radeon_emit_tex0( dev_priv, &tex[0] ); + BEGIN_RING( 9 ); + OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_0, 5 ) ); + OUT_RING( tex[0].pp_txfilter ); + OUT_RING( tex[0].pp_txformat ); + OUT_RING( tex[0].pp_txoffset ); + OUT_RING( tex[0].pp_txcblend ); + OUT_RING( tex[0].pp_txablend ); + OUT_RING( tex[0].pp_tfactor ); + OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_0, 0 ) ); + OUT_RING( tex[0].pp_border_color ); + ADVANCE_RING(); } if ( dirty & RADEON_UPLOAD_TEX1 ) { - radeon_emit_tex1( dev_priv, &tex[1] ); + BEGIN_RING( 9 ); + OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_1, 5 ) ); + OUT_RING( tex[1].pp_txfilter ); + OUT_RING( tex[1].pp_txformat ); + OUT_RING( tex[1].pp_txoffset ); + OUT_RING( tex[1].pp_txcblend ); + OUT_RING( tex[1].pp_txablend ); + OUT_RING( tex[1].pp_tfactor ); + OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_1, 0 ) ); + OUT_RING( tex[1].pp_border_color ); + ADVANCE_RING(); } if ( dirty & RADEON_UPLOAD_TEX2 ) { - radeon_emit_tex2( dev_priv, &tex[2] ); + BEGIN_RING( 9 ); + OUT_RING( CP_PACKET0( RADEON_PP_TXFILTER_2, 5 ) ); + OUT_RING( tex[2].pp_txfilter ); + OUT_RING( tex[2].pp_txformat ); + OUT_RING( tex[2].pp_txoffset ); + OUT_RING( tex[2].pp_txcblend ); + OUT_RING( tex[2].pp_txablend ); + OUT_RING( tex[2].pp_tfactor ); + OUT_RING( CP_PACKET0( RADEON_PP_BORDER_COLOR_2, 0 ) ); + OUT_RING( tex[2].pp_border_color ); + ADVANCE_RING(); } } - - -static inline void radeon_emit_zbias( drm_radeon_private_t *dev_priv, - drm_radeon_context2_regs_t *ctx ) +/* Emit 1.2 state + */ +static void radeon_emit_state2( drm_radeon_private_t *dev_priv, + drm_radeon_state_t *state ) { RING_LOCALS; -/* printk( " %s %x %x\n", __FUNCTION__, */ -/* ctx->se_zbias_factor, */ -/* ctx->se_zbias_constant ); */ - - BEGIN_RING( 3 ); - OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) ); - OUT_RING( ctx->se_zbias_factor ); - OUT_RING( ctx->se_zbias_constant ); - ADVANCE_RING(); -} -static inline void radeon_emit_state2( drm_radeon_private_t *dev_priv, - drm_radeon_state_t *state ) -{ - if (state->dirty & RADEON_UPLOAD_ZBIAS) - radeon_emit_zbias( dev_priv, &state->context2 ); + if (state->dirty & RADEON_UPLOAD_ZBIAS) { + BEGIN_RING( 3 ); + OUT_RING( CP_PACKET0( RADEON_SE_ZBIAS_FACTOR, 1 ) ); + OUT_RING( state->context2.se_zbias_factor ); + OUT_RING( state->context2.se_zbias_constant ); + ADVANCE_RING(); + } radeon_emit_state( dev_priv, &state->context, state->tex, state->dirty ); } +/* New (1.3) state mechanism. 3 commands (packet, scalar, vector) in + * 1.3 cmdbuffers allow all previous state to be updated as well as + * the tcl scalar and vector areas. + */ +static struct { + int start; + int len; + const char *name; +} packet[RADEON_MAX_STATE_PACKETS] = { + { RADEON_PP_MISC,7,"RADEON_PP_MISC" }, + { RADEON_PP_CNTL,3,"RADEON_PP_CNTL" }, + { RADEON_RB3D_COLORPITCH,1,"RADEON_RB3D_COLORPITCH" }, + { RADEON_RE_LINE_PATTERN,2,"RADEON_RE_LINE_PATTERN" }, + { RADEON_SE_LINE_WIDTH,1,"RADEON_SE_LINE_WIDTH" }, + { RADEON_PP_LUM_MATRIX,1,"RADEON_PP_LUM_MATRIX" }, + { RADEON_PP_ROT_MATRIX_0,2,"RADEON_PP_ROT_MATRIX_0" }, + { RADEON_RB3D_STENCILREFMASK,3,"RADEON_RB3D_STENCILREFMASK" }, + { RADEON_SE_VPORT_XSCALE,6,"RADEON_SE_VPORT_XSCALE" }, + { RADEON_SE_CNTL,2,"RADEON_SE_CNTL" }, + { RADEON_SE_CNTL_STATUS,1,"RADEON_SE_CNTL_STATUS" }, + { RADEON_RE_MISC,1,"RADEON_RE_MISC" }, + { RADEON_PP_TXFILTER_0,6,"RADEON_PP_TXFILTER_0" }, + { RADEON_PP_BORDER_COLOR_0,1,"RADEON_PP_BORDER_COLOR_0" }, + { RADEON_PP_TXFILTER_1,6,"RADEON_PP_TXFILTER_1" }, + { RADEON_PP_BORDER_COLOR_1,1,"RADEON_PP_BORDER_COLOR_1" }, + { RADEON_PP_TXFILTER_2,6,"RADEON_PP_TXFILTER_2" }, + { RADEON_PP_BORDER_COLOR_2,1,"RADEON_PP_BORDER_COLOR_2" }, + { RADEON_SE_ZBIAS_FACTOR,2,"RADEON_SE_ZBIAS_FACTOR" }, + { RADEON_SE_TCL_OUTPUT_VTX_FMT,11,"RADEON_SE_TCL_OUTPUT_VTX_FMT" }, + { RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED,17,"RADEON_SE_TCL_MATERIAL_EMMISSIVE_RED" }, +}; + + + + + + + + + + #if RADEON_PERFORMANCE_BOXES /* ================================================================ * Performance monitoring functions @@ -552,7 +433,7 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, radeon_emit_clip_rect( dev_priv, &sarea_priv->boxes[i] ); - BEGIN_RING( 25 ); + BEGIN_RING( 28 ); RADEON_WAIT_UNTIL_2D_IDLE(); @@ -569,32 +450,32 @@ static void radeon_cp_dispatch_clear( drm_device_t *dev, OUT_RING_REG( RADEON_SE_CNTL, depth_clear->se_cntl ); - OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 10 ) ); - OUT_RING( RADEON_VTX_Z_PRESENT ); + /* Radeon 7500 doesn't like vertices without + * color. + */ + OUT_RING( CP_PACKET3( RADEON_3D_DRAW_IMMD, 13 ) ); + OUT_RING( RADEON_VTX_Z_PRESENT | + RADEON_VTX_PKCOLOR_PRESENT); OUT_RING( (RADEON_PRIM_TYPE_RECT_LIST | RADEON_PRIM_WALK_RING | RADEON_MAOS_ENABLE | RADEON_VTX_FMT_RADEON_MODE | (3 << RADEON_NUM_VERTICES_SHIFT)) ); -/* printk( "depth box %d: %x %x %x %x\n", */ -/* i, */ -/* depth_boxes[i].ui[CLEAR_X1], */ -/* depth_boxes[i].ui[CLEAR_Y1], */ -/* depth_boxes[i].ui[CLEAR_X2], */ -/* depth_boxes[i].ui[CLEAR_Y2]); */ - OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); OUT_RING( depth_boxes[i].ui[CLEAR_Y1] ); OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); + OUT_RING( 0x0 ); OUT_RING( depth_boxes[i].ui[CLEAR_X1] ); OUT_RING( depth_boxes[i].ui[CLEAR_Y2] ); OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); + OUT_RING( 0x0 ); OUT_RING( depth_boxes[i].ui[CLEAR_X2] ); OUT_RING( depth_boxes[i].ui[CLEAR_Y2] ); OUT_RING( depth_boxes[i].ui[CLEAR_DEPTH] ); + OUT_RING( 0x0 ); ADVANCE_RING(); @@ -664,9 +545,17 @@ static void radeon_cp_dispatch_swap( drm_device_t *dev ) RADEON_DP_SRC_SOURCE_MEMORY | RADEON_GMC_CLR_CMP_CNTL_DIS | RADEON_GMC_WR_MSK_DIS ); - - OUT_RING( dev_priv->back_pitch_offset ); - OUT_RING( dev_priv->front_pitch_offset ); + + /* Make this work even if front & back are flipped: + */ + if (dev_priv->current_page == 0) { + OUT_RING( dev_priv->back_pitch_offset ); + OUT_RING( dev_priv->front_pitch_offset ); + } + else { + OUT_RING( dev_priv->front_pitch_offset ); + OUT_RING( dev_priv->back_pitch_offset ); + } OUT_RING( (x << 16) | y ); OUT_RING( (x << 16) | y ); @@ -701,11 +590,12 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev ) radeon_cp_performance_boxes( dev_priv ); #endif - BEGIN_RING( 6 ); + BEGIN_RING( 4 ); RADEON_WAIT_UNTIL_3D_IDLE(); +/* RADEON_WAIT_UNTIL_PAGE_FLIPPED(); - +*/ OUT_RING( CP_PACKET0( RADEON_CRTC_OFFSET, 0 ) ); if ( dev_priv->current_page == 0 ) { @@ -723,6 +613,7 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev ) * performing the swapbuffer ioctl. */ dev_priv->sarea_priv->last_frame++; + dev_priv->sarea_priv->pfCurrentPage = dev_priv->current_page; BEGIN_RING( 2 ); @@ -731,78 +622,75 @@ static void radeon_cp_dispatch_flip( drm_device_t *dev ) ADVANCE_RING(); } - -static void radeon_cp_dispatch_vertex( drm_device_t *dev, - drm_buf_t *buf, - drm_radeon_prim_t *prim ) +static int bad_prim_vertex_nr( int primitive, int nr ) { - drm_radeon_private_t *dev_priv = dev->dev_private; - drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; - drm_radeon_buf_priv_t *buf_priv = buf->dev_private; - int offset = dev_priv->agp_buffers_offset + buf->offset + prim->start; - int numverts = (int)prim->numverts; - int i = 0; - RING_LOCALS; - - DRM_DEBUG( __FUNCTION__": nbox=%d %d..%d prim %x nvert %d\n", - sarea_priv->nbox, prim->start, prim->finish, - prim->prim, numverts ); - - switch (prim->prim & RADEON_PRIM_TYPE_MASK) { + switch (primitive & RADEON_PRIM_TYPE_MASK) { case RADEON_PRIM_TYPE_NONE: case RADEON_PRIM_TYPE_POINT: - if (prim->numverts < 1) { - DRM_ERROR( "Bad nr verts for line %d\n", - prim->numverts); - return; - } - break; + return nr < 1; case RADEON_PRIM_TYPE_LINE: - if ((prim->numverts & 1) || prim->numverts == 0) { - DRM_ERROR( "Bad nr verts for line %d\n", - prim->numverts); - return; - } - break; + return (nr & 1) || nr == 0; case RADEON_PRIM_TYPE_LINE_STRIP: - if (prim->numverts < 2) { - DRM_ERROR( "Bad nr verts for line_strip %d\n", - prim->numverts); - return; - } - break; + return nr < 2; case RADEON_PRIM_TYPE_TRI_LIST: case RADEON_PRIM_TYPE_3VRT_POINT_LIST: case RADEON_PRIM_TYPE_3VRT_LINE_LIST: case RADEON_PRIM_TYPE_RECT_LIST: - if (prim->numverts % 3 || prim->numverts == 0) { - DRM_ERROR( "Bad nr verts for tri %d\n", - prim->numverts); - return; - } - break; + return nr % 3 || nr == 0; case RADEON_PRIM_TYPE_TRI_FAN: case RADEON_PRIM_TYPE_TRI_STRIP: - if (prim->numverts < 3) { - DRM_ERROR( "Bad nr verts for strip/fan %d\n", - prim->numverts); - return; - } - break; + return nr < 3; default: - DRM_ERROR( "buffer prim %x start %x\n", - prim->prim, prim->start ); - return; + return 1; } +} + - buf_priv->dispatched = 1; +typedef struct { + unsigned int start; + unsigned int finish; + unsigned int prim; + unsigned int numverts; + unsigned int offset; + unsigned int vc_format; +} drm_radeon_tcl_prim_t; + +static void radeon_cp_dispatch_vertex( drm_device_t *dev, + drm_buf_t *buf, + drm_radeon_tcl_prim_t *prim, + drm_clip_rect_t *boxes, + int nbox ) + +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_clip_rect_t box; + int offset = dev_priv->agp_buffers_offset + buf->offset + prim->start; + int numverts = (int)prim->numverts; + int i = 0; + RING_LOCALS; + + DRM_DEBUG("%s: hwprim 0x%x vfmt 0x%x %d..%d %d verts\n", + __FUNCTION__, + prim->prim, + prim->vc_format, + prim->start, + prim->finish, + prim->numverts); + + if (bad_prim_vertex_nr( prim->prim, prim->numverts )) { + DRM_ERROR( "bad prim %x numverts %d\n", + prim->prim, prim->numverts ); + return; + } do { /* Emit the next cliprect */ - if ( i < sarea_priv->nbox ) { - radeon_emit_clip_rect( dev_priv, - &sarea_priv->boxes[i] ); + if ( i < nbox ) { + if (__copy_from_user( &box, &boxes[i], sizeof(box) )) + return; + + radeon_emit_clip_rect( dev_priv, &box ); } /* Emit the vertex buffer rendering commands */ @@ -820,19 +708,18 @@ static void radeon_cp_dispatch_vertex( drm_device_t *dev, ADVANCE_RING(); i++; - } while ( i < sarea_priv->nbox ); - - dev_priv->sarea_priv->last_dispatch++; + } while ( i < nbox ); } + static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf ) { drm_radeon_private_t *dev_priv = dev->dev_private; drm_radeon_buf_priv_t *buf_priv = buf->dev_private; RING_LOCALS; - buf_priv->age = dev_priv->sarea_priv->last_dispatch; + buf_priv->age = ++dev_priv->sarea_priv->last_dispatch; /* Emit the vertex buffer age */ BEGIN_RING( 2 ); @@ -841,8 +728,6 @@ static void radeon_cp_discard_buffer( drm_device_t *dev, drm_buf_t *buf ) buf->pending = 1; buf->used = 0; - /* FIXME: Check dispatched field */ - buf_priv->dispatched = 0; } static void radeon_cp_dispatch_indirect( drm_device_t *dev, @@ -850,7 +735,6 @@ static void radeon_cp_dispatch_indirect( drm_device_t *dev, int start, int end ) { drm_radeon_private_t *dev_priv = dev->dev_private; - drm_radeon_buf_priv_t *buf_priv = buf->dev_private; RING_LOCALS; DRM_DEBUG( "indirect: buf=%d s=0x%x e=0x%x\n", buf->idx, start, end ); @@ -871,8 +755,6 @@ static void radeon_cp_dispatch_indirect( drm_device_t *dev, data[dwords++] = RADEON_CP_PACKET2; } - buf_priv->dispatched = 1; - /* Fire off the indirect buffer */ BEGIN_RING( 3 ); @@ -882,112 +764,76 @@ static void radeon_cp_dispatch_indirect( drm_device_t *dev, ADVANCE_RING(); } - - dev_priv->sarea_priv->last_dispatch++; } + static void radeon_cp_dispatch_indices( drm_device_t *dev, drm_buf_t *elt_buf, - drm_radeon_prim_t *prim ) + drm_radeon_tcl_prim_t *prim, + drm_clip_rect_t *boxes, + int nbox ) { drm_radeon_private_t *dev_priv = dev->dev_private; - drm_radeon_buf_priv_t *buf_priv = elt_buf->dev_private; - drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; - int offset = dev_priv->agp_buffers_offset + prim->numverts * 64; + drm_clip_rect_t box; + int offset = dev_priv->agp_buffers_offset + prim->offset; u32 *data; int dwords; int i = 0; int start = prim->start + RADEON_INDEX_PRIM_OFFSET; int count = (prim->finish - start) / sizeof(u16); - DRM_DEBUG( "indices: start=%x/%x end=%x count=%d nv %d offset %x\n", - prim->start, start, prim->finish, - count, prim->numverts, offset ); - - switch (prim->prim & RADEON_PRIM_TYPE_MASK) { - case RADEON_PRIM_TYPE_NONE: - case RADEON_PRIM_TYPE_POINT: - if (count < 1) { - DRM_ERROR( "Bad nr verts %d\n", - count); - return; - } - break; - case RADEON_PRIM_TYPE_LINE: - if ((count & 1) || count == 0) { - DRM_ERROR( "Bad nr verts for line %d\n", - count); - return; - } - break; - case RADEON_PRIM_TYPE_LINE_STRIP: - if (count < 2) { - DRM_ERROR( "Bad nr verts for line_strip %d\n", - count); - return; - } - break; - case RADEON_PRIM_TYPE_TRI_LIST: - case RADEON_PRIM_TYPE_3VRT_POINT_LIST: - case RADEON_PRIM_TYPE_3VRT_LINE_LIST: - case RADEON_PRIM_TYPE_RECT_LIST: - if (count % 3 || count == 0) { - DRM_ERROR( "Bad nr verts for tri %d\n", count); - return; - } - break; - case RADEON_PRIM_TYPE_TRI_FAN: - case RADEON_PRIM_TYPE_TRI_STRIP: - if (count < 3) { - DRM_ERROR( "Bad nr verts for strip/fan %d\n", count); - return; - } - break; - default: - DRM_ERROR( "buffer prim %x start %x\n", - prim->prim, prim->start ); + DRM_DEBUG("%s: hwprim 0x%x vfmt 0x%x %d..%d offset: %x nr %d\n", + __FUNCTION__, + prim->prim, + prim->vc_format, + prim->start, + prim->finish, + prim->offset, + prim->numverts); + + if (bad_prim_vertex_nr( prim->prim, count )) { + DRM_ERROR( "bad prim %x count %d\n", + prim->prim, count ); return; - } + } - if ( start < prim->finish ) { - buf_priv->dispatched = 1; - dwords = (prim->finish - prim->start + 3) / sizeof(u32); + if ( start >= prim->finish || + (prim->start & 0x7) ) { + DRM_ERROR( "buffer prim %d\n", prim->prim ); + return; + } - data = (u32 *)((char *)dev_priv->buffers->handle + - elt_buf->offset + prim->start); + dwords = (prim->finish - prim->start + 3) / sizeof(u32); - data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 ); - data[1] = offset; - data[2] = RADEON_MAX_VB_VERTS; - data[3] = prim->vc_format; - data[4] = (prim->prim | - RADEON_PRIM_WALK_IND | - RADEON_COLOR_ORDER_RGBA | - RADEON_VTX_FMT_RADEON_MODE | - (count << RADEON_NUM_VERTICES_SHIFT) ); + data = (u32 *)((char *)dev_priv->buffers->handle + + elt_buf->offset + prim->start); - if ( count & 0x1 ) { - /* unnecessary? */ - data[dwords-1] &= 0x0000ffff; - } + data[0] = CP_PACKET3( RADEON_3D_RNDR_GEN_INDX_PRIM, dwords-2 ); + data[1] = offset; + data[2] = prim->numverts; + data[3] = prim->vc_format; + data[4] = (prim->prim | + RADEON_PRIM_WALK_IND | + RADEON_COLOR_ORDER_RGBA | + RADEON_VTX_FMT_RADEON_MODE | + (count << RADEON_NUM_VERTICES_SHIFT) ); - do { - /* Emit the next set of up to three cliprects */ - if ( i < sarea_priv->nbox ) { - radeon_emit_clip_rect( dev_priv, - &sarea_priv->boxes[i] ); - } + do { + if ( i < nbox ) { + if (__copy_from_user( &box, &boxes[i], sizeof(box) )) + return; + + radeon_emit_clip_rect( dev_priv, &box ); + } - radeon_cp_dispatch_indirect( dev, elt_buf, - prim->start, - prim->finish ); + radeon_cp_dispatch_indirect( dev, elt_buf, + prim->start, + prim->finish ); - i++; - } while ( i < sarea_priv->nbox ); - } + i++; + } while ( i < nbox ); - sarea_priv->last_dispatch++; } #define RADEON_MAX_TEXTURE_SIZE (RADEON_BUFFER_SIZE - 8 * sizeof(u32)) @@ -998,7 +844,6 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev, { drm_radeon_private_t *dev_priv = dev->dev_private; drm_buf_t *buf; - drm_radeon_buf_priv_t *buf_priv; u32 format; u32 *buffer; u8 *data; @@ -1016,8 +861,6 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev, tex->offset >> 10, tex->pitch, tex->format, image->x, image->y, image->width, image->height ); - buf_priv = buf->dev_private; - /* The compiler won't optimize away a division by a variable, * even if the only legal values are powers of two. Thus, we'll * use a shift instead. @@ -1153,7 +996,6 @@ static int radeon_cp_dispatch_texture( drm_device_t *dev, buf->pid = current->pid; buf->used = (dwords + 8) * sizeof(u32); - buf_priv->discard = 1; radeon_cp_dispatch_indirect( dev, buf, 0, buf->used ); radeon_cp_discard_buffer( dev, buf ); @@ -1223,25 +1065,73 @@ int radeon_cp_clear( struct inode *inode, struct file *filp, sarea_priv->nbox * sizeof(depth_boxes[0]) ) ) return -EFAULT; - /* Needed for depth clears via triangles??? - */ - if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) { - radeon_emit_state( dev_priv, - &sarea_priv->context_state, - sarea_priv->tex_state, - sarea_priv->dirty ); + radeon_cp_dispatch_clear( dev, &clear, depth_boxes ); - sarea_priv->dirty &= ~(RADEON_UPLOAD_TEX0IMAGES | - RADEON_UPLOAD_TEX1IMAGES | - RADEON_UPLOAD_TEX2IMAGES | - RADEON_REQUIRE_QUIESCENCE); - } + COMMIT_RING(); + return 0; +} - radeon_cp_dispatch_clear( dev, &clear, depth_boxes ); + + +/* Not sure why this isn't set all the time: + */ +static int radeon_do_init_pageflip( drm_device_t *dev ) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG( "%s\n", __FUNCTION__ ); + + dev_priv->crtc_offset = RADEON_READ( RADEON_CRTC_OFFSET ); + dev_priv->crtc_offset_cntl = RADEON_READ( RADEON_CRTC_OFFSET_CNTL ); + + RADEON_WRITE( RADEON_CRTC_OFFSET, dev_priv->front_offset ); + RADEON_WRITE( RADEON_CRTC_OFFSET_CNTL, + dev_priv->crtc_offset_cntl | + RADEON_CRTC_OFFSET_FLIP_CNTL ); + + dev_priv->page_flipping = 1; + dev_priv->current_page = 0; + + return 0; +} + +int radeon_do_cleanup_pageflip( drm_device_t *dev ) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG( "%s\n", __FUNCTION__ ); + + RADEON_WRITE( RADEON_CRTC_OFFSET, dev_priv->crtc_offset ); + RADEON_WRITE( RADEON_CRTC_OFFSET_CNTL, dev_priv->crtc_offset_cntl ); + + dev_priv->page_flipping = 0; + dev_priv->current_page = 0; return 0; } +/* Swapping and flipping are different operations, need different ioctls. + * They can & should be intermixed to support multiple 3d windows. + */ +int radeon_cp_flip( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->dev; + drm_radeon_private_t *dev_priv = dev->dev_private; + DRM_DEBUG( "%s\n", __FUNCTION__ ); + + LOCK_TEST_WITH_RETURN( dev ); + + RING_SPACE_TEST_WITH_RETURN( dev_priv ); + + if (!dev_priv->page_flipping) + radeon_do_init_pageflip( dev ); + + radeon_cp_dispatch_flip( dev ); + + COMMIT_RING(); + return 0; +} + int radeon_cp_swap( struct inode *inode, struct file *filp, unsigned int cmd, unsigned long arg ) { @@ -1258,13 +1148,10 @@ int radeon_cp_swap( struct inode *inode, struct file *filp, if ( sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS ) sarea_priv->nbox = RADEON_NR_SAREA_CLIPRECTS; - if ( !dev_priv->page_flipping ) { - radeon_cp_dispatch_swap( dev ); - dev_priv->sarea_priv->ctx_owner = 0; - } else { - radeon_cp_dispatch_flip( dev ); - } + radeon_cp_dispatch_swap( dev ); + dev_priv->sarea_priv->ctx_owner = 0; + COMMIT_RING(); return 0; } @@ -1277,9 +1164,8 @@ int radeon_cp_vertex( struct inode *inode, struct file *filp, drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_device_dma_t *dma = dev->dma; drm_buf_t *buf; - drm_radeon_buf_priv_t *buf_priv; drm_radeon_vertex_t vertex; - drm_radeon_prim_t prim; + drm_radeon_tcl_prim_t prim; LOCK_TEST_WITH_RETURN( dev ); @@ -1311,7 +1197,6 @@ int radeon_cp_vertex( struct inode *inode, struct file *filp, VB_AGE_TEST_WITH_RETURN( dev_priv ); buf = dma->buflist[vertex.idx]; - buf_priv = buf->dev_private; if ( buf->pid != current->pid ) { DRM_ERROR( "process %d using buffer owned by %d\n", @@ -1323,9 +1208,11 @@ int radeon_cp_vertex( struct inode *inode, struct file *filp, return -EINVAL; } - buf->used = vertex.count; /* not used? */ - + /* Build up a prim_t record: + */ if (vertex.count) { + buf->used = vertex.count; /* not used? */ + if ( sarea_priv->dirty & ~RADEON_UPLOAD_CLIPRECTS ) { radeon_emit_state( dev_priv, &sarea_priv->context_state, @@ -1338,22 +1225,22 @@ int radeon_cp_vertex( struct inode *inode, struct file *filp, RADEON_REQUIRE_QUIESCENCE); } - /* Build up a prim_t record: - */ prim.start = 0; prim.finish = vertex.count; /* unused */ prim.prim = vertex.prim; - prim.stateidx = 0xff; /* unused */ prim.numverts = vertex.count; prim.vc_format = dev_priv->sarea_priv->vc_format; - radeon_cp_dispatch_vertex( dev, buf, &prim ); + radeon_cp_dispatch_vertex( dev, buf, &prim, + dev_priv->sarea_priv->boxes, + dev_priv->sarea_priv->nbox ); } if (vertex.discard) { radeon_cp_discard_buffer( dev, buf ); } + COMMIT_RING(); return 0; } @@ -1366,9 +1253,8 @@ int radeon_cp_indices( struct inode *inode, struct file *filp, drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_device_dma_t *dma = dev->dma; drm_buf_t *buf; - drm_radeon_buf_priv_t *buf_priv; drm_radeon_indices_t elts; - drm_radeon_prim_t prim; + drm_radeon_tcl_prim_t prim; int count; LOCK_TEST_WITH_RETURN( dev ); @@ -1401,7 +1287,6 @@ int radeon_cp_indices( struct inode *inode, struct file *filp, VB_AGE_TEST_WITH_RETURN( dev_priv ); buf = dma->buflist[elts.idx]; - buf_priv = buf->dev_private; if ( buf->pid != current->pid ) { DRM_ERROR( "process %d using buffer owned by %d\n", @@ -1445,15 +1330,18 @@ int radeon_cp_indices( struct inode *inode, struct file *filp, prim.start = elts.start; prim.finish = elts.end; prim.prim = elts.prim; - prim.stateidx = 0xff; /* unused */ - prim.numverts = 0; /* indexed from start of dma area */ + prim.offset = 0; /* offset from start of dma buffers */ + prim.numverts = RADEON_MAX_VB_VERTS; /* duh */ prim.vc_format = dev_priv->sarea_priv->vc_format; - radeon_cp_dispatch_indices( dev, buf, &prim ); + radeon_cp_dispatch_indices( dev, buf, &prim, + dev_priv->sarea_priv->boxes, + dev_priv->sarea_priv->nbox ); if (elts.discard) { - radeon_cp_discard_buffer( dev, buf ); + radeon_cp_discard_buffer( dev, buf ); } + COMMIT_RING(); return 0; } @@ -1465,6 +1353,7 @@ int radeon_cp_texture( struct inode *inode, struct file *filp, drm_radeon_private_t *dev_priv = dev->dev_private; drm_radeon_texture_t tex; drm_radeon_tex_image_t image; + int ret; LOCK_TEST_WITH_RETURN( dev ); @@ -1484,7 +1373,10 @@ int radeon_cp_texture( struct inode *inode, struct file *filp, RING_SPACE_TEST_WITH_RETURN( dev_priv ); VB_AGE_TEST_WITH_RETURN( dev_priv ); - return radeon_cp_dispatch_texture( dev, &tex, &image ); + ret = radeon_cp_dispatch_texture( dev, &tex, &image ); + + COMMIT_RING(); + return ret; } int radeon_cp_stipple( struct inode *inode, struct file *filp, @@ -1509,6 +1401,7 @@ int radeon_cp_stipple( struct inode *inode, struct file *filp, radeon_cp_dispatch_stipple( dev, mask ); + COMMIT_RING(); return 0; } @@ -1520,7 +1413,6 @@ int radeon_cp_indirect( struct inode *inode, struct file *filp, drm_radeon_private_t *dev_priv = dev->dev_private; drm_device_dma_t *dma = dev->dma; drm_buf_t *buf; - drm_radeon_buf_priv_t *buf_priv; drm_radeon_indirect_t indirect; RING_LOCALS; @@ -1546,7 +1438,6 @@ int radeon_cp_indirect( struct inode *inode, struct file *filp, } buf = dma->buflist[indirect.idx]; - buf_priv = buf->dev_private; if ( buf->pid != current->pid ) { DRM_ERROR( "process %d using buffer owned by %d\n", @@ -1568,7 +1459,6 @@ int radeon_cp_indirect( struct inode *inode, struct file *filp, VB_AGE_TEST_WITH_RETURN( dev_priv ); buf->used = indirect.end; - buf_priv->discard = indirect.discard; /* Wait for the 3D stream to idle before the indirect buffer * containing 2D acceleration commands is processed. @@ -1585,10 +1475,11 @@ int radeon_cp_indirect( struct inode *inode, struct file *filp, */ radeon_cp_dispatch_indirect( dev, buf, indirect.start, indirect.end ); if (indirect.discard) { - radeon_cp_discard_buffer( dev, buf ); + radeon_cp_discard_buffer( dev, buf ); } + COMMIT_RING(); return 0; } @@ -1598,9 +1489,9 @@ int radeon_cp_vertex2( struct inode *inode, struct file *filp, drm_file_t *priv = filp->private_data; drm_device_t *dev = priv->dev; drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_sarea_t *sarea_priv = dev_priv->sarea_priv; drm_device_dma_t *dma = dev->dma; drm_buf_t *buf; - drm_radeon_buf_priv_t *buf_priv; drm_radeon_vertex2_t vertex; int i; unsigned char laststate; @@ -1629,7 +1520,6 @@ int radeon_cp_vertex2( struct inode *inode, struct file *filp, VB_AGE_TEST_WITH_RETURN( dev_priv ); buf = dma->buflist[vertex.idx]; - buf_priv = buf->dev_private; if ( buf->pid != current->pid ) { DRM_ERROR( "process %d using buffer owned by %d\n", @@ -1641,23 +1531,17 @@ int radeon_cp_vertex2( struct inode *inode, struct file *filp, DRM_ERROR( "sending pending buffer %d\n", vertex.idx ); return -EINVAL; } + + if (sarea_priv->nbox > RADEON_NR_SAREA_CLIPRECTS) + return -EINVAL; for (laststate = 0xff, i = 0 ; i < vertex.nr_prims ; i++) { drm_radeon_prim_t prim; + drm_radeon_tcl_prim_t tclprim; if ( copy_from_user( &prim, &vertex.prim[i], sizeof(prim) ) ) return -EFAULT; -/* printk( "prim %d vfmt %x hwprim %x start %d finish %d\n", */ -/* i, prim.vc_format, prim.prim, */ -/* prim.start, prim.finish ); */ - - if ( (prim.prim & RADEON_PRIM_TYPE_MASK) > - RADEON_PRIM_TYPE_3VRT_LINE_LIST ) { - DRM_ERROR( "buffer prim %d\n", prim.prim ); - return -EINVAL; - } - if ( prim.stateidx != laststate ) { drm_radeon_state_t state; @@ -1666,34 +1550,346 @@ int radeon_cp_vertex2( struct inode *inode, struct file *filp, sizeof(state) ) ) return -EFAULT; -/* printk("emit state %d (%p) dirty %x\n", */ -/* prim.stateidx, */ -/* &vertex.state[prim.stateidx], */ -/* state.dirty); */ - radeon_emit_state2( dev_priv, &state ); laststate = prim.stateidx; } - if ( prim.finish <= prim.start ) - continue; - - if ( prim.start & 0x7 ) { - DRM_ERROR( "misaligned buffer 0x%x\n", prim.start ); - return -EINVAL; - } + tclprim.start = prim.start; + tclprim.finish = prim.finish; + tclprim.prim = prim.prim; + tclprim.vc_format = prim.vc_format; if ( prim.prim & RADEON_PRIM_WALK_IND ) { - radeon_cp_dispatch_indices( dev, buf, &prim ); + tclprim.offset = prim.numverts * 64; + tclprim.numverts = RADEON_MAX_VB_VERTS; /* duh */ + + radeon_cp_dispatch_indices( dev, buf, &tclprim, + sarea_priv->boxes, + sarea_priv->nbox); } else { - radeon_cp_dispatch_vertex( dev, buf, &prim ); + tclprim.numverts = prim.numverts; + tclprim.offset = 0; /* not used */ + + radeon_cp_dispatch_vertex( dev, buf, &tclprim, + sarea_priv->boxes, + sarea_priv->nbox); } + + if (sarea_priv->nbox == 1) + sarea_priv->nbox = 0; } if ( vertex.discard ) { radeon_cp_discard_buffer( dev, buf ); } + COMMIT_RING(); + return 0; +} + + +static int radeon_emit_packets( + drm_radeon_private_t *dev_priv, + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int id = (int)header.packet.packet_id; + int sz = packet[id].len; + int reg = packet[id].start; + int *data = (int *)cmdbuf->buf; + RING_LOCALS; + + if (sz * sizeof(int) > cmdbuf->bufsz) + return -EINVAL; + + BEGIN_RING(sz+1); + OUT_RING( CP_PACKET0( reg, (sz-1) ) ); + OUT_RING_USER_TABLE( data, sz ); + ADVANCE_RING(); + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + +static inline int radeon_emit_scalars( + drm_radeon_private_t *dev_priv, + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.scalars.count; + int *data = (int *)cmdbuf->buf; + int start = header.scalars.offset; + int stride = header.scalars.stride; + RING_LOCALS; + + BEGIN_RING( 3+sz ); + OUT_RING( CP_PACKET0( RADEON_SE_TCL_SCALAR_INDX_REG, 0 ) ); + OUT_RING( start | (stride << RADEON_SCAL_INDX_DWORD_STRIDE_SHIFT)); + OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_SCALAR_DATA_REG, sz-1 ) ); + OUT_RING_USER_TABLE( data, sz ); + ADVANCE_RING(); + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); return 0; } + +static inline int radeon_emit_vectors( + drm_radeon_private_t *dev_priv, + drm_radeon_cmd_header_t header, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + int sz = header.vectors.count; + int *data = (int *)cmdbuf->buf; + int start = header.vectors.offset; + int stride = header.vectors.stride; + RING_LOCALS; + + BEGIN_RING( 3+sz ); + OUT_RING( CP_PACKET0( RADEON_SE_TCL_VECTOR_INDX_REG, 0 ) ); + OUT_RING( start | (stride << RADEON_VEC_INDX_OCTWORD_STRIDE_SHIFT)); + OUT_RING( CP_PACKET0_TABLE( RADEON_SE_TCL_VECTOR_DATA_REG, (sz-1) ) ); + OUT_RING_USER_TABLE( data, sz ); + ADVANCE_RING(); + + cmdbuf->buf += sz * sizeof(int); + cmdbuf->bufsz -= sz * sizeof(int); + return 0; +} + + +static int radeon_emit_packet3( drm_device_t *dev, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + int cmdsz, tmp; + int *cmd = (int *)cmdbuf->buf; + RING_LOCALS; + + + DRM_DEBUG("%s\n", __FUNCTION__); + + if (__get_user( tmp, &cmd[0])) + return -EFAULT; + + cmdsz = 2 + ((tmp & RADEON_CP_PACKET_COUNT_MASK) >> 16); + + if ((tmp & 0xc0000000) != RADEON_CP_PACKET3 || + cmdsz * 4 > cmdbuf->bufsz) + return -EINVAL; + + BEGIN_RING( cmdsz ); + OUT_RING_USER_TABLE( cmd, cmdsz ); + ADVANCE_RING(); + + cmdbuf->buf += cmdsz * 4; + cmdbuf->bufsz -= cmdsz * 4; + return 0; +} + + +static int radeon_emit_packet3_cliprect( drm_device_t *dev, + drm_radeon_cmd_buffer_t *cmdbuf ) +{ + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_clip_rect_t box; + int cmdsz, tmp; + int *cmd = (int *)cmdbuf->buf; + drm_clip_rect_t *boxes = cmdbuf->boxes; + int i = 0; + RING_LOCALS; + + DRM_DEBUG("%s\n", __FUNCTION__); + + if (__get_user( tmp, &cmd[0])) + return -EFAULT; + + cmdsz = 2 + ((tmp & RADEON_CP_PACKET_COUNT_MASK) >> 16); + + if ((tmp & 0xc0000000) != RADEON_CP_PACKET3 || + cmdsz * 4 > cmdbuf->bufsz) + return -EINVAL; + + do { + if ( i < cmdbuf->nbox ) { + if (__copy_from_user( &box, &boxes[i], sizeof(box) )) + return -EFAULT; + radeon_emit_clip_rect( dev_priv, &box ); + } + + BEGIN_RING( cmdsz ); + OUT_RING_USER_TABLE( cmd, cmdsz ); + ADVANCE_RING(); + + } while ( ++i < cmdbuf->nbox ); + + if (cmdbuf->nbox == 1) + cmdbuf->nbox = 0; + + cmdbuf->buf += cmdsz * 4; + cmdbuf->bufsz -= cmdsz * 4; + return 0; +} + + + +int radeon_cp_cmdbuf( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->dev; + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_device_dma_t *dma = dev->dma; + drm_buf_t *buf = 0; + int idx; + drm_radeon_cmd_buffer_t cmdbuf; + drm_radeon_cmd_header_t header; + + LOCK_TEST_WITH_RETURN( dev ); + + if ( !dev_priv ) { + DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ ); + return -EINVAL; + } + + if ( copy_from_user( &cmdbuf, (drm_radeon_cmd_buffer_t *)arg, + sizeof(cmdbuf) ) ) { + DRM_ERROR("copy_from_user\n"); + return -EFAULT; + } + + DRM_DEBUG( __FUNCTION__": pid=%d\n", current->pid ); + RING_SPACE_TEST_WITH_RETURN( dev_priv ); + VB_AGE_TEST_WITH_RETURN( dev_priv ); + + + if (verify_area( VERIFY_READ, cmdbuf.buf, cmdbuf.bufsz )) + return -EFAULT; + + if (cmdbuf.nbox && + verify_area( VERIFY_READ, cmdbuf.boxes, + cmdbuf.nbox * sizeof(drm_clip_rect_t))) + return -EFAULT; + + while ( cmdbuf.bufsz >= sizeof(header) ) { + + if (__get_user( header.i, (int *)cmdbuf.buf )) { + DRM_ERROR("__get_user %p\n", cmdbuf.buf); + return -EFAULT; + } + + cmdbuf.buf += sizeof(header); + cmdbuf.bufsz -= sizeof(header); + + switch (header.header.cmd_type) { + case RADEON_CMD_PACKET: + if (radeon_emit_packets( dev_priv, header, &cmdbuf )) { + DRM_ERROR("radeon_emit_packets failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_SCALARS: + if (radeon_emit_scalars( dev_priv, header, &cmdbuf )) { + DRM_ERROR("radeon_emit_scalars failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_VECTORS: + if (radeon_emit_vectors( dev_priv, header, &cmdbuf )) { + DRM_ERROR("radeon_emit_vectors failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_DMA_DISCARD: + idx = header.dma.buf_idx; + if ( idx < 0 || idx >= dma->buf_count ) { + DRM_ERROR( "buffer index %d (of %d max)\n", + idx, dma->buf_count - 1 ); + return -EINVAL; + } + + buf = dma->buflist[idx]; + if ( buf->pid != current->pid || buf->pending ) { + DRM_ERROR( "bad buffer\n" ); + return -EINVAL; + } + + radeon_cp_discard_buffer( dev, buf ); + break; + + case RADEON_CMD_PACKET3: + if (radeon_emit_packet3( dev, &cmdbuf )) { + DRM_ERROR("radeon_emit_packet3 failed\n"); + return -EINVAL; + } + break; + + case RADEON_CMD_PACKET3_CLIP: + if (radeon_emit_packet3_cliprect( dev, &cmdbuf )) { + DRM_ERROR("radeon_emit_packet3_clip failed\n"); + return -EINVAL; + } + break; + + default: + DRM_ERROR("bad cmd_type %d at %p\n", + header.header.cmd_type, + cmdbuf.buf - sizeof(header)); + return -EINVAL; + } + } + + + COMMIT_RING(); + return 0; +} + + + +int radeon_cp_getparam( struct inode *inode, struct file *filp, + unsigned int cmd, unsigned long arg ) +{ + drm_file_t *priv = filp->private_data; + drm_device_t *dev = priv->dev; + drm_radeon_private_t *dev_priv = dev->dev_private; + drm_radeon_getparam_t param; + int value; + + if ( !dev_priv ) { + DRM_ERROR( "%s called with no initialization\n", __FUNCTION__ ); + return -EINVAL; + } + + if ( copy_from_user( ¶m, (drm_radeon_getparam_t *)arg, + sizeof(param) ) ) { + DRM_ERROR("copy_from_user\n"); + return -EFAULT; + } + + DRM_DEBUG( __FUNCTION__": pid=%d\n", current->pid ); + + switch( param.param ) { + case RADEON_PARAM_AGP_BUFFER_OFFSET: + value = dev_priv->agp_buffers_offset; + break; + default: + return -EINVAL; + } + + if ( copy_to_user( param.value, &value, sizeof(int) ) ) { + DRM_ERROR( "copy_to_user\n" ); + return -EFAULT; + } + + return 0; +} + + + + + + |