From 4297a83b48664b2b6a6dc0a72a4d11b043f34778 Mon Sep 17 00:00:00 2001 From: Jeremy Kolb Date: Fri, 12 Jan 2007 00:13:05 -0500 Subject: nouveau: get nv30 context switching to work. * Pulled in some registers from nv10reg.h. Needed for context switching. * Filled in nv30 graphics context (based on nv40_graph.c). * Figure out nv30 context table, set up on context creation. Allows the card's automatic switching to work. --- shared-core/nouveau_drv.h | 8 ++ shared-core/nouveau_fifo.c | 139 +++++++++++++++++++------- shared-core/nouveau_reg.h | 15 +++ shared-core/nv30_graph.c | 243 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 368 insertions(+), 37 deletions(-) create mode 100644 shared-core/nv30_graph.c (limited to 'shared-core') diff --git a/shared-core/nouveau_drv.h b/shared-core/nouveau_drv.h index 6b09046c..219ba123 100644 --- a/shared-core/nouveau_drv.h +++ b/shared-core/nouveau_drv.h @@ -142,6 +142,10 @@ typedef struct drm_nouveau_private { struct mem_block *fb_nomap_heap; struct mem_block *ramin_heap; + /* context table pointed to by NV_PGRAPH_CHANNEL_CTX_TABLE (0x400780) */ + uint32_t ctx_table_size; + struct mem_block *ctx_table; + struct nouveau_config config; } drm_nouveau_private_t; @@ -198,6 +202,10 @@ extern void nouveau_irq_preinstall(drm_device_t*); extern void nouveau_irq_postinstall(drm_device_t*); extern void nouveau_irq_uninstall(drm_device_t*); +/* nv30_graph.c */ +extern int nv30_graph_init(drm_device_t *dev); +extern int nv30_graph_context_create(drm_device_t *dev, int channel); + /* nv40_graph.c */ extern int nv40_graph_init(drm_device_t *dev); extern int nv40_graph_context_create(drm_device_t *dev, int channel); diff --git a/shared-core/nouveau_fifo.c b/shared-core/nouveau_fifo.c index e5f825e6..d41972b2 100644 --- a/shared-core/nouveau_fifo.c +++ b/shared-core/nouveau_fifo.c @@ -265,9 +265,9 @@ nouveau_fifo_cmdbuf_alloc(struct drm_device *dev, int channel) #define RAMFC_WR(offset, val) NV_WRITE(fifoctx + NV04_RAMFC_##offset, 
(val)) static void nouveau_nv04_context_init(drm_device_t *dev, - drm_nouveau_fifo_alloc_t *init) + drm_nouveau_fifo_alloc_t *init) { - drm_nouveau_private_t *dev_priv = dev->dev_private; + drm_nouveau_private_t *dev_priv = dev->dev_private; struct nouveau_object *cb_obj; uint32_t fifoctx, ctx_size = 32; int i; @@ -275,53 +275,109 @@ static void nouveau_nv04_context_init(drm_device_t *dev, cb_obj = dev_priv->fifos[init->channel].cmdbuf_obj; fifoctx=NV_RAMIN+dev_priv->ramfc_offset+init->channel*ctx_size; - // clear the fifo context - for(i=0;iput_base); + RAMFC_WR(DMA_PUT , init->put_base); RAMFC_WR(DMA_GET , init->put_base); RAMFC_WR(DMA_INSTANCE , nouveau_chip_instance_get(dev, cb_obj->instance)); #ifdef __BIG_ENDIAN - RAMFC_WR(DMA_FETCH, NV_PFIFO_CACH1_DMAF_TRIG_112_BYTES|NV_PFIFO_CACH1_DMAF_SIZE_128_BYTES|NV_PFIFO_CACH1_DMAF_MAX_REQS_4|NV_PFIFO_CACH1_BIG_ENDIAN); + RAMFC_WR(DMA_FETCH, NV_PFIFO_CACH1_DMAF_TRIG_112_BYTES | + NV_PFIFO_CACH1_DMAF_SIZE_128_BYTES | + NV_PFIFO_CACH1_DMAF_MAX_REQS_4 | + NV_PFIFO_CACH1_BIG_ENDIAN); #else - RAMFC_WR(DMA_FETCH, NV_PFIFO_CACH1_DMAF_TRIG_112_BYTES|NV_PFIFO_CACH1_DMAF_SIZE_128_BYTES|NV_PFIFO_CACH1_DMAF_MAX_REQS_4); + RAMFC_WR(DMA_FETCH, NV_PFIFO_CACH1_DMAF_TRIG_112_BYTES | + NV_PFIFO_CACH1_DMAF_SIZE_128_BYTES | + NV_PFIFO_CACH1_DMAF_MAX_REQS_4); #endif } #undef RAMFC_WR #define RAMFC_WR(offset, val) NV_WRITE(fifoctx + NV10_RAMFC_##offset, (val)) static void nouveau_nv10_context_init(drm_device_t *dev, - drm_nouveau_fifo_alloc_t *init) + drm_nouveau_fifo_alloc_t *init) { - drm_nouveau_private_t *dev_priv = dev->dev_private; + drm_nouveau_private_t *dev_priv = dev->dev_private; + struct nouveau_object *cb_obj; + uint32_t fifoctx; + int i; + cb_obj = dev_priv->fifos[init->channel].cmdbuf_obj; + fifoctx = NV_RAMIN + dev_priv->ramfc_offset + init->channel*64; + + for (i=0;i<64;i+=4) + NV_WRITE(fifoctx + i, 0); + + /* Fill entries that are seen filled in dumps of nvidia driver just + * after channel's is put into DMA mode + */ + + 
RAMFC_WR(DMA_PUT , init->put_base); + RAMFC_WR(DMA_GET , init->put_base); + RAMFC_WR(DMA_INSTANCE , nouveau_chip_instance_get(dev, + cb_obj->instance)); +#ifdef __BIG_ENDIAN + RAMFC_WR(DMA_FETCH, NV_PFIFO_CACH1_DMAF_TRIG_112_BYTES | + NV_PFIFO_CACH1_DMAF_SIZE_128_BYTES | + NV_PFIFO_CACH1_DMAF_MAX_REQS_4 | + NV_PFIFO_CACH1_BIG_ENDIAN); +#else + + RAMFC_WR(DMA_FETCH, NV_PFIFO_CACH1_DMAF_TRIG_112_BYTES | + NV_PFIFO_CACH1_DMAF_SIZE_128_BYTES | + NV_PFIFO_CACH1_DMAF_MAX_REQS_4); +#endif + + RAMFC_WR(DMA_SUBROUTINE, 0); + +} + +static void nouveau_nv30_context_init(drm_device_t *dev, + drm_nouveau_fifo_alloc_t *init) +{ + drm_nouveau_private_t *dev_priv = dev->dev_private; + struct nouveau_fifo *chan = &dev_priv->fifos[init->channel]; struct nouveau_object *cb_obj; - uint32_t fifoctx; + uint32_t fifoctx, grctx_inst, cb_inst, ctx_size = 64; int i; - cb_obj = dev_priv->fifos[init->channel].cmdbuf_obj; - fifoctx = NV_RAMIN + dev_priv->ramfc_offset + init->channel*64; - for (i=0;i<64;i+=4) - NV_WRITE(fifoctx + i, 0); - - /* Fill entries that are seen filled in dumps of nvidia driver just - * after channel's is put into DMA mode - */ - RAMFC_WR(DMA_PUT , init->put_base); - RAMFC_WR(DMA_GET , init->put_base); - RAMFC_WR(DMA_INSTANCE , nouveau_chip_instance_get(dev, - cb_obj->instance)); + cb_obj = dev_priv->fifos[init->channel].cmdbuf_obj; + cb_inst = nouveau_chip_instance_get(dev, chan->cmdbuf_obj->instance); + grctx_inst = nouveau_chip_instance_get(dev, chan->ramin_grctx); + fifoctx = NV_RAMIN + dev_priv->ramfc_offset + init->channel * ctx_size; + + for (i = 0; i < ctx_size; i += 4) + NV_WRITE(fifoctx + i, 0); + + RAMFC_WR(DMA_PUT, init->put_base); + RAMFC_WR(DMA_GET, init->put_base); + RAMFC_WR(REF_CNT, NV_READ(NV_PFIFO_CACH1_REF_CNT)); + RAMFC_WR(DMA_INSTANCE, cb_inst); + RAMFC_WR(DMA_STATE, NV_READ(NV_PFIFO_CACH1_DMAS)); + RAMFC_WR(DMA_FETCH, NV_PFIFO_CACH1_DMAF_TRIG_128_BYTES | + NV_PFIFO_CACH1_DMAF_SIZE_128_BYTES | + NV_PFIFO_CACH1_DMAF_MAX_REQS_8 | #ifdef __BIG_ENDIAN 
- RAMFC_WR(DMA_FETCH, NV_PFIFO_CACH1_DMAF_TRIG_112_BYTES | - NV_PFIFO_CACH1_DMAF_SIZE_128_BYTES | - NV_PFIFO_CACH1_DMAF_MAX_REQS_4 | - NV_PFIFO_CACH1_BIG_ENDIAN); + NV_PFIFO_CACH1_BIG_ENDIAN); /* each preprocessor branch must close the RAMFC_WR() call */ #else - RAMFC_WR(DMA_FETCH, NV_PFIFO_CACH1_DMAF_TRIG_112_BYTES | - NV_PFIFO_CACH1_DMAF_SIZE_128_BYTES | - NV_PFIFO_CACH1_DMAF_MAX_REQS_4); + 0x00000000); #endif - RAMFC_WR(DMA_SUBROUTINE, 0); + + RAMFC_WR(ENGINE, NV_READ(NV_PFIFO_CACH1_ENG)); + RAMFC_WR(PULL1_ENGINE, NV_READ(NV_PFIFO_CACH1_PUL1)); + RAMFC_WR(ACQUIRE_VALUE, NV_READ(NV_PFIFO_CACH1_ACQUIRE_VALUE)); + RAMFC_WR(ACQUIRE_TIMESTAMP, NV_READ(NV_PFIFO_CACH1_ACQUIRE_TIMESTAMP)); + RAMFC_WR(ACQUIRE_TIMEOUT, NV_READ(NV_PFIFO_CACH1_ACQUIRE_TIMEOUT)); + RAMFC_WR(SEMAPHORE, NV_READ(NV_PFIFO_CACH1_SEMAPHORE)); + NV_WRITE(NV_PGRAPH_CHANNEL_CTX_SIZE, grctx_inst); /* Misnomer. Really a ptr to the grctx */ + + /* + * TODO: We need to put this somewhere... + */ + /* INSTANCE_WR(dev_priv->ctx_table, init->channel, grctx_inst); */ + RAMFC_WR(DMA_SUBROUTINE, init->put_base); } static void nouveau_nv10_context_save(drm_device_t *dev) @@ -498,8 +554,15 @@ static int nouveau_fifo_alloc(drm_device_t* dev,drm_nouveau_fifo_alloc_t* init, /* Construct inital RAMFC for new channel */ if (dev_priv->card_type < NV_10) { nouveau_nv04_context_init(dev, init); - } else if (dev_priv->card_type < NV_40) { - nouveau_nv10_context_init(dev, init); + } else if (dev_priv->card_type < NV_30) { + nouveau_nv10_context_init(dev, init); + } else if (dev_priv->card_type < NV_40) { + ret = nv30_graph_context_create(dev, init->channel); + if (ret) { + nouveau_fifo_free(dev, init->channel); + return ret; + } + nouveau_nv30_context_init(dev, init); } else { ret = nv40_graph_context_create(dev, init->channel); if (ret) { @@ -521,7 +584,7 @@ static int nouveau_fifo_alloc(drm_device_t* dev,drm_nouveau_fifo_alloc_t* init, */ if (dev_priv->fifo_alloc_count == 0) { nouveau_fifo_context_restore(dev, init->channel); - if (dev_priv->card_type >= NV_40) { + if 
(dev_priv->card_type >= NV_30) { struct nouveau_fifo *chan; uint32_t inst; @@ -530,9 +593,11 @@ static int nouveau_fifo_alloc(drm_device_t* dev,drm_nouveau_fifo_alloc_t* init, chan->ramin_grctx); /* see comments in nv40_graph_context_restore() */ - NV_WRITE(0x400784, inst); - NV_WRITE(0x40032C, inst | 0x01000000); - NV_WRITE(NV40_PFIFO_GRCTX_INSTANCE, inst); + NV_WRITE(NV_PGRAPH_CHANNEL_CTX_SIZE, inst); + if (dev_priv->card_type >= NV_40) { + NV_WRITE(0x40032C, inst | 0x01000000); + NV_WRITE(NV40_PFIFO_GRCTX_INSTANCE, inst); + } } } diff --git a/shared-core/nouveau_reg.h b/shared-core/nouveau_reg.h index 23fce39a..97d81048 100644 --- a/shared-core/nouveau_reg.h +++ b/shared-core/nouveau_reg.h @@ -71,6 +71,21 @@ #define NV_PGRAPH_FIFO 0x00400720 #define NV_PGRAPH_FFINTFC_ST2 0x00400764 +/* NV-Register NV_PGRAPH_CHANNEL_CTX_TABLE */ +#define NV_PGRAPH_CHANNEL_CTX_TABLE 0x00400780 +#define NV_PGRAPH_CHANNEL_CTX_TABLE_INST 0x0000FFFF +#define NV_PGRAPH_CHANNEL_CTX_TABLE_INST_0 0x00000000 + +/* NV-Register NV_PGRAPH_CHANNEL_CTX_SIZE */ +#define NV_PGRAPH_CHANNEL_CTX_SIZE 0x00400784 +#define NV_PGRAPH_CHANNEL_CTX_SIZE_VALUE 0x0000FFFF +#define NV_PGRAPH_CHANNEL_CTX_SIZE_VALUE_INIT 0x00001000 + +/* NV-Register NV_PGRAPH_CHANNEL_CTX_POINTER */ +#define NV_PGRAPH_CHANNEL_CTX_POINTER 0x00400788 +#define NV_PGRAPH_CHANNEL_CTX_POINTER_INST 0x0000FFFF +#define NV_PGRAPH_CHANNEL_CTX_POINTER_INST_0 0x00000000 + /* It's a guess that this works on NV03. Confirmed on NV04, though */ #define NV_PFIFO_DELAY_0 0x00002040 #define NV_PFIFO_DMA_TIMESLICE 0x00002044 diff --git a/shared-core/nv30_graph.c b/shared-core/nv30_graph.c new file mode 100644 index 00000000..dbc39490 --- /dev/null +++ b/shared-core/nv30_graph.c @@ -0,0 +1,243 @@ +/* + * Based on nv40_graph.c + * Someday this will all go away... + */ +#include "drmP.h" +#include "drm.h" +#include "nouveau_drv.h" +#include "nouveau_drm.h" + +/* + * TODO: In the dump start seems to be 7654b0 while end is 76ac28. 
+ * This is obviously not the correct size. + */ +#define NV30_GRCTX_SIZE (22392) + +/*TODO: decipher what each offset in the context represents. The below + * contexts are taken from dumps just after the 3D object is + * created. + */ +static void nv30_graph_context_init(drm_device_t *dev, struct mem_block *ctx) +{ + drm_nouveau_private_t *dev_priv = dev->dev_private; + int i; + + INSTANCE_WR(ctx, 0x28/4, 0x10000000); + INSTANCE_WR(ctx, 0x40c/4, 0x00000101); + INSTANCE_WR(ctx, 0x420/4, 0x00000111); + INSTANCE_WR(ctx, 0x424/4, 0x00000060); + INSTANCE_WR(ctx, 0x440/4, 0x00000080); + INSTANCE_WR(ctx, 0x444/4, 0xffff0000); + INSTANCE_WR(ctx, 0x448/4, 0x00000001); + INSTANCE_WR(ctx, 0x45c/4, 0x44400000); + INSTANCE_WR(ctx, 0x448/4, 0xffff0000); /* NOTE(review): overwrites the 0x448 value written two lines up - confirm the intended offset */ + INSTANCE_WR(ctx, 0x4dc/4, 0xfff00000); + INSTANCE_WR(ctx, 0x4e0/4, 0xfff00000); + INSTANCE_WR(ctx, 0x4e8/4, 0x00011100); + + for (i = 0x504; i <= 0x540; i += 4) + INSTANCE_WR(ctx, i/4, 0x7ff00000); + + INSTANCE_WR(ctx, 0x54c/4, 0x4b7fffff); + INSTANCE_WR(ctx, 0x588/4, 0x00000080); + INSTANCE_WR(ctx, 0x58c/4, 0x30201000); + INSTANCE_WR(ctx, 0x590/4, 0x70605040); + INSTANCE_WR(ctx, 0x594/4, 0xb8a89888); + INSTANCE_WR(ctx, 0x598/4, 0xf8e8d8c8); + INSTANCE_WR(ctx, 0x5ac/4, 0xb0000000); + + for (i = 0x604; i <= 0x640; i += 4) + INSTANCE_WR(ctx, i/4, 0x00010588); + + for (i = 0x644; i <= 0x680; i += 4) + INSTANCE_WR(ctx, i/4, 0x00030303); + + for (i = 0x6c4; i <= 0x700; i += 4) + INSTANCE_WR(ctx, i/4, 0x0008aae4); + + for (i = 0x704; i <= 0x740; i += 4) + INSTANCE_WR(ctx, i/4, 0x1012000); + + for (i = 0x744; i <= 0x780; i += 4) + INSTANCE_WR(ctx, i/4, 0x0080008); + + INSTANCE_WR(ctx, 0x860/4, 0x00040000); + INSTANCE_WR(ctx, 0x864/4, 0x00010000); + INSTANCE_WR(ctx, 0x868/4, 0x00040000); + INSTANCE_WR(ctx, 0x86c/4, 0x00040000); + INSTANCE_WR(ctx, 0x870/4, 0x00040000); + INSTANCE_WR(ctx, 0x874/4, 0x00040000); + + for (i = 0x00; i <= 0x1170; i += 0x10) + { + INSTANCE_WR(ctx, (0x1f24 + i)/4, 0x000c001b); + INSTANCE_WR(ctx, (0x1f20 + i)/4, 
0x0436086c); + INSTANCE_WR(ctx, (0x1f1c + i)/4, 0x10700ff9); + } + + INSTANCE_WR(ctx, 0x30bc/4, 0x0000ffff); + INSTANCE_WR(ctx, 0x30c0/4, 0x0000ffff); + INSTANCE_WR(ctx, 0x30c4/4, 0x0000ffff); + INSTANCE_WR(ctx, 0x30c8/4, 0x0000ffff); + + INSTANCE_WR(ctx, 0x380c/4, 0x3f800000); + INSTANCE_WR(ctx, 0x3450/4, 0x3f800000); + INSTANCE_WR(ctx, 0x3820/4, 0x3f800000); + INSTANCE_WR(ctx, 0x3854/4, 0x3f800000); + INSTANCE_WR(ctx, 0x3850/4, 0x3f000000); + INSTANCE_WR(ctx, 0x384c/4, 0x40000000); + INSTANCE_WR(ctx, 0x3868/4, 0xbf800000); + INSTANCE_WR(ctx, 0x3860/4, 0x3f800000); + INSTANCE_WR(ctx, 0x386c/4, 0x40000000); + INSTANCE_WR(ctx, 0x3870/4, 0xbf800000); + + for (i = 0x4e0; i <= 0x4e1c; i += 4) /* NOTE(review): range covers offsets already initialised above (e.g. 0x504, 0x1f1c, 0x380c) - confirm the start offset */ + INSTANCE_WR(ctx, i/4, 0x001c527d); + INSTANCE_WR(ctx, 0x4e40/4, 0x001c527c); /* offset divided by 4 like every other INSTANCE_WR in this function */ + + INSTANCE_WR(ctx, 0x5680/4, 0x000a0000); + INSTANCE_WR(ctx, 0x87c/4, 0x10000000); + INSTANCE_WR(ctx, 0x28/4, 0x10000011); +} + + +int +nv30_graph_context_create(drm_device_t *dev, int channel) +{ + drm_nouveau_private_t *dev_priv = + (drm_nouveau_private_t *)dev->dev_private; + struct nouveau_fifo *chan = &dev_priv->fifos[channel]; + void (*ctx_init)(drm_device_t *, struct mem_block *); + unsigned int ctx_size; + int i, chipset; + + chipset = (NV_READ(NV_PMC_BOOT_0) & 0x0ff00000) >> 20; + switch (chipset) { + default: + ctx_size = NV30_GRCTX_SIZE; + ctx_init = nv30_graph_context_init; + break; + } + + /* Alloc and clear RAMIN to store the context */ + chan->ramin_grctx = nouveau_instmem_alloc(dev, ctx_size, 4); + if (!chan->ramin_grctx) + return DRM_ERR(ENOMEM); + for (i=0; i<ctx_size; i+=4) + INSTANCE_WR(chan->ramin_grctx, i/4, 0x00000000); + + /* Initialise default context values */ + ctx_init(dev, chan->ramin_grctx); + + return 0; +} +#if 0 +/* Save current context (from PGRAPH) into the channel's context + *XXX: fails sometimes, not sure why.. 
+ */ +void +nv40_graph_context_save_current(drm_device_t *dev) +{ + drm_nouveau_private_t *dev_priv = + (drm_nouveau_private_t *)dev->dev_private; + uint32_t instance; + int i; + + NV_WRITE(NV_PGRAPH_FIFO, 0); + + instance = NV_READ(0x40032C) & 0xFFFFF; + if (!instance) { + NV_WRITE(NV_PGRAPH_FIFO, 1); + return; + } + + NV_WRITE(0x400784, instance); + NV_WRITE(0x400310, NV_READ(0x400310) | 0x20); + NV_WRITE(0x400304, 1); + /* just in case, we don't want to spin in-kernel forever */ + for (i=0; i<1000; i++) { + if (NV_READ(0x40030C) == 0) + break; + } + if (i==1000) { + DRM_ERROR("failed to save current grctx to ramin\n"); + DRM_ERROR("instance = 0x%08x\n", NV_READ(0x40032C)); + DRM_ERROR("0x40030C = 0x%08x\n", NV_READ(0x40030C)); + NV_WRITE(NV_PGRAPH_FIFO, 1); + return; + } + + NV_WRITE(NV_PGRAPH_FIFO, 1); +} + +/* Restore the context for a specific channel into PGRAPH + * XXX: fails sometimes.. not sure why + */ +void +nv40_graph_context_restore(drm_device_t *dev, int channel) +{ + drm_nouveau_private_t *dev_priv = + (drm_nouveau_private_t *)dev->dev_private; + struct nouveau_fifo *chan = &dev_priv->fifos[channel]; + uint32_t instance; + int i; + + instance = nouveau_chip_instance_get(dev, chan->ramin_grctx); + + NV_WRITE(NV_PGRAPH_FIFO, 0); + NV_WRITE(0x400784, instance); + NV_WRITE(0x400310, NV_READ(0x400310) | 0x40); + NV_WRITE(0x400304, 1); + /* just in case, we don't want to spin in-kernel forever */ + for (i=0; i<1000; i++) { + if (NV_READ(0x40030C) == 0) + break; + } + if (i==1000) { + DRM_ERROR("failed to restore grctx for ch%d to PGRAPH\n", + channel); + DRM_ERROR("instance = 0x%08x\n", instance); + DRM_ERROR("0x40030C = 0x%08x\n", NV_READ(0x40030C)); + NV_WRITE(NV_PGRAPH_FIFO, 1); + return; + } + + + /* 0x40032C, no idea of it's exact function. Could simply be a + * record of the currently active PGRAPH context. It's currently + * unknown as to what bit 24 does. The nv ddx has it set, so we will + * set it here too. 
+ */ + NV_WRITE(0x40032C, instance | 0x01000000); + /* 0x32E0 records the instance address of the active FIFO's PGRAPH + * context. If at any time this doesn't match 0x40032C, you will + * receive PGRAPH_INTR_CONTEXT_SWITCH + */ + NV_WRITE(NV40_PFIFO_GRCTX_INSTANCE, instance); + NV_WRITE(NV_PGRAPH_FIFO, 1); +} +#endif +int +nv30_graph_init(drm_device_t *dev) +{ + drm_nouveau_private_t *dev_priv = + (drm_nouveau_private_t *)dev->dev_private; + int i, chipset; + + chipset = (NV_READ(NV_PMC_BOOT_0) & 0x0ff00000) >> 20; + DRM_DEBUG("chipset (from PMC_BOOT_0): NV%02X\n", chipset); + + /* Create Context Pointer Table */ + dev_priv->ctx_table_size = 32 * 4; + dev_priv->ctx_table = nouveau_instmem_alloc(dev, dev_priv->ctx_table_size, 4); + if (!dev_priv->ctx_table) + return DRM_ERR(ENOMEM); + + for (i=0; i< dev_priv->ctx_table_size; i+=4) + INSTANCE_WR(dev_priv->ctx_table, i/4, 0x00000000); + + NV_WRITE(NV_PGRAPH_CHANNEL_CTX_TABLE, nouveau_chip_instance_get(dev, dev_priv->ctx_table)); + + return 0; +} + -- cgit v1.2.3