diff options
Diffstat (limited to 'linux-core')
-rw-r--r-- | linux-core/Makefile.kernel | 3 | ||||
-rw-r--r-- | linux-core/i915_gem.c | 15 | ||||
-rw-r--r-- | linux-core/i915_gem_tiling.c | 290 |
3 files changed, 307 insertions, 1 deletions
diff --git a/linux-core/Makefile.kernel b/linux-core/Makefile.kernel index 82d200b3..abfad0a1 100644 --- a/linux-core/Makefile.kernel +++ b/linux-core/Makefile.kernel @@ -20,7 +20,8 @@ r128-objs := r128_drv.o r128_cce.o r128_state.o r128_irq.o mga-objs := mga_drv.o mga_dma.o mga_state.o mga_warp.o mga_irq.o i810-objs := i810_drv.o i810_dma.o i915-objs := i915_drv.o i915_dma.o i915_irq.o i915_mem.o i915_fence.o \ - i915_buffer.o i915_compat.o i915_execbuf.o i915_gem.o + i915_buffer.o i915_compat.o i915_execbuf.o \ + i915_gem.o i915_gem_tiling.o nouveau-objs := nouveau_drv.o nouveau_state.o nouveau_fifo.o nouveau_mem.o \ nouveau_object.o nouveau_irq.o nouveau_notifier.o nouveau_swmthd.o \ nouveau_sgdma.o nouveau_dma.o nouveau_bo.o nouveau_fence.o \ diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c index 446c9ba9..236203a5 100644 --- a/linux-core/i915_gem.c +++ b/linux-core/i915_gem.c @@ -2702,3 +2702,18 @@ i915_gem_lastclose(struct drm_device *dev) mutex_unlock(&dev->struct_mutex); } + +void i915_gem_load(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + + INIT_LIST_HEAD(&dev_priv->mm.active_list); + INIT_LIST_HEAD(&dev_priv->mm.flushing_list); + INIT_LIST_HEAD(&dev_priv->mm.inactive_list); + INIT_LIST_HEAD(&dev_priv->mm.request_list); + INIT_DELAYED_WORK(&dev_priv->mm.retire_work, + i915_gem_retire_work_handler); + dev_priv->mm.next_gem_seqno = 1; + + i915_gem_detect_bit_6_swizzle(dev); +} diff --git a/linux-core/i915_gem_tiling.c b/linux-core/i915_gem_tiling.c new file mode 100644 index 00000000..d66276d6 --- /dev/null +++ b/linux-core/i915_gem_tiling.c @@ -0,0 +1,290 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#include "drmP.h" +#include "drm.h" +#include "i915_drm.h" +#include "i915_drv.h" + +/** @file i915_gem_tiling.c + * + * Support for managing tiling state of buffer objects. + * + * The idea behind tiling is to increase cache hit rates by rearranging + * pixel data so that a group of pixel accesses are in the same cacheline. + * Performance improvement from doing this on the back/depth buffer are on + * the order of 30%. + * + * Intel architectures make this somewhat more complicated, though, by + * adjustments made to addressing of data when the memory is in interleaved + * mode (matched pairs of DIMMS) to improve memory bandwidth. + * For interleaved memory, the CPU sends every sequential 64 bytes + * to an alternate memory channel so it can get the bandwidth from both. + * + * The GPU also rearranges its accesses for increased bandwidth to interleaved + * memory, and it matches what the CPU does for non-tiled. However, when tiled + * it does it a little differently, since one walks addresses not just in the + * X direction but also Y. So, along with alternating channels when bit + * 6 of the address flips, it also alternates when other bits flip -- Bits 9 + * (every 512 bytes, an X tile scanline) and 10 (every two X tile scanlines) + * are common to both the 915 and 965-class hardware. + * + * The CPU also sometimes XORs in higher bits as well, to improve + * bandwidth doing strided access like we do so frequently in graphics. This + * is called "Channel XOR Randomization" in the MCH documentation. The result + * is that the CPU is XORing in either bit 11 or bit 17 to bit 6 of its address + * decode. + * + * All of this bit 6 XORing has an effect on our memory management, + * as we need to make sure that the 3d driver can correctly address object + * contents. + * + * If we don't have interleaved memory, all tiling is safe and no swizzling is + * required. + * + * When bit 17 is XORed in, we simply refuse to tile at all. Bit + * 17 is not just a page offset, so as we page an objet out and back in, + * individual pages in it will have different bit 17 addresses, resulting in + * each 64 bytes being swapped with its neighbor! + * + * Otherwise, if interleaved, we have to tell the 3d driver what the address + * swizzling it needs to do is, since it's writing with the CPU to the pages + * (bit 6 and potentially bit 11 XORed in), and the GPU is reading from the + * pages (bit 6, 9, and 10 XORed in), resulting in a cumulative bit swizzling + * required by the CPU of XORing in bit 6, 9, 10, and potentially 11, in order + * to match what the GPU expects. + */ + +/** + * Detects bit 6 swizzling of address lookup between IGD access and CPU + * access through main memory. + */ +void +i915_gem_detect_bit_6_swizzle(struct drm_device *dev) +{ + drm_i915_private_t *dev_priv = dev->dev_private; + struct pci_dev *bridge; + uint32_t swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + uint32_t swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + int mchbar_offset; + volatile char *mchbar; + int ret; + + bridge = pci_get_bus_and_slot(0, PCI_DEVFN(0,0)); + if (bridge == NULL) { + DRM_ERROR("Couldn't get bridge device\n"); + return; + } + + ret = pci_enable_device(bridge); + if (ret != 0) { + DRM_ERROR("pci_enable_device failed: %d\n", ret); + return; + } + + if (IS_I965G(dev)) + mchbar_offset = 0x48; + else + mchbar_offset = 0x44; + + /* Use resource 2 for our BAR that's stashed in a nonstandard location, + * since the bridge would only ever use standard BARs 0-1 (though it + * doesn't anyway) + */ + pci_read_base(bridge, mchbar_offset, &bridge->resource[2]); + + mchbar = ioremap(pci_resource_start(bridge, 2), + pci_resource_len(bridge, 2)); + if (mchbar == NULL) { + DRM_ERROR("Couldn't map MCHBAR to determine tile swizzling\n"); + return; + } + + if (IS_I965G(dev) && !IS_I965GM(dev)) { + uint32_t chdecmisc; + + /* On the 965, channel interleave appears to be determined by + * the flex bit. If flex is set, then the ranks (sides of a + * DIMM) of memory will be "stacked" (physical addresses walk + * through one rank then move on to the next, flipping channels + * or not depending on rank configuration). The GPU in this + * case does exactly the same addressing as the CPU. + * + * Unlike the 945, channel randomization based does not + * appear to be available. + * + * XXX: While the G965 doesn't appear to do any interleaving + * when the DIMMs are not exactly matched, the G4x chipsets + * might be for "L-shaped" configurations, and will need to be + * detected. + * + * L-shaped configuration: + * + * +-----+ + * | | + * |DIMM2| <-- non-interleaved + * +-----+ + * +-----+ +-----+ + * | | | | + * |DIMM0| |DIMM1| <-- interleaved area + * +-----+ +-----+ + */ + chdecmisc = readb(mchbar + CHDECMISC); + + if (chdecmisc & CHDECMISC_FLEXMEMORY) { + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + } else { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } + } else if (IS_I9XX(dev)){ + uint32_t dcc; + + /* On 915-945 and GM965, channel interleave by the CPU is + * determined by DCC. The CPU will alternate based on bit 6 + * in interleaved mode, and the GPU will then also alternate + * on bit 6, 9, and 10 for X, but the CPU may also optionally + * alternate based on bit 17 (XOR not disabled and XOR + * bit == 17). + */ + dcc = readl(mchbar + DCC); + switch (dcc & DCC_ADDRESSING_MODE_MASK) { + case DCC_ADDRESSING_MODE_SINGLE_CHANNEL: + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_ASYMMETRIC: + swizzle_x = I915_BIT_6_SWIZZLE_NONE; + swizzle_y = I915_BIT_6_SWIZZLE_NONE; + break; + case DCC_ADDRESSING_MODE_DUAL_CHANNEL_INTERLEAVED: + if (IS_I915G(dev) || IS_I915GM(dev) || + dcc & DCC_CHANNEL_XOR_DISABLE) { + swizzle_x = I915_BIT_6_SWIZZLE_9_10; + swizzle_y = I915_BIT_6_SWIZZLE_9; + } else if (IS_I965GM(dev)) { + /* GM965 only does bit 11-based channel + * randomization + */ + swizzle_x = I915_BIT_6_SWIZZLE_9_10_11; + swizzle_y = I915_BIT_6_SWIZZLE_9_11; + } else { + /* Bit 17 or perhaps other swizzling */ + swizzle_x = I915_BIT_6_SWIZZLE_UNKNOWN; + swizzle_y = I915_BIT_6_SWIZZLE_UNKNOWN; + } + break; + } + } + + iounmap(mchbar); + + dev_priv->mm.bit_6_swizzle_x = swizzle_x; + dev_priv->mm.bit_6_swizzle_y = swizzle_y; +} + +/** + * Sets the tiling mode of an object, returning the required swizzling of + * bit 6 of addresses in the object. + */ +int +i915_gem_set_tiling(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_set_tiling *args = data; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return -EINVAL; + obj_priv = obj->driver_private; + + mutex_lock(&dev->struct_mutex); + + if (args->tiling_mode == I915_TILING_NONE) { + obj_priv->tiling_mode = I915_TILING_NONE; + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + } else { + if (args->tiling_mode == I915_TILING_X) + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + else + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + /* If we can't handle the swizzling, make it untiled. */ + if (args->swizzle_mode == I915_BIT_6_SWIZZLE_UNKNOWN) { + args->tiling_mode = I915_TILING_NONE; + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + } + } + obj_priv->tiling_mode = args->tiling_mode; + + mutex_unlock(&dev->struct_mutex); + + drm_gem_object_unreference(obj); + + return 0; +} + +/** + * Returns the current tiling mode and required bit 6 swizzling for the object. + */ +int +i915_gem_get_tiling(struct drm_device *dev, void *data, + struct drm_file *file_priv) +{ + struct drm_i915_gem_get_tiling *args = data; + drm_i915_private_t *dev_priv = dev->dev_private; + struct drm_gem_object *obj; + struct drm_i915_gem_object *obj_priv; + + obj = drm_gem_object_lookup(dev, file_priv, args->handle); + if (obj == NULL) + return -EINVAL; + obj_priv = obj->driver_private; + + mutex_lock(&dev->struct_mutex); + + args->tiling_mode = obj_priv->tiling_mode; + switch (obj_priv->tiling_mode) { + case I915_TILING_X: + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_x; + break; + case I915_TILING_Y: + args->swizzle_mode = dev_priv->mm.bit_6_swizzle_y; + break; + case I915_TILING_NONE: + args->swizzle_mode = I915_BIT_6_SWIZZLE_NONE; + break; + default: + DRM_ERROR("unknown tiling mode\n"); + } + + mutex_unlock(&dev->struct_mutex); + + drm_gem_object_unreference(obj); + + return 0; +} |