33 files changed, 4574 insertions, 158 deletions
@@ -58,6 +58,9 @@ tests/getclient  tests/getstats  tests/getversion  tests/lock +tests/gem_basic +tests/gem_mmap +tests/gem_readwrite  tests/openclose  tests/setversion  tests/updatedraw diff --git a/linux-core/Makefile b/linux-core/Makefile index 3af6f370..fc32676f 100644 --- a/linux-core/Makefile +++ b/linux-core/Makefile @@ -30,6 +30,7 @@  #  #    make DRM_MODULES="r128 radeon"  # +DRM_MODULES=i915  SHELL=/bin/sh diff --git a/linux-core/Makefile.kernel b/linux-core/Makefile.kernel index 7a477a10..82d200b3 100644 --- a/linux-core/Makefile.kernel +++ b/linux-core/Makefile.kernel @@ -12,15 +12,15 @@ drm-objs    := drm_auth.o drm_bufs.o drm_context.o drm_dma.o drm_drawable.o \  		drm_lock.o drm_memory.o drm_proc.o drm_stub.o drm_vm.o \  		drm_sysfs.o drm_pci.o drm_agpsupport.o drm_scatter.o \  		drm_memory_debug.o ati_pcigart.o drm_sman.o \ -		drm_hashtab.o drm_mm.o drm_object.o drm_compat.o \ +		drm_hashtab.o drm_memrange.o drm_object.o drm_compat.o \  	        drm_fence.o drm_ttm.o drm_bo.o drm_bo_move.o drm_bo_lock.o \ -		drm_regman.o drm_vm_nopage_compat.o +		drm_regman.o drm_vm_nopage_compat.o drm_gem.o  tdfx-objs   := tdfx_drv.o  r128-objs   := r128_drv.o r128_cce.o r128_state.o r128_irq.o  mga-objs    := mga_drv.o mga_dma.o mga_state.o mga_warp.o mga_irq.o  i810-objs   := i810_drv.o i810_dma.o  i915-objs   := i915_drv.o i915_dma.o i915_irq.o i915_mem.o i915_fence.o \ -		i915_buffer.o i915_compat.o i915_execbuf.o +		i915_buffer.o i915_compat.o i915_execbuf.o i915_gem.o  nouveau-objs := nouveau_drv.o nouveau_state.o nouveau_fifo.o nouveau_mem.o \  		nouveau_object.o nouveau_irq.o nouveau_notifier.o nouveau_swmthd.o \  		nouveau_sgdma.o nouveau_dma.o nouveau_bo.o nouveau_fence.o \ diff --git a/linux-core/drm-gem.txt b/linux-core/drm-gem.txt new file mode 100644 index 00000000..5cda87f8 --- /dev/null +++ b/linux-core/drm-gem.txt @@ -0,0 +1,805 @@ +                  The Graphics Execution Manager +	      Part of the Direct Rendering Manager +                  ============================== +		   +		 Keith Packard <keithp@keithp.com> +		   Eric Anholt <eric@anholt.net> +			   2008-5-9 + +Contents: + + 1. GEM Overview + 2. API overview and conventions + 3. Object Creation/Destruction + 4. Reading/writing contents + 5. Mapping objects to userspace + 6. Memory Domains + 7. Execution (Intel specific) + 8. Other misc Intel-specific functions + +1. Graphics Execution Manager Overview + +Gem is designed to manage graphics memory, control access to the graphics +device execution context and handle the essentially NUMA environment unique +to modern graphics hardware. Gem allows multiple applications to share +graphics device resources without the need to constantly reload the entire +graphics card. Data may be shared between multiple applications with gem +ensuring that the correct memory synchronization occurs. + +Graphics data can consume arbitrary amounts of memory, with 3D applications +constructing ever larger sets of textures and vertices. With graphics cards' +memory space growing larger every year, and graphics APIs growing more +complex, we can no longer insist that each application save a complete copy +of its graphics state so that the card can be re-initialized from user +space at each context switch. Ensuring that graphics data remains persistent +across context switches allows applications significant new functionality +while also improving performance for existing APIs.
+ +Modern Linux desktops include significant 3D rendering as a fundamental +component of the desktop image construction process. 2D and 3D applications +paint their content to offscreen storage and the central 'compositing +manager' constructs the final screen image from those window contents.  This +means that pixel image data from these applications must move within reach +of the compositing manager and be used as source operands for screen image +rendering operations. + +Gem provides simple mechanisms to manage graphics data and control execution +flow within the Linux operating system. Using many existing kernel +subsystems, it does this with a modest amount of code. + +2. API Overview and Conventions + +All APIs here are defined in terms of ioctls applied to the DRM file +descriptor. To create and manipulate objects, an application must be +'authorized' using the DRI or DRI2 protocols with the X server. To relax +that, we will need to implement some better access control mechanisms within +the hardware portion of the driver to prevent inappropriate +cross-application data access. + +Any DRM driver which does not support GEM will return -ENODEV for all of +these ioctls. Invalid object handles return -EINVAL. Invalid object names +return -ENOENT. Other errors are as documented in the specific API below. + +To avoid the need to translate ioctl contents on mixed-size systems (with +32-bit user space running on a 64-bit kernel), the ioctl data structures +contain explicitly sized objects, using 64 bits for all size and pointer +data and 32 bits for identifiers. In addition, the 64-bit objects are all +carefully aligned on 64-bit boundaries. Because of this, all pointers in the +ioctl data structures are passed as uint64_t values. Suitable casts will +be necessary. + +One significant operation which is explicitly left out of this API is object +locking. Applications are expected to perform locking of shared objects +outside of the GEM API. This kind of locking is not necessary to safely +manipulate the graphics engine, and with multiple objects interacting in +unknown ways, per-object locking would likely introduce all kinds of +lock-order issues. Punting this to the application seems like the only +sensible plan. Given that DRM already offers a global lock on the hardware, +this doesn't change the current situation. + +3. Object Creation and Destruction + +Gem provides explicit memory management primitives. System pages are +allocated when the object is created, either as the fundamental storage for +hardware where system memory is used by the graphics processor directly, or +as backing store for graphics-processor resident memory. + +Objects are referenced from user space using handles. These are, for all +intents and purposes, equivalent to file descriptors. We could simply use +file descriptors were it not for the small limit (1024) of file descriptors +available to applications, and for the fact that the X server (a rather +significant user of this API) uses 'select' and has a limited maximum file +descriptor for that operation. Given the ability to allocate more file +descriptors, and given the ability to place these 'higher' in the file +descriptor space, we'd love to simply use file descriptors. + +Objects may be published with a name so that other applications can access +them. The name remains valid as long as the object exists. Right now, our +DRI APIs use 32-bit integer names, so that's what we expose here. + + A. 
Creation + +		struct drm_gem_create { +			/** +			 * Requested size for the object. +			 * +			 * The (page-aligned) allocated size for the object +			 * will be returned. +			 */ +			uint64_t size; +			/** +			 * Returned handle for the object. +			 * +			 * Object handles are nonzero. +			 */ +			uint32_t handle; +			uint32_t pad; +		}; +	 +		/* usage */ +    		create.size = 16384; +		ret = ioctl (fd, DRM_IOCTL_GEM_CREATE, &create); +		if (ret == 0) +			return create.handle; + +	Note that the size is rounded up to a page boundary, and that +	the rounded-up size is returned in 'size'. No name is assigned to +	this object, making it local to this process. + +	If insufficient memory is available, -ENOMEM will be returned. + + B. Closing + +		struct drm_gem_close { +			/** Handle of the object to be closed. */ +			uint32_t handle; +			uint32_t pad; +		}; +		 + +		/* usage */ +		close.handle = <handle>; +		ret = ioctl (fd, DRM_IOCTL_GEM_CLOSE, &close); + +	This call makes the specified handle invalid, and if no other +	applications are using the object, any necessary graphics hardware +	synchronization is performed and the resources used by the object +	released. + + C. Naming + +		struct drm_gem_flink { +			/** Handle for the object being named */ +			uint32_t handle; +		 +			/** Returned global name */ +			uint32_t name; +		}; +		 +		/* usage */ +		flink.handle = <handle>; +		ret = ioctl (fd, DRM_IOCTL_GEM_FLINK, &flink); +		if (ret == 0) +			return flink.name; + +	Flink creates a name for the object and returns it to the +	application. This name can be used by other applications to gain +	access to the same object. + + D. Opening by name + +		struct drm_gem_open { +			/** Name of object being opened */ +			uint32_t name; +		 +			/** Returned handle for the object */ +			uint32_t handle; +			 +			/** Returned size of the object */ +			uint64_t size; +		}; +		 +		/* usage */ +		open.name = <name>; +		ret = ioctl (fd, DRM_IOCTL_GEM_OPEN, &open); +		if (ret == 0) { +			*sizep = open.size; +			return open.handle; +		} + +	Open accesses an existing object and returns a handle for it. If the +	object doesn't exist, -ENOENT is returned. The size of the object is +	also returned. This handle has all the same capabilities as the +	handle used to create the object. In particular, the object is not +	destroyed until all handles are closed. + +4. Basic read/write operations + +By default, gem objects are not mapped to the application's address space; +getting data in and out of them is done with I/O operations instead. This +allows the data to reside in otherwise unmapped pages, including pages in +video memory on an attached discrete graphics card. In addition, using +explicit I/O operations allows better control over cache contents: because +graphics devices are generally not cache coherent with the CPU, mapping +pages used for graphics into an application's address space requires the use +of expensive cache flushing operations. Providing direct control over +graphics data access ensures that data are handled in the most efficient +possible fashion. + + A. Reading + +		struct drm_gem_pread { +			/** Handle for the object being read. */ +			uint32_t handle; +			uint32_t pad; +			/** Offset into the object to read from */ +			uint64_t offset; +			/** Length of data to read */ +			uint64_t size; +			/** Pointer to write the data into. */ +			uint64_t data_ptr;	/* void * */ +		}; + +	This copies data from the specified position in the object into +	user memory at 'data_ptr'. Any necessary graphics device synchronization and +	flushing will be done automatically.
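	A usage sketch in the style of section 3 (illustrative only; <handle>
	names an existing object and 'buf' is assumed to point at a user
	buffer of at least <size> bytes):

		/* usage (sketch) */
		pread.handle = <handle>;
		pread.offset = 0;
		pread.size = <size>;
		pread.data_ptr = (uint64_t) (uintptr_t) buf;
		ret = ioctl (fd, DRM_IOCTL_GEM_PREAD, &pread);
		/* on success, 'buf' holds the first <size> bytes of the object */

	The pwrite ioctl below is used the same way, with 'data_ptr'
	pointing at the data to be copied into the object.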
+ + B. Writing + +		struct drm_gem_pwrite { +			/** Handle for the object being written to. */ +			uint32_t handle; +			uint32_t pad; +			/** Offset into the object to write to */ +			uint64_t offset; +			/** Length of data to write */ +			uint64_t size; +			/** Pointer to read the data from. */ +			uint64_t data_ptr;	/* void * */ +		}; +		 +	This copies data from user memory at 'data_ptr' into the specified +	object at the specified position. Again, device synchronization will +	be handled by the kernel to ensure user space sees a +	consistent view of the graphics device. + +5. Mapping objects to user space + +For most objects, reading/writing is the preferred interaction mode. +However, when the CPU is involved in rendering to cover deficiencies in +hardware support for particular operations, the CPU will want to directly +access the relevant objects.  + +Because mmap is fairly heavyweight, we allow applications to retain maps to +objects persistently and then update how they're using the memory through a +separate interface. Applications which fail to use this separate interface +may exhibit unpredictable behaviour as memory consistency will not be +preserved. + + A. Mapping + +		struct drm_gem_mmap { +			/** Handle for the object being mapped. */ +			uint32_t handle; +			uint32_t pad; +			/** Offset in the object to map. */ +			uint64_t offset; +			/** +			 * Length of data to map. +			 * +			 * The value will be page-aligned. +			 */ +			uint64_t size; +			/** Returned pointer the data was mapped at */ +			uint64_t addr_ptr;	/* void * */ +		}; +		 +		/* usage */ +		mmap.handle = <handle>; +		mmap.offset = <offset>; +		mmap.size = <size>; +		ret = ioctl (fd, DRM_IOCTL_GEM_MMAP, &mmap); +		if (ret == 0) +			return (void *) (uintptr_t) mmap.addr_ptr; + + + B. Unmapping + +		munmap (addr, length); + +	Nothing strange here, just use the normal munmap syscall. + +6. Memory Domains + +Graphics devices remain a strong bastion of non cache-coherent memory. As a +result, accessing data through one functional unit will end up loading that +cache with data which then needs to be manually synchronized when that data +is used with another functional unit. + +Tracking where data are resident is done by identifying how functional units +deal with caches. Each cache is labeled as a separate memory domain. Then, +each sequence of operations is expected to load data into various read +domains and leave data in at most one write domain. Gem tracks the read and +write memory domains of each object and performs the necessary +synchronization operations when objects move from one domain set to another. + +For example, if operation 'A' constructs an image that is immediately used +by operation 'B', and the read domain for 'B' is not the same as the +write domain for 'A', then the write domain must be flushed, and the read +domain invalidated. If these two operations are both executed in the same +command queue, then the flush operation can go in between them in the same +queue, avoiding any kind of CPU-based synchronization and leaving the GPU to +do the work itself. + +6.1 Memory Domains (GPU-independent) + + * DRM_GEM_DOMAIN_CPU. + + Objects in this domain are using caches which are connected to the CPU. + Moving objects from non-CPU domains into the CPU domain can involve waiting + for the GPU to finish with operations using this object. Moving objects + from this domain to a GPU domain can involve flushing CPU caches and chipset + buffers. 
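	For example, an application that wants to examine the results of GPU
	rendering through a mapping (section 5) first moves the object into
	the CPU read domain using the set-domain ioctl described in the next
	section. A sketch, with <handle> and <size> as in the earlier
	examples:

		/* usage (sketch) */
		mmap.handle = <handle>;
		mmap.offset = 0;
		mmap.size = <size>;
		ret = ioctl (fd, DRM_IOCTL_GEM_MMAP, &mmap);
		addr = (void *) (uintptr_t) mmap.addr_ptr;

		set_domain.handle = <handle>;
		set_domain.read_domains = DRM_GEM_DOMAIN_CPU;
		set_domain.write_domain = 0;
		ret = ioctl (fd, DRM_IOCTL_GEM_SET_DOMAIN, &set_domain);

		/* reads through 'addr' now see everything the GPU has
		   written to the object */

	If the application will also write through the mapping, it would set
	write_domain to DRM_GEM_DOMAIN_CPU as well, as described in the next
	section.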
+ +6.2 GPU-independent memory domain ioctl + +This ioctl is independent of the GPU in use. So far, no use other than +synchronizing objects to the CPU domain has been found; if that turns out +to be generally true, this ioctl may be simplified further. +     + A. Explicit domain control + +		struct drm_gem_set_domain { +			/** Handle for the object */ +			uint32_t handle; +		 +			/** New read domains */ +			uint32_t read_domains; +		 +			/** New write domain */ +			uint32_t write_domain; +		}; + +		/* usage */ +		set_domain.handle = <handle>; +		set_domain.read_domains = <read_domains>; +		set_domain.write_domain = <write_domain>; +		ret = ioctl (fd, DRM_IOCTL_GEM_SET_DOMAIN, &set_domain); +		 +	When the application wants to explicitly manage memory domains for +	an object, it can use this function. Usually, this is only used +	when the application wants to synchronize object contents between +	the GPU and CPU-based application rendering. In that case, +	the <read_domains> would be set to DRM_GEM_DOMAIN_CPU, and if the +	application were going to write to the object, the <write_domain> +	would also be set to DRM_GEM_DOMAIN_CPU. After the call, gem +	guarantees that all previous rendering operations involving this +	object are complete. The application is then free to access the +	object through the address returned by the mmap call. Afterwards, +	when the application again uses the object through the GPU, any +	necessary CPU flushing will occur and the object will be correctly +	synchronized with the GPU. + +	Note that this synchronization is not required for any accesses +	going through the driver itself. The pread, pwrite and execbuffer +	ioctls all perform the necessary domain management internally. +	Explicit synchronization is only necessary when accessing the object +	through the mmap'd address. + +7. Execution (Intel specific) + +Managing the command buffers is inherently chip-specific, so the core of gem +doesn't have any intrinsic functions. Rather, execution is left to the +device-specific portions of the driver. + +The Intel DRM_I915_GEM_EXECBUFFER ioctl takes a list of gem objects, all of +which are mapped to the graphics device. The last object in the list is the +command buffer. + +7.1 Relocations +  +Command buffers often refer to other objects, and to allow the kernel driver +to move objects around, a sequence of relocations is associated with each +object. Device-specific relocation operations are used to place the +target-object relative value into the object. + +The Intel driver has a single relocation type: + +		struct drm_i915_gem_relocation_entry { +			/** +			 * Handle of the buffer being pointed to by this +			 * relocation entry. +			 * +			 * It's appealing to make this be an index into the +			 * mm_validate_entry list to refer to the buffer, +			 * but this allows the driver to create a relocation +			 * list for state buffers and not re-write it per +			 * exec using the buffer. +			 */ +			uint32_t target_handle; +		 +			/** +			 * Value to be added to the offset of the target +			 * buffer to make up the relocation entry. +			 */ +			uint32_t delta; +		 +			/** +			 * Offset in the buffer the relocation entry will be +			 * written into +			 */ +			uint64_t offset; +		 +			/** +			 * Offset value of the target buffer that the +			 * relocation entry was last written as. +			 * +			 * If the buffer has the same offset as last time, we +			 * can skip syncing and writing the relocation.  
This +			 * value is written back out by the execbuffer ioctl +			 * when the relocation is written. +			 */ +			uint64_t presumed_offset; +		 +			/** +			 * Target memory domains read by this operation. +			 */ +			uint32_t read_domains; +		 +			/* +			 * Target memory domains written by this operation. +			 * +			 * Note that only one domain may be written by the +			 * whole execbuffer operation, so that where there are +			 * conflicts, the application will get -EINVAL back. +			 */ +			uint32_t write_domain; +		}; +		 +	'target_handle', the handle to the target object. This object must +	be one of the objects listed in the execbuffer request or +	bad things will happen. The kernel doesn't check for this. + +	'offset' is where, in the source object, the relocation data +	are written. Each relocation value is a 32-bit value consisting +	of the location of the target object in the GPU memory space plus +	the 'delta' value included in the relocation. + +	'presumed_offset' is where user-space believes the target object +	lies in GPU memory space. If this value matches where the object +	actually is, then no relocation data are written, the kernel +	assumes that user space has set up data in the source object +	using this presumption. This offers a fairly important optimization +	as writing relocation data requires mapping of the source object +	into the kernel memory space. + +	'read_domains' and 'write_domains' list the usage by the source +	object of the target object. The kernel unions all of the domain +	information from all relocations in the execbuffer request. No more +	than one write_domain is allowed, otherwise an EINVAL error is +	returned. read_domains must contain write_domain. This domain +	information is used to synchronize buffer contents as described +	above in the section on domains. + +7.1.1 Memory Domains (Intel specific) + +The Intel GPU has several internal caches which are not coherent and hence +require explicit synchronization. Memory domains provide the necessary data +to synchronize what is needed while leaving other cache contents intact. + + * DRM_GEM_DOMAIN_I915_RENDER. +   The GPU 3D and 2D rendering operations use a unified rendering cache, so +   operations doing 3D painting and 2D blts will use this domain +    + * DRM_GEM_DOMAIN_I915_SAMPLER +   Textures are loaded by the sampler through a separate cache, so +   any texture reading will use this domain. Note that the sampler +   and renderer use different caches, so moving an object from render target +   to texture source will require a domain transfer. +    + * DRM_GEM_DOMAIN_I915_COMMAND +   The command buffer doesn't have an explicit cache (although it does +   read ahead quite a bit), so this domain just indicates that the object +   needs to be flushed to the GPU. +    + * DRM_GEM_DOMAIN_I915_INSTRUCTION +   All of the programs on Gen4 and later chips use an instruction cache to +   speed program execution. It must be explicitly flushed when new programs +   are written to memory by the CPU. + + * DRM_GEM_DOMAIN_I915_VERTEX +   Vertex data uses two different vertex caches, but they're +   both flushed with the same instruction. + +7.2 Execution object list (Intel specific) + +		struct drm_i915_gem_exec_object { +			/** +			 * User's handle for a buffer to be bound into the GTT +			 * for this operation. 
+			 */ +			uint32_t handle; +			 +			/** +			 * List of relocations to be performed on this buffer +			 */ +			uint32_t relocation_count; +			/* struct drm_i915_gem_relocation_entry *relocs */ +			uint64_t relocs_ptr; +			 +			/**  +			 * Required alignment in graphics aperture  +			 */ +			uint64_t alignment; +		 +			/** +			 * Returned value of the updated offset of the object, +			 * for future presumed_offset writes. +			 */ +			uint64_t offset; +		}; +			 +	Each object involved in a particular execution operation must be +	listed using one of these structures. + +	'handle' references the object. + +	'relocs_ptr' is a user-mode pointer to an array of 'relocation_count' +	drm_i915_gem_relocation_entry structs (see above) that +	define the relocations necessary in this buffer. Note that all +	relocations must reference other exec_object structures in the same +	execbuffer ioctl and that those other buffers must come earlier in +	the exec_object array. In other words, the dependencies mapped by the +	exec_object relocations must form a directed acyclic graph. + +	'alignment' is the byte alignment necessary for this buffer. Each +	object has specific alignment requirements; as the kernel doesn't +	know what each object is being used for, those requirements must be +	provided by user mode. If an object is used in two different ways, +	it's quite possible that the alignment requirements will differ. + +	'offset' is a return value, receiving the location of the object +	during this execbuffer operation. The application should use this +	as the presumed offset in future operations; if the object does not +	move, then the kernel need not write relocation data. + +7.3 Execbuffer ioctl (Intel specific) + +		struct drm_i915_gem_execbuffer { +			/** +			 * List of buffers to be validated with their +			 * relocations to be performed on them. +			 * +			 * These buffers must be listed in an order such that +			 * all relocations a buffer is performing refer to +			 * buffers that have already appeared in the validate +			 * list. +			 */ +			/* struct drm_i915_gem_validate_entry *buffers */ +			uint64_t buffers_ptr; +			uint32_t buffer_count; +		 +			/** +			 * Offset in the batchbuffer to start execution from. +			 */ +			uint32_t batch_start_offset; +			 +			/** +			 * Bytes used in batchbuffer from batch_start_offset +			 */ +			uint32_t batch_len; +			uint32_t DR1; +			uint32_t DR4; +			uint32_t num_cliprects; +			uint64_t cliprects_ptr;	/* struct drm_clip_rect *cliprects */ +		}; +		 + +	'buffers_ptr' is a user-mode pointer to an array of 'buffer_count' +	drm_i915_gem_exec_object structures which contains the complete set +	of objects required for this execbuffer operation. The last entry in +	this array, the 'batch buffer', is the buffer of commands which will +	be linked to the ring and executed. + +	'batch_start_offset' is the byte offset within the batch buffer which +	contains the first command to execute. So far, we haven't found a +	reason to use anything other than '0' here, but the thought was that +	some space might be allocated for additional initialization which +	could be skipped in some cases. This must be a multiple of 4. + +	'batch_len' is the length, in bytes, of the data to be executed +	(i.e., the amount of data after batch_start_offset). This must +	be a multiple of 4. + +	'num_cliprects' and 'cliprects_ptr' reference an array of +	drm_clip_rect structures that is num_cliprects long. 
The entire +	batch buffer will be executed multiple times, once for each +	rectangle in this list. If num_cliprects is 0, then no clipping +	rectangle will be set. + +	'DR1' and 'DR4' are portions of the 3DSTATE_DRAWING_RECTANGLE +	command which will be queued when this operation is clipped +	(num_cliprects != 0). + +		DR1 bit		definition +		31		Fast Scissor Clip Disable (debug only). +				Disables a hardware optimization that +				improves performance. This should have +				no visible effect, other than reducing +				performance. +				 +		30		Depth Buffer Coordinate Offset Disable. +				This disables the addition of the +				depth buffer offset bits which are used +				to change the location of the depth buffer +				relative to the front buffer. + +		27:26		X Dither Offset. Specifies the X pixel +				offset to use when accessing the dither table. +				 +		25:24		Y Dither Offset. Specifies the Y pixel +				offset to use when accessing the dither +				table. + +		DR4 bit		definition +		31:16		Drawing Rectangle Origin Y. Specifies the Y +				origin of coordinates relative to the +				draw buffer. + +		15:0		Drawing Rectangle Origin X. Specifies the X +				origin of coordinates relative to the +				draw buffer. + +	As you can see, these two fields are necessary for correctly +	offsetting drawing within a buffer which contains multiple surfaces. +	Note that DR1 is only used on Gen3 and earlier hardware and that +	newer hardware sticks the dither offset elsewhere. + +7.3.1 Detailed Execution Description + +	Execution of a single batch buffer requires several preparatory +	steps to make the objects visible to the graphics engine and resolve +	relocations to account for their current addresses. + + A. Mapping and Relocation + +	Each exec_object structure in the array is examined in turn.  +	 +	If the object is not already bound to the GTT, it is assigned a +	location in the graphics address space. If no space is available in +	the GTT, some other object will be evicted. This may require waiting +	for previous execbuffer requests to complete before that object can +	be unmapped. With the location assigned, the pages for the object +	are pinned in memory using find_or_create_page and the GTT entries +	updated to point at the relevant pages using drm_agp_bind_pages. +	 +	Then the array of relocations is traversed. Each relocation record +	looks up the target object and, if the presumed offset does not +	match the current offset (remember that this buffer has already been +	assigned an address as it must have been mapped earlier), the +	relocation value is computed using the current offset.  If the +	object is currently in use by the graphics engine, writing the data +	out must be preceded by a delay while the object is still busy. +	Once it is idle, then the page containing the relocation is mapped +	by the CPU and the updated relocation data written out. + +	The read_domains and write_domain entries in each relocation are +	used to compute the new read_domains and write_domain values for the +	target buffers. The actual execution of the domain changes must wait +	until all of the exec_object entries have been evaluated as the +	complete set of domain information will not be available until then. +	 + B. Memory Domain Resolution + +	After all of the new memory domain data has been pulled out of the +	relocations and computed for each object, the list of objects is +	again traversed and the new memory domains compared against the +	current memory domains. 
There are two basic operations involved here: + + 	 * Flushing the current write domain. If the new read domains +	   are not equal to the current write domain, then the current +	   write domain must be flushed. Otherwise, reads will not see data +	   present in the write domain cache. In addition, any new read domains +	   other than the current write domain must be invalidated to ensure +	   that the flushed data are re-read into their caches. + +	 * Invalidating new read domains. Any domains which were not currently +	   used for this object must be invalidated as old objects which +	   were mapped at the same location may have stale data in the new +	   domain caches. + +	If the CPU cache is being invalidated and some GPU cache is being +	flushed, then we'll have to wait for rendering to complete so that +	any pending GPU writes will be complete before we flush the GPU +	cache. + +	If the CPU cache is being flushed, then we use 'clflush' to get data +	written from the CPU. + +	Because the GPU caches cannot be partially flushed or invalidated, +	we don't actually flush them during this traversal stage. Rather, we +	gather the invalidate and flush bits up in the device structure. + +	Once all of the object domain changes have been evaluated, then the +	gathered invalidate and flush bits are examined. For any GPU flush +	operations, we emit a single MI_FLUSH command that performs all of +	the necessary flushes. We then look to see if the CPU cache was +	flushed. If so, we use the chipset flush magic (writing to a special +	page) to get the data out of the chipset and into memory. + + C. Queuing Batch Buffer to the Ring + +	With all of the objects resident in graphics memory space, and all +	of the caches prepared with appropriate data, the batch buffer +	object can be queued to the ring. If there are clip rectangles, then +	the buffer is queued once per rectangle, with suitable clipping +	inserted into the ring just before the batch buffer. + + D. Creating an IRQ Cookie + +	Right after the batch buffer is placed in the ring, a request to +	generate an IRQ is added to the ring along with a command to write a +	marker into memory. When the IRQ fires, the driver can look at the +	memory location to see how far along the ring the GPU has progressed. This +	magic cookie value is stored in each object used in this execbuffer +	command; it is used wherever you saw 'wait for rendering' above in +	this document. + + E. Writing back the new object offsets + +	So that the application has a better idea what to use for +	'presumed_offset' values later, the current object offsets are +	written back to the exec_object structures. + + +8. Other misc Intel-specific functions + +To complete the driver, a few other functions were necessary. + +8.1 Initialization from the X server + +As the X server is currently responsible for apportioning memory between 2D +and 3D, it must tell the kernel which region of the GTT aperture is +available for 3D objects to be mapped into. + +		struct drm_i915_gem_init { +			/** +			 * Beginning offset in the GTT to be managed by the +			 * DRM memory manager. +			 */ +			uint64_t gtt_start; +			/** +			 * Ending offset in the GTT to be managed by the DRM +			 * memory manager. +			 */ +			uint64_t gtt_end; +		}; +		/* usage */ +		init.gtt_start = <gtt_start>; +		init.gtt_end = <gtt_end>; +		ret = ioctl (fd, DRM_IOCTL_I915_GEM_INIT, &init); + +	The GTT aperture between gtt_start and gtt_end will be used to map +	objects. 
This also tells the kernel that the ring can be used, +	pulling the ring addresses from the device registers. + +8.2 Pinning objects in the GTT + +For scan-out buffers and the current shared depth and back buffers, we need +to have them always available in the GTT, at least for now. Pinning means to +lock their pages in memory along with keeping them at a fixed offset in the +graphics aperture. These operations are available only to root. +		 +		struct drm_i915_gem_pin { +			/** Handle of the buffer to be pinned. */ +			uint32_t handle; +			uint32_t pad; +			 +			/** alignment required within the aperture */ +			uint64_t alignment; +		 +			/** Returned GTT offset of the buffer. */ +			uint64_t offset; +		}; + +		/* usage */ +		pin.handle = <handle>; +		pin.alignment = <alignment>; +		ret = ioctl (fd, DRM_IOCTL_I915_GEM_PIN, &pin); +		if (ret == 0) +			return pin.offset; + +	Pinning an object ensures that it will not be evicted from the GTT +	or moved. It will stay resident until destroyed or unpinned. +		 +		struct drm_i915_gem_unpin { +			/** Handle of the buffer to be unpinned. */ +			uint32_t handle; +			uint32_t pad; +		}; +		 +		/* usage */ +		unpin.handle = <handle>; +		ret = ioctl (fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin); +		 +	Unpinning an object makes it possible to evict this object from the +	GTT. It doesn't ensure that it will be evicted, just that it may. + diff --git a/linux-core/drmP.h b/linux-core/drmP.h index 69d31e14..8246f44a 100644 --- a/linux-core/drmP.h +++ b/linux-core/drmP.h @@ -54,6 +54,7 @@  #include <linux/smp_lock.h>	/* For (un)lock_kernel */  #include <linux/dma-mapping.h>  #include <linux/mm.h> +#include <linux/kref.h>  #include <linux/pagemap.h>  #if LINUX_VERSION_CODE >= KERNEL_VERSION(2,6,16)  #include <linux/mutex.h> @@ -89,6 +90,10 @@  struct drm_device;  struct drm_file; +#if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,24) +typedef unsigned long uintptr_t; +#endif +  /* If you want the memory alloc debug functionality, change define below */  /* #define DEBUG_MEMORY */ @@ -107,7 +112,7 @@ struct drm_file;  #define DRIVER_IRQ_SHARED  0x80  #define DRIVER_DMA_QUEUE   0x100  #define DRIVER_FB_DMA      0x200 - +#define DRIVER_GEM	   0x400  /*@}*/ @@ -427,6 +432,11 @@ struct drm_file {  	struct list_head refd_objects; +	/** Mapping of mm object handles to object pointers. */ +	struct idr object_idr; +	/** Lock for synchronization of access to object_idr. */ +	spinlock_t table_lock; +  	struct drm_open_hash refd_object_hash[_DRM_NO_REF_TYPES];  	struct file *filp;  	void *driver_priv; @@ -464,6 +474,11 @@ struct drm_lock_data {  	uint32_t kernel_waiters;  	uint32_t user_waiters;  	int idle_has_lock; +	/** +	 * Boolean signaling that the lock is held on behalf of the +	 * file_priv client by the kernel in an ioctl handler. 
+	 */ +	int kernel_held;  };  /** @@ -539,17 +554,17 @@ struct drm_sigdata {   * Generic memory manager structs   */ -struct drm_mm_node { +struct drm_memrange_node {  	struct list_head fl_entry;  	struct list_head ml_entry;  	int free;  	unsigned long start;  	unsigned long size; -	struct drm_mm *mm; +	struct drm_memrange *mm;  	void *private;  }; -struct drm_mm { +struct drm_memrange {  	struct list_head fl_entry;  	struct list_head ml_entry;  }; @@ -563,7 +578,7 @@ struct drm_map_list {  	struct drm_hash_item hash;  	struct drm_map *map;			/**< mapping */  	uint64_t user_token; -	struct drm_mm_node *file_offset_node; +	struct drm_memrange_node *file_offset_node;  };  typedef struct drm_map drm_local_map_t; @@ -604,6 +619,56 @@ struct drm_ati_pcigart_info {  	int table_size;  }; +/** + * This structure defines the drm_mm memory object, which will be used by the + * DRM for its buffer objects. + */ +struct drm_gem_object { +	/** Reference count of this object */ +	struct kref refcount; + +	/** Handle count of this object. Each handle also holds a reference */ +	struct kref handlecount; + +	/** Related drm device */ +	struct drm_device *dev; +	 +	/** File representing the shmem storage */ +	struct file *filp; + +	/** +	 * Size of the object, in bytes.  Immutable over the object's +	 * lifetime. +	 */ +	size_t size; + +	/** +	 * Global name for this object, starts at 1. 0 means unnamed. +	 * Access is covered by the object_name_lock in the related drm_device +	 */ +	int name; + +	/** +	 * Memory domains. These monitor which caches contain read/write data +	 * related to the object. When transitioning from one set of domains +	 * to another, the driver is called to ensure that caches are suitably +	 * flushed and invalidated +	 */ +	uint32_t	read_domains; +	uint32_t	write_domain; + +	/** +	 * While validating an exec operation, the +	 * new read/write domain values are computed here. +	 * They will be transferred to the above values +	 * at the point that any cache flushing occurs +	 */ +	uint32_t	pending_read_domains; +	uint32_t	pending_write_domain; + +	void *driver_private; +}; +  #include "drm_objects.h"  /** @@ -705,6 +770,30 @@ struct drm_driver {  	void (*set_version) (struct drm_device *dev,  			     struct drm_set_version *sv); +	/** +	 * Driver-specific constructor for drm_gem_objects, to set up +	 * obj->driver_private. +	 * +	 * Returns 0 on success. 
+	 */ +	int (*gem_init_object) (struct drm_gem_object *obj); +	void (*gem_free_object) (struct drm_gem_object *obj); + +	/** +	 * Driver-specific callback to set memory domains from userspace +	 */ +	int (*gem_set_domain) (struct drm_gem_object *obj, +			       struct drm_file *file_priv, +			       uint32_t read_domains, +			       uint32_t write_domain); + +	/** +	 * Driver-specific callback to flush pwrite through chipset +	 */ +	int (*gem_flush_pwrite) (struct drm_gem_object *obj, +				 uint64_t offset, +				 uint64_t size); +  	struct drm_fence_driver *fence_driver;  	struct drm_bo_driver *bo_driver; @@ -787,7 +876,7 @@ struct drm_device {  	struct list_head maplist;	/**< Linked list of regions */  	int map_count;			/**< Number of mappable regions */  	struct drm_open_hash map_hash;       /**< User token hash table for maps */ -	struct drm_mm offset_manager;        /**< User token manager */ +	struct drm_memrange offset_manager;  /**< User token manager */  	struct drm_open_hash object_hash;    /**< User token hash table for objects */  	struct address_space *dev_mapping;  /**< For unmap_mapping_range() */  	struct page *ttm_dummy_page; @@ -885,6 +974,15 @@ struct drm_device {  	spinlock_t drw_lock;  	struct idr drw_idr;  	/*@} */ + +	/** \name GEM information */ +	/*@{ */ +	spinlock_t object_name_lock; +	struct idr object_name_idr; +	atomic_t object_count; +	uint32_t invalidate_domains;	/* domains pending invalidation */ +	uint32_t flush_domains;		/* domains pending flush */ +	/*@} */  };  #if __OS_HAS_AGP @@ -1000,6 +1098,10 @@ extern void drm_free_pages(unsigned long address, int order, int area);  extern DRM_AGP_MEM *drm_alloc_agp(struct drm_device *dev, int pages, u32 type);  extern int drm_free_agp(DRM_AGP_MEM * handle, int pages);  extern int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start); +extern DRM_AGP_MEM *drm_agp_bind_pages(struct drm_device *dev, +					      struct page **pages, +					      unsigned long num_pages, +					      uint32_t gtt_offset);  extern int drm_unbind_agp(DRM_AGP_MEM * handle);  extern void drm_free_memctl(size_t size); @@ -1082,6 +1184,9 @@ extern int drm_lock_take(struct drm_lock_data *lock_data, unsigned int context);  extern int drm_lock_free(struct drm_lock_data *lock_data, unsigned int context);  extern void drm_idlelock_take(struct drm_lock_data *lock_data);  extern void drm_idlelock_release(struct drm_lock_data *lock_data); +extern int drm_client_lock_take(struct drm_device *dev, +				struct drm_file *file_priv); +extern void drm_client_lock_release(struct drm_device *dev);  /*   * These are exported to drivers so that they can implement fencing using @@ -1234,26 +1339,108 @@ extern int drm_sysfs_device_add(struct drm_minor *minor);  extern void drm_sysfs_device_remove(struct drm_minor *minor);  /* - * Basic memory manager support (drm_mm.c) + * Basic memory manager support (drm_memrange.c)   */ -extern struct drm_mm_node * drm_mm_get_block(struct drm_mm_node * parent, unsigned long size, -					       unsigned alignment); -extern void drm_mm_put_block(struct drm_mm_node *cur); -extern struct drm_mm_node *drm_mm_search_free(const struct drm_mm *mm, unsigned long size, -						unsigned alignment, int best_match); -extern int drm_mm_init(struct drm_mm *mm, unsigned long start, unsigned long size); -extern void drm_mm_takedown(struct drm_mm *mm); -extern int drm_mm_clean(struct drm_mm *mm); -extern unsigned long drm_mm_tail_space(struct drm_mm *mm); -extern int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size); -extern int 
drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size); - -static inline struct drm_mm *drm_get_mm(struct drm_mm_node *block) +extern struct drm_memrange_node *drm_memrange_get_block(struct drm_memrange_node * parent, +							unsigned long size, +							unsigned alignment); +extern void drm_memrange_put_block(struct drm_memrange_node *cur); +extern struct drm_memrange_node *drm_memrange_search_free(const struct drm_memrange *mm, +							  unsigned long size, +							  unsigned alignment, int best_match); +extern int drm_memrange_init(struct drm_memrange *mm, +			     unsigned long start, unsigned long size); +extern void drm_memrange_takedown(struct drm_memrange *mm); +extern int drm_memrange_clean(struct drm_memrange *mm); +extern unsigned long drm_memrange_tail_space(struct drm_memrange *mm); +extern int drm_memrange_remove_space_from_tail(struct drm_memrange *mm, +					       unsigned long size); +extern int drm_memrange_add_space_to_tail(struct drm_memrange *mm, +					  unsigned long size); + +static inline struct drm_memrange *drm_get_mm(struct drm_memrange_node *block)  {  	return block->mm;  } +/* Graphics Execution Manager library functions (drm_gem.c) */ +int +drm_gem_init (struct drm_device *dev); + +void +drm_gem_object_free (struct kref *kref); + +void +drm_gem_object_handle_free (struct kref *kref); +     +static inline void drm_gem_object_reference(struct drm_gem_object *obj) +{ +	kref_get(&obj->refcount); +} + +static inline void drm_gem_object_unreference(struct drm_gem_object *obj) +{ +	if (obj == NULL) +		return; + +	kref_put (&obj->refcount, drm_gem_object_free); +} + +static inline void drm_gem_object_handle_reference (struct drm_gem_object *obj) +{ +	drm_gem_object_reference (obj); +	kref_get(&obj->handlecount); +} + +static inline void drm_gem_object_handle_unreference (struct drm_gem_object *obj) +{ +	if (obj == NULL) +		return; +	 +	/* +	 * Must bump handle count first as this may be the last +	 * ref, in which case the object would disappear before we +	 * checked for a name +	 */ +	kref_put (&obj->handlecount, drm_gem_object_handle_free); +	drm_gem_object_unreference (obj); +} + +struct drm_gem_object * +drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp, +		      int handle); +int drm_gem_create_ioctl(struct drm_device *dev, void *data, +			 struct drm_file *file_priv); +int drm_gem_close_ioctl(struct drm_device *dev, void *data, +			struct drm_file *file_priv); +int drm_gem_pread_ioctl(struct drm_device *dev, void *data, +			struct drm_file *file_priv); +int drm_gem_pwrite_ioctl(struct drm_device *dev, void *data, +			 struct drm_file *file_priv); +int drm_gem_mmap_ioctl(struct drm_device *dev, void *data, +		       struct drm_file *file_priv); +int drm_gem_flink_ioctl(struct drm_device *dev, void *data, +			struct drm_file *file_priv); +int drm_gem_open_ioctl(struct drm_device *dev, void *data, +		       struct drm_file *file_priv); +int drm_gem_set_domain_ioctl(struct drm_device *dev, void *data, +			     struct drm_file *file_priv); + +void drm_gem_open(struct drm_device *dev, struct drm_file *file_private); +void drm_gem_release(struct drm_device *dev, struct drm_file *file_private); + + +/* + * Given the new read/write domains for an object, + * compute the invalidate/flush domains for the whole device. 
+ * + */ +int drm_gem_object_set_domain (struct drm_gem_object *object, +			       uint32_t read_domains, +			       uint32_t write_domains); + +  extern void drm_core_ioremap(struct drm_map *map, struct drm_device *dev);  extern void drm_core_ioremapfree(struct drm_map *map, struct drm_device *dev); diff --git a/linux-core/drm_agpsupport.c b/linux-core/drm_agpsupport.c index 0aa94a75..d6594b87 100644 --- a/linux-core/drm_agpsupport.c +++ b/linux-core/drm_agpsupport.c @@ -484,7 +484,50 @@ int drm_agp_unbind_memory(DRM_AGP_MEM * handle)  	return agp_unbind_memory(handle);  } +/** + * Binds a collection of pages into AGP memory at the given offset, returning + * the AGP memory structure containing them. + * + * No reference is held on the pages during this time -- it is up to the + * caller to handle that. + */ +DRM_AGP_MEM * +drm_agp_bind_pages(struct drm_device *dev, +		   struct page **pages, +		   unsigned long num_pages, +		   uint32_t gtt_offset) +{ +	DRM_AGP_MEM *mem; +	int ret, i; + +	DRM_DEBUG("drm_agp_populate_ttm\n"); +#if LINUX_VERSION_CODE <= KERNEL_VERSION(2,6,11) +	mem = drm_agp_allocate_memory(num_pages, AGP_USER_MEMORY); +#else +	mem = drm_agp_allocate_memory(dev->agp->bridge, num_pages, +				      AGP_USER_MEMORY); +#endif +	if (mem == NULL) { +		DRM_ERROR("Failed to allocate memory for %ld pages\n", +			  num_pages); +		return NULL; +	} + +	for (i = 0; i < num_pages; i++) +		mem->memory[i] = phys_to_gart(page_to_phys(pages[i])); +	mem->page_count = num_pages; + +	mem->is_flushed = TRUE; +	ret = drm_agp_bind_memory(mem, gtt_offset / PAGE_SIZE); +	if (ret != 0) { +		DRM_ERROR("Failed to bind AGP memory: %d\n", ret); +		agp_free_memory(mem); +		return NULL; +	} +	return mem; +} +EXPORT_SYMBOL(drm_agp_bind_pages);  /*   * AGP ttm backend interface. 
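As an illustration only (not part of this patch), a driver that has already gathered and pinned an object's backing pages might bind them into the GTT with the new helper roughly as follows; 'pages', 'num_pages', 'gtt_offset' and the driver-private 'obj_priv' are hypothetical names owned by the caller:

		DRM_AGP_MEM *mem;

		/* Bind the already-pinned pages at the object's assigned
		 * GTT offset; the helper takes no page references itself.
		 */
		mem = drm_agp_bind_pages(dev, pages, num_pages, gtt_offset);
		if (mem == NULL)
			return -ENOMEM;

		/* Keep the handle so the range can later be unbound with
		 * drm_agp_unbind_memory() and freed with agp_free_memory().
		 */
		obj_priv->agp_mem = mem;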
diff --git a/linux-core/drm_bo.c b/linux-core/drm_bo.c index 88b2ee66..3abbb8c4 100644 --- a/linux-core/drm_bo.c +++ b/linux-core/drm_bo.c @@ -418,14 +418,14 @@ static void drm_bo_cleanup_refs(struct drm_buffer_object *bo, int remove_all)  	if (!bo->fence) {  		list_del_init(&bo->lru);  		if (bo->mem.mm_node) { -			drm_mm_put_block(bo->mem.mm_node); +			drm_memrange_put_block(bo->mem.mm_node);  			if (bo->pinned_node == bo->mem.mm_node)  				bo->pinned_node = NULL;  			bo->mem.mm_node = NULL;  		}  		list_del_init(&bo->pinned_lru);  		if (bo->pinned_node) { -			drm_mm_put_block(bo->pinned_node); +			drm_memrange_put_block(bo->pinned_node);  			bo->pinned_node = NULL;  		}  		list_del_init(&bo->ddestroy); @@ -791,7 +791,7 @@ out:  	mutex_lock(&dev->struct_mutex);  	if (evict_mem.mm_node) {  		if (evict_mem.mm_node != bo->pinned_node) -			drm_mm_put_block(evict_mem.mm_node); +			drm_memrange_put_block(evict_mem.mm_node);  		evict_mem.mm_node = NULL;  	}  	drm_bo_add_to_lru(bo); @@ -810,7 +810,7 @@ static int drm_bo_mem_force_space(struct drm_device *dev,  				  struct drm_bo_mem_reg *mem,  				  uint32_t mem_type, int no_wait)  { -	struct drm_mm_node *node; +	struct drm_memrange_node *node;  	struct drm_buffer_manager *bm = &dev->bm;  	struct drm_buffer_object *entry;  	struct drm_mem_type_manager *man = &bm->man[mem_type]; @@ -820,7 +820,7 @@ static int drm_bo_mem_force_space(struct drm_device *dev,  	mutex_lock(&dev->struct_mutex);  	do { -		node = drm_mm_search_free(&man->manager, num_pages, +		node = drm_memrange_search_free(&man->manager, num_pages,  					  mem->page_alignment, 1);  		if (node)  			break; @@ -846,7 +846,7 @@ static int drm_bo_mem_force_space(struct drm_device *dev,  		return -ENOMEM;  	} -	node = drm_mm_get_block(node, num_pages, mem->page_alignment); +	node = drm_memrange_get_block(node, num_pages, mem->page_alignment);  	if (unlikely(!node)) {  		mutex_unlock(&dev->struct_mutex);  		return -ENOMEM; @@ -924,7 +924,7 @@ int drm_bo_mem_space(struct drm_buffer_object *bo,  	int type_found = 0;  	int type_ok = 0;  	int has_eagain = 0; -	struct drm_mm_node *node = NULL; +	struct drm_memrange_node *node = NULL;  	int ret;  	mem->mm_node = NULL; @@ -952,10 +952,10 @@ int drm_bo_mem_space(struct drm_buffer_object *bo,  		mutex_lock(&dev->struct_mutex);  		if (man->has_type && man->use_type) {  			type_found = 1; -			node = drm_mm_search_free(&man->manager, mem->num_pages, +			node = drm_memrange_search_free(&man->manager, mem->num_pages,  						  mem->page_alignment, 1);  			if (node) -				node = drm_mm_get_block(node, mem->num_pages, +				node = drm_memrange_get_block(node, mem->num_pages,  							mem->page_alignment);  		}  		mutex_unlock(&dev->struct_mutex); @@ -1340,7 +1340,7 @@ out_unlock:  	if (ret || !move_unfenced) {  		if (mem.mm_node) {  			if (mem.mm_node != bo->pinned_node) -				drm_mm_put_block(mem.mm_node); +				drm_memrange_put_block(mem.mm_node);  			mem.mm_node = NULL;  		}  		drm_bo_add_to_lru(bo); @@ -1432,7 +1432,7 @@ static int drm_buffer_object_validate(struct drm_buffer_object *bo,  		if (bo->pinned_node != bo->mem.mm_node) {  			if (bo->pinned_node != NULL) -				drm_mm_put_block(bo->pinned_node); +				drm_memrange_put_block(bo->pinned_node);  			bo->pinned_node = bo->mem.mm_node;  		} @@ -1443,7 +1443,7 @@ static int drm_buffer_object_validate(struct drm_buffer_object *bo,  		mutex_lock(&dev->struct_mutex);  		if (bo->pinned_node != bo->mem.mm_node) -			drm_mm_put_block(bo->pinned_node); +			drm_memrange_put_block(bo->pinned_node);  		
list_del_init(&bo->pinned_lru);  		bo->pinned_node = NULL; @@ -2082,7 +2082,7 @@ static int drm_bo_leave_list(struct drm_buffer_object *bo,  		if (bo->pinned_node == bo->mem.mm_node)  			bo->pinned_node = NULL;  		if (bo->pinned_node != NULL) { -			drm_mm_put_block(bo->pinned_node); +			drm_memrange_put_block(bo->pinned_node);  			bo->pinned_node = NULL;  		}  		mutex_unlock(&dev->struct_mutex); @@ -2223,8 +2223,8 @@ int drm_bo_clean_mm(struct drm_device *dev, unsigned mem_type, int kern_clean)  		drm_bo_force_list_clean(dev, &man->lru, mem_type, 1, 0, 0);  		drm_bo_force_list_clean(dev, &man->pinned, mem_type, 1, 0, 1); -		if (drm_mm_clean(&man->manager)) { -			drm_mm_takedown(&man->manager); +		if (drm_memrange_clean(&man->manager)) { +			drm_memrange_takedown(&man->manager);  		} else {  			ret = -EBUSY;  		} @@ -2295,7 +2295,7 @@ int drm_bo_init_mm(struct drm_device *dev, unsigned type,  			DRM_ERROR("Zero size memory manager type %d\n", type);  			return ret;  		} -		ret = drm_mm_init(&man->manager, p_offset, p_size); +		ret = drm_memrange_init(&man->manager, p_offset, p_size);  		if (ret)  			return ret;  	} @@ -2713,7 +2713,7 @@ static void drm_bo_takedown_vm_locked(struct drm_buffer_object *bo)  		list->user_token = 0;  	}  	if (list->file_offset_node) { -		drm_mm_put_block(list->file_offset_node); +		drm_memrange_put_block(list->file_offset_node);  		list->file_offset_node = NULL;  	} @@ -2756,7 +2756,7 @@ static int drm_bo_setup_vm_locked(struct drm_buffer_object *bo)  	atomic_inc(&bo->usage);  	map->handle = (void *)bo; -	list->file_offset_node = drm_mm_search_free(&dev->offset_manager, +	list->file_offset_node = drm_memrange_search_free(&dev->offset_manager,  						    bo->mem.num_pages, 0, 0);  	if (unlikely(!list->file_offset_node)) { @@ -2764,7 +2764,7 @@ static int drm_bo_setup_vm_locked(struct drm_buffer_object *bo)  		return -ENOMEM;  	} -	list->file_offset_node = drm_mm_get_block(list->file_offset_node, +	list->file_offset_node = drm_memrange_get_block(list->file_offset_node,  						  bo->mem.num_pages, 0);  	if (unlikely(!list->file_offset_node)) { diff --git a/linux-core/drm_bo_move.c b/linux-core/drm_bo_move.c index bf0e1b74..850be5a3 100644 --- a/linux-core/drm_bo_move.c +++ b/linux-core/drm_bo_move.c @@ -41,7 +41,7 @@ static void drm_bo_free_old_node(struct drm_buffer_object *bo)  	if (old_mem->mm_node && (old_mem->mm_node != bo->pinned_node)) {  		mutex_lock(&bo->dev->struct_mutex); -		drm_mm_put_block(old_mem->mm_node); +		drm_memrange_put_block(old_mem->mm_node);  		mutex_unlock(&bo->dev->struct_mutex);  	}  	old_mem->mm_node = NULL; diff --git a/linux-core/drm_drv.c b/linux-core/drm_drv.c index b8b8333e..edc1f057 100644 --- a/linux-core/drm_drv.c +++ b/linux-core/drm_drv.c @@ -150,6 +150,15 @@ static struct drm_ioctl_desc drm_ioctls[] = {  	DRM_IOCTL_DEF(DRM_IOCTL_BO_VERSION, drm_bo_version_ioctl, 0),  	DRM_IOCTL_DEF(DRM_IOCTL_MM_INFO, drm_mm_info_ioctl, 0), + +	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CREATE, drm_gem_create_ioctl, 0), +	DRM_IOCTL_DEF(DRM_IOCTL_GEM_CLOSE, drm_gem_close_ioctl, 0), +	DRM_IOCTL_DEF(DRM_IOCTL_GEM_PREAD, drm_gem_pread_ioctl, 0), +	DRM_IOCTL_DEF(DRM_IOCTL_GEM_PWRITE, drm_gem_pwrite_ioctl, 0), +	DRM_IOCTL_DEF(DRM_IOCTL_GEM_MMAP, drm_gem_mmap_ioctl, 0), +	DRM_IOCTL_DEF(DRM_IOCTL_GEM_FLINK, drm_gem_flink_ioctl, DRM_AUTH), +	DRM_IOCTL_DEF(DRM_IOCTL_GEM_OPEN, drm_gem_open_ioctl, DRM_AUTH), +	DRM_IOCTL_DEF(DRM_IOCTL_GEM_SET_DOMAIN, drm_gem_set_domain_ioctl, DRM_AUTH),  };  #define DRM_CORE_IOCTL_COUNT	ARRAY_SIZE( drm_ioctls ) @@ -415,7 +424,7 @@ static 
void drm_cleanup(struct drm_device * dev)  	drm_ctxbitmap_cleanup(dev);  	drm_ht_remove(&dev->map_hash); -	drm_mm_takedown(&dev->offset_manager); +	drm_memrange_takedown(&dev->offset_manager);  	drm_ht_remove(&dev->object_hash);  	drm_put_minor(&dev->primary); diff --git a/linux-core/drm_fops.c b/linux-core/drm_fops.c index 3bc25f24..ec521101 100644 --- a/linux-core/drm_fops.c +++ b/linux-core/drm_fops.c @@ -274,6 +274,9 @@ static int drm_open_helper(struct inode *inode, struct file *filp,  		goto out_free;  	} +	if (dev->driver->driver_features & DRIVER_GEM) +		drm_gem_open(dev, priv); +  	if (dev->driver->open) {  		ret = dev->driver->open(dev, priv);  		if (ret < 0) @@ -444,6 +447,9 @@ int drm_release(struct inode *inode, struct file *filp)  		dev->driver->reclaim_buffers(dev, file_priv);  	} +	if (dev->driver->driver_features & DRIVER_GEM) +		drm_gem_release(dev, file_priv); +  	drm_fasync(-1, filp, 0);  	mutex_lock(&dev->ctxlist_mutex); diff --git a/linux-core/drm_gem.c b/linux-core/drm_gem.c new file mode 100644 index 00000000..b726e598 --- /dev/null +++ b/linux-core/drm_gem.c @@ -0,0 +1,639 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *    Eric Anholt <eric@anholt.net> + * + */ + +#include <linux/types.h> +#include <linux/slab.h> +#include <linux/mm.h> +#include <linux/uaccess.h> +#include <linux/fs.h> +#include <linux/file.h> +#include <linux/module.h> +#include <linux/mman.h> +#include <linux/pagemap.h> +#include "drmP.h" + +/** @file drm_gem.c + * + * This file provides some of the base ioctls and library routines for + * the graphics memory manager implemented by each device driver. + * + * Because various devices have different requirements in terms of + * synchronization and migration strategies, implementing that is left up to + * the driver, and all that the general API provides should be generic -- + * allocating objects, reading/writing data with the cpu, freeing objects. + * Even there, platform-dependent optimizations for reading/writing data with + * the CPU mean we'll likely hook those out to driver-specific calls.  However, + * the DRI2 implementation wants to have at least allocate/mmap be generic. + * + * The goal was to have swap-backed object allocation managed through + * struct file.  
However, file descriptors as handles to a struct file have + * two major failings: + * - Process limits prevent more than 1024 or so being used at a time by + *   default. + * - Inability to allocate high fds will aggravate the X Server's select() + *   handling, and likely that of many GL client applications as well. + * + * This led to a plan of using our own integer IDs (called handles, following + * DRM terminology) to mimic fds, and implement the fd syscalls we need as + * ioctls.  The objects themselves will still include the struct file so + * that we can transition to fds if the required kernel infrastructure shows + * up at a later date, and as our interface with shmfs for memory allocation. + */ + +/** + * Initialize the GEM device fields + */ + +int +drm_gem_init(struct drm_device *dev) +{ +	spin_lock_init(&dev->object_name_lock); +	idr_init(&dev->object_name_idr); +	atomic_set(&dev->object_count, 0); +	return 0; +} + +/** + * Allocate a GEM object of the specified size with shmfs backing store + */ +static struct drm_gem_object * +drm_gem_object_alloc(struct drm_device *dev, size_t size) +{ +	struct drm_gem_object *obj; + +	BUG_ON((size & (PAGE_SIZE - 1)) != 0); + +	obj = kcalloc(1, sizeof(*obj), GFP_KERNEL); + +	obj->dev = dev; +	obj->filp = shmem_file_setup("drm mm object", size, 0); +	if (IS_ERR(obj->filp)) { +		kfree(obj); +		return NULL; +	} + +	kref_init(&obj->refcount); +	kref_init(&obj->handlecount); +	obj->size = size; + +	/* +	 * We've just allocated pages from the kernel, +	 * so they've just been written by the CPU with +	 * zeros. They'll need to be clflushed before we +	 * use them with the GPU. +	 */ +	obj->write_domain = DRM_GEM_DOMAIN_CPU; +	obj->read_domains = DRM_GEM_DOMAIN_CPU; +	if (dev->driver->gem_init_object != NULL && +	    dev->driver->gem_init_object(obj) != 0) { +		fput(obj->filp); +		kfree(obj); +		return NULL; +	} +	atomic_inc(&dev->object_count); +	return obj; +} + +/** + * Removes the mapping from handle to filp for this object. + */ +static int +drm_gem_handle_delete(struct drm_file *filp, int handle) +{ +	struct drm_device *dev; +	struct drm_gem_object *obj; + +	/* This is gross. The idr system doesn't let us try a delete and +	 * return an error code.  It just spews if you fail at deleting. +	 * So, we have to grab a lock around finding the object and then +	 * doing the delete on it and dropping the refcount, or the user +	 * could race us to double-decrement the refcount and cause a +	 * use-after-free later.  Given the frequency of our handle lookups, +	 * we may want to use ida for number allocation and a hash table +	 * for the pointers, anyway. +	 */ +	spin_lock(&filp->table_lock); + +	/* Check if we currently have a reference on the object */ +	obj = idr_find(&filp->object_idr, handle); +	if (obj == NULL) { +		spin_unlock(&filp->table_lock); +		return -EINVAL; +	} +	dev = obj->dev; + +	/* Release reference and decrement refcount. */ +	idr_remove(&filp->object_idr, handle); +	spin_unlock(&filp->table_lock); + +	mutex_lock(&dev->struct_mutex); +	drm_gem_object_handle_unreference(obj); +	mutex_unlock(&dev->struct_mutex); + +	return 0; +} + +/** + * Create a handle for this object. This adds a handle reference + * to the object, which includes a regular reference count. Callers + * will likely want to dereference the object afterwards. + */ +static int +drm_gem_handle_create(struct drm_file *file_priv, +		       struct drm_gem_object *obj, +		       int *handlep) +{ +	int	ret; + +	/* +	 * Get the user-visible handle using idr. 
+	 */ +again: +	/* ensure there is space available to allocate a handle */ +	if (idr_pre_get(&file_priv->object_idr, GFP_KERNEL) == 0) +		return -ENOMEM; + +	/* do the allocation under our spinlock */ +	spin_lock(&file_priv->table_lock); +	ret = idr_get_new_above(&file_priv->object_idr, obj, 1, handlep); +	spin_unlock(&file_priv->table_lock); +	if (ret == -EAGAIN) +		goto again; + +	if (ret != 0) +		return ret; + +	drm_gem_object_handle_reference(obj); +	return 0; +} + +/** Returns a reference to the object named by the handle. */ +struct drm_gem_object * +drm_gem_object_lookup(struct drm_device *dev, struct drm_file *filp, +		      int handle) +{ +	struct drm_gem_object *obj; + +	spin_lock(&filp->table_lock); + +	/* Check if we currently have a reference on the object */ +	obj = idr_find(&filp->object_idr, handle); +	if (obj == NULL) { +		spin_unlock(&filp->table_lock); +		return NULL; +	} + +	drm_gem_object_reference(obj); + +	spin_unlock(&filp->table_lock); + +	return obj; +} +EXPORT_SYMBOL(drm_gem_object_lookup); + +/** + * Creates a new mm object and returns a handle to it. + */ +int +drm_gem_create_ioctl(struct drm_device *dev, void *data, +		     struct drm_file *file_priv) +{ +	struct drm_gem_create *args = data; +	struct drm_gem_object *obj; +	int handle, ret; + +	if (!(dev->driver->driver_features & DRIVER_GEM)) +		return -ENODEV; + +	args->size = roundup(args->size, PAGE_SIZE); + +	/* Allocate the new object */ +	obj = drm_gem_object_alloc(dev, args->size); +	if (obj == NULL) +		return -ENOMEM; + +	ret = drm_gem_handle_create(file_priv, obj, &handle); +	mutex_lock(&dev->struct_mutex); +	drm_gem_object_handle_unreference(obj); +	mutex_unlock(&dev->struct_mutex); + +	if (ret) +		return ret; + +	args->handle = handle; + +	return 0; +} + +/** + * Releases the handle to an mm object. + */ +int +drm_gem_close_ioctl(struct drm_device *dev, void *data, +		    struct drm_file *file_priv) +{ +	struct drm_gem_close *args = data; +	int ret; + +	if (!(dev->driver->driver_features & DRIVER_GEM)) +		return -ENODEV; + +	ret = drm_gem_handle_delete(file_priv, args->handle); + +	return ret; +} + +/** + * Reads data from the object referenced by handle. + * + * On error, the contents of *data are undefined. + */ +int +drm_gem_pread_ioctl(struct drm_device *dev, void *data, +		    struct drm_file *file_priv) +{ +	struct drm_gem_pread *args = data; +	struct drm_gem_object *obj; +	ssize_t read; +	loff_t offset; +	int ret; + +	if (!(dev->driver->driver_features & DRIVER_GEM)) +		return -ENODEV; + +	obj = drm_gem_object_lookup(dev, file_priv, args->handle); +	if (obj == NULL) +		return -EINVAL; + +	mutex_lock(&dev->struct_mutex); +	if (dev->driver->gem_set_domain) { +		ret = dev->driver->gem_set_domain(obj, file_priv, +						  DRM_GEM_DOMAIN_CPU, +						  0); +		if (ret) { +			drm_gem_object_unreference(obj); +			mutex_unlock(&dev->struct_mutex); +			return ret; +		} +	} +	offset = args->offset; + +	read = vfs_read(obj->filp, (char __user *)(uintptr_t)args->data_ptr, +			args->size, &offset); +	if (read != args->size) { +		drm_gem_object_unreference(obj); +		mutex_unlock(&dev->struct_mutex); +		if (read < 0) +			return read; +		else +			return -EINVAL; +	} + +	drm_gem_object_unreference(obj); +	mutex_unlock(&dev->struct_mutex); + +	return 0; +} + +/** + * Maps the contents of an object, returning the address it is mapped + * into. + * + * While the mapping holds a reference on the contents of the object, it doesn't + * imply a ref on the object itself. 
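+ *
+ * A rough sketch of the expected userspace call sequence (the
+ * DRM_IOCTL_GEM_MMAP request name is assumed here; the fields mirror
+ * the struct drm_gem_mmap usage below):
+ *
+ *	struct drm_gem_mmap arg = { .handle = handle, .size = size };
+ *
+ *	if (ioctl(fd, DRM_IOCTL_GEM_MMAP, &arg) == 0)
+ *		ptr = (void *)(uintptr_t) arg.addr_ptr;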
+ */
+int
+drm_gem_mmap_ioctl(struct drm_device *dev, void *data,
+		   struct drm_file *file_priv)
+{
+	struct drm_gem_mmap *args = data;
+	struct drm_gem_object *obj;
+	loff_t offset;
+	unsigned long addr;
+
+	if (!(dev->driver->driver_features & DRIVER_GEM))
+		return -ENODEV;
+
+	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (obj == NULL)
+		return -EINVAL;
+
+	offset = args->offset;
+
+	down_write(&current->mm->mmap_sem);
+	addr = do_mmap(obj->filp, 0, args->size,
+		       PROT_READ | PROT_WRITE, MAP_SHARED,
+		       args->offset);
+	up_write(&current->mm->mmap_sem);
+	mutex_lock(&dev->struct_mutex);
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+	if (IS_ERR((void *)addr))
+		return addr;
+
+	args->addr_ptr = (uint64_t) addr;
+
+	return 0;
+}
+
+/**
+ * Writes data to the object referenced by handle.
+ *
+ * On error, the contents of the buffer that were to be modified are undefined.
+ */
+int
+drm_gem_pwrite_ioctl(struct drm_device *dev, void *data,
+		     struct drm_file *file_priv)
+{
+	struct drm_gem_pwrite *args = data;
+	struct drm_gem_object *obj;
+	ssize_t written;
+	loff_t offset;
+	int ret;
+
+	if (!(dev->driver->driver_features & DRIVER_GEM))
+		return -ENODEV;
+
+	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (obj == NULL)
+		return -EINVAL;
+
+	mutex_lock(&dev->struct_mutex);
+	if (dev->driver->gem_set_domain) {
+		ret = dev->driver->gem_set_domain(obj, file_priv,
+						  DRM_GEM_DOMAIN_CPU,
+						  DRM_GEM_DOMAIN_CPU);
+		if (ret) {
+			drm_gem_object_unreference(obj);
+			mutex_unlock(&dev->struct_mutex);
+			return ret;
+		}
+	}
+	offset = args->offset;
+
+	written = vfs_write(obj->filp,
+			    (char __user *)(uintptr_t) args->data_ptr,
+			    args->size, &offset);
+
+	if (written != args->size) {
+		drm_gem_object_unreference(obj);
+		mutex_unlock(&dev->struct_mutex);
+		if (written < 0)
+			return written;
+		else
+			return -EINVAL;
+	}
+
+	if (dev->driver->gem_flush_pwrite)
+		dev->driver->gem_flush_pwrite(obj,
+					      args->offset,
+					      args->size);
+
+	drm_gem_object_unreference(obj);
+	mutex_unlock(&dev->struct_mutex);
+
+	return 0;
+}
+
+/**
+ * Create a global name for an object, returning the name.
+ *
+ * Note that the name does not hold a reference; when the object
+ * is freed, the name goes away.
+ */
+int
+drm_gem_flink_ioctl(struct drm_device *dev, void *data,
+		    struct drm_file *file_priv)
+{
+	struct drm_gem_flink *args = data;
+	struct drm_gem_object *obj;
+	int ret;
+
+	if (!(dev->driver->driver_features & DRIVER_GEM))
+		return -ENODEV;
+
+	obj = drm_gem_object_lookup(dev, file_priv, args->handle);
+	if (obj == NULL)
+		return -EINVAL;
+
+again:
+	if (idr_pre_get(&dev->object_name_idr, GFP_KERNEL) == 0)
+		return -ENOMEM;
+
+	spin_lock(&dev->object_name_lock);
+	if (obj->name) {
+		spin_unlock(&dev->object_name_lock);
+		return -EEXIST;
+	}
+	ret = idr_get_new_above(&dev->object_name_idr, obj, 1,
+				 &obj->name);
+	spin_unlock(&dev->object_name_lock);
+	if (ret == -EAGAIN)
+		goto again;
+
+	if (ret != 0) {
+		mutex_lock(&dev->struct_mutex);
+		drm_gem_object_unreference(obj);
+		mutex_unlock(&dev->struct_mutex);
+		return ret;
+	}
+
+	/*
+	 * Leave the reference from the lookup around as the
+	 * name table now holds one
+	 */
+	args->name = (uint64_t) obj->name;
+
+	return 0;
+}
+
+/**
+ * Open an object using the global name, returning a handle and the size.
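+ *
+ * Paired with the flink ioctl above, the intended sharing flow is
+ * roughly: process A flinks one of its handles to obtain a 32-bit name,
+ * hands that name to process B out of band (e.g. through the X protocol),
+ * and process B passes the name to this ioctl to get a handle of its own.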
+ * + * This handle (of course) holds a reference to the object, so the object + * will not go away until the handle is deleted. + */ +int +drm_gem_open_ioctl(struct drm_device *dev, void *data, +		   struct drm_file *file_priv) +{ +	struct drm_gem_open *args = data; +	struct drm_gem_object *obj; +	int ret; +	int handle; + +	if (!(dev->driver->driver_features & DRIVER_GEM)) +		return -ENODEV; + +	spin_lock(&dev->object_name_lock); +	obj = idr_find(&dev->object_name_idr, (int) args->name); +	if (obj) +		drm_gem_object_reference(obj); +	spin_unlock(&dev->object_name_lock); +	if (!obj) +		return -ENOENT; + +	ret = drm_gem_handle_create(file_priv, obj, &handle); +	mutex_lock(&dev->struct_mutex); +	drm_gem_object_unreference(obj); +	mutex_unlock(&dev->struct_mutex); +	if (ret) +		return ret; + +	args->handle = handle; +	args->size = obj->size; + +	return 0; +} + +/** + * Called when user space prepares to use an object + */ +int +drm_gem_set_domain_ioctl(struct drm_device *dev, void *data, +			  struct drm_file *file_priv) +{ +	struct drm_gem_set_domain *args = data; +	struct drm_gem_object *obj; +	int ret; + +	if (!(dev->driver->driver_features & DRIVER_GEM)) +		return -ENODEV; + +	obj = drm_gem_object_lookup(dev, file_priv, args->handle); +	if (obj == NULL) +		return -EINVAL; + +	mutex_lock(&dev->struct_mutex); +	if (dev->driver->gem_set_domain) { +		ret = dev->driver->gem_set_domain(obj, file_priv, +						   args->read_domains, +						   args->write_domain); +	} else { +		obj->read_domains = args->read_domains; +		obj->write_domain = args->write_domain; +		ret = 0; +	} +	drm_gem_object_unreference(obj); +	mutex_unlock(&dev->struct_mutex); +	return ret; +} + +/** + * Called at device open time, sets up the structure for handling refcounting + * of mm objects. + */ +void +drm_gem_open(struct drm_device *dev, struct drm_file *file_private) +{ +	idr_init(&file_private->object_idr); +	spin_lock_init(&file_private->table_lock); +} + +/** + * Called at device close to release the file's + * handle references on objects. + */ +static int +drm_gem_object_release_handle(int id, void *ptr, void *data) +{ +	struct drm_gem_object *obj = ptr; + +	drm_gem_object_handle_unreference(obj); + +	return 0; +} + +/** + * Called at close time when the filp is going away. + * + * Releases any remaining references on objects by this filp. + */ +void +drm_gem_release(struct drm_device *dev, struct drm_file *file_private) +{ +	mutex_lock(&dev->struct_mutex); +	idr_for_each(&file_private->object_idr, +		     &drm_gem_object_release_handle, NULL); + +	idr_destroy(&file_private->object_idr); +	mutex_unlock(&dev->struct_mutex); +} + +/** + * Called after the last reference to the object has been lost. + * + * Frees the object + */ +void +drm_gem_object_free(struct kref *kref) +{ +	struct drm_gem_object *obj = (struct drm_gem_object *) kref; +	struct drm_device *dev = obj->dev; + +	BUG_ON(!mutex_is_locked(&dev->struct_mutex)); + +	if (dev->driver->gem_free_object != NULL) +		dev->driver->gem_free_object(obj); + +	fput(obj->filp); +	atomic_dec(&dev->object_count); +	kfree(obj); +} +EXPORT_SYMBOL(drm_gem_object_free); + +/** + * Called after the last handle to the object has been closed + * + * Removes any name for the object. 
Note that this must be + * called before drm_gem_object_free or we'll be touching + * freed memory + */ +void +drm_gem_object_handle_free(struct kref *kref) +{ +	struct drm_gem_object *obj = container_of(kref, +						  struct drm_gem_object, +						  handlecount); +	struct drm_device *dev = obj->dev; + +	/* Remove any name for this object */ +	spin_lock(&dev->object_name_lock); +	if (obj->name) { +		idr_remove(&dev->object_name_idr, obj->name); +		spin_unlock(&dev->object_name_lock); +		/* +		 * The object name held a reference to this object, drop +		 * that now. +		 */ +		drm_gem_object_unreference(obj); +	} else +		spin_unlock(&dev->object_name_lock); + +} +EXPORT_SYMBOL(drm_gem_object_handle_free); + diff --git a/linux-core/drm_lock.c b/linux-core/drm_lock.c index d4c2da0c..58c5f08d 100644 --- a/linux-core/drm_lock.c +++ b/linux-core/drm_lock.c @@ -213,22 +213,16 @@ int drm_lock_take(struct drm_lock_data *lock_data,  	} while (prev != old);  	spin_unlock_bh(&lock_data->spinlock); -	if (_DRM_LOCKING_CONTEXT(old) == context) { -		if (old & _DRM_LOCK_HELD) { -			if (context != DRM_KERNEL_CONTEXT) { -				DRM_ERROR("%d holds heavyweight lock\n", -					  context); -			} -			return 0; +	/* Warn on recursive locking of user contexts. */ +	if (_DRM_LOCKING_CONTEXT(old) == context && _DRM_LOCK_IS_HELD(old)) { +		if (context != DRM_KERNEL_CONTEXT) { +			DRM_ERROR("%d holds heavyweight lock\n", +				  context);  		} +		return 0;  	} -	if ((_DRM_LOCKING_CONTEXT(new)) == context && (new & _DRM_LOCK_HELD)) { -		/* Have lock */ - -		return 1; -	} -	return 0; +	return !_DRM_LOCK_IS_HELD(old);  }  /** @@ -381,6 +375,60 @@ void drm_idlelock_release(struct drm_lock_data *lock_data)  }  EXPORT_SYMBOL(drm_idlelock_release); +/** + * Takes the lock on behalf of the client if needed, using the kernel context. + * + * This allows us to hide the hardware lock when it's required for protection + * of data structures (such as command ringbuffer) shared with the X Server, and + * a way for us to transition to lockless for those requests when the X Server + * stops accessing the ringbuffer directly, without having to update the + * other userland clients. + */ +int drm_client_lock_take(struct drm_device *dev, struct drm_file *file_priv) +{ +	int ret; +	unsigned long irqflags; + +	/* If the client has the lock, we're already done. */ +	if (drm_i_have_hw_lock(dev, file_priv)) +		return 0; + +	/* Client doesn't hold the lock.  Block taking the lock with the kernel +	 * context on behalf of the client, and return whether we were +	 * successful. 
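+	 *
+	 * In a driver ioctl the pairing looks roughly like (illustrative
+	 * sketch, error handling trimmed):
+	 *
+	 *	ret = drm_client_lock_take(dev, file_priv);
+	 *	if (ret)
+	 *		return ret;
+	 *	... touch the ring or other shared state ...
+	 *	drm_client_lock_release(dev);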
+	 */ +	spin_lock_irqsave(&dev->lock.spinlock, irqflags); +	dev->lock.user_waiters++; +	spin_unlock_irqrestore(&dev->lock.spinlock, irqflags); +	ret = wait_event_interruptible(dev->lock.lock_queue, +				       drm_lock_take(&dev->lock, +						     DRM_KERNEL_CONTEXT)); +	spin_lock_irqsave(&dev->lock.spinlock, irqflags); +	dev->lock.user_waiters--; +	if (ret != 0) { +		spin_unlock_irqrestore(&dev->lock.spinlock, irqflags); +		return ret; +	} else { +		dev->lock.file_priv = file_priv; +		dev->lock.lock_time = jiffies; +		dev->lock.kernel_held = 1; +		file_priv->lock_count++; +		spin_unlock_irqrestore(&dev->lock.spinlock, irqflags); +		return 0; +	} +} +EXPORT_SYMBOL(drm_client_lock_take); + +void drm_client_lock_release(struct drm_device *dev) +{ +	if (dev->lock.kernel_held) { +		dev->lock.kernel_held = 0; +		dev->lock.file_priv = NULL; +		drm_lock_free(&dev->lock, DRM_KERNEL_CONTEXT); +	} +} +EXPORT_SYMBOL(drm_client_lock_release); +  int drm_i_have_hw_lock(struct drm_device *dev, struct drm_file *file_priv)  { diff --git a/linux-core/drm_memory.c b/linux-core/drm_memory.c index 75f5b521..4b494f9c 100644 --- a/linux-core/drm_memory.c +++ b/linux-core/drm_memory.c @@ -310,6 +310,7 @@ int drm_free_agp(DRM_AGP_MEM * handle, int pages)  {  	return drm_agp_free_memory(handle) ? 0 : -EINVAL;  } +EXPORT_SYMBOL(drm_free_agp);  /** Wrapper around agp_bind_memory() */  int drm_bind_agp(DRM_AGP_MEM * handle, unsigned int start) @@ -322,6 +323,7 @@ int drm_unbind_agp(DRM_AGP_MEM * handle)  {  	return drm_agp_unbind_memory(handle);  } +EXPORT_SYMBOL(drm_unbind_agp);  #else  /* __OS_HAS_AGP*/  static void *agp_remap(unsigned long offset, unsigned long size, diff --git a/linux-core/drm_mm.c b/linux-core/drm_memrange.c index 59110293..7014c4e2 100644 --- a/linux-core/drm_mm.c +++ b/linux-core/drm_memrange.c @@ -44,26 +44,26 @@  #include "drmP.h"  #include <linux/slab.h> -unsigned long drm_mm_tail_space(struct drm_mm *mm) +unsigned long drm_memrange_tail_space(struct drm_memrange *mm)  {  	struct list_head *tail_node; -	struct drm_mm_node *entry; +	struct drm_memrange_node *entry;  	tail_node = mm->ml_entry.prev; -	entry = list_entry(tail_node, struct drm_mm_node, ml_entry); +	entry = list_entry(tail_node, struct drm_memrange_node, ml_entry);  	if (!entry->free)  		return 0;  	return entry->size;  } -int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size) +int drm_memrange_remove_space_from_tail(struct drm_memrange *mm, unsigned long size)  {  	struct list_head *tail_node; -	struct drm_mm_node *entry; +	struct drm_memrange_node *entry;  	tail_node = mm->ml_entry.prev; -	entry = list_entry(tail_node, struct drm_mm_node, ml_entry); +	entry = list_entry(tail_node, struct drm_memrange_node, ml_entry);  	if (!entry->free)  		return -ENOMEM; @@ -75,13 +75,13 @@ int drm_mm_remove_space_from_tail(struct drm_mm *mm, unsigned long size)  } -static int drm_mm_create_tail_node(struct drm_mm *mm, +static int drm_memrange_create_tail_node(struct drm_memrange *mm,  			    unsigned long start,  			    unsigned long size)  { -	struct drm_mm_node *child; +	struct drm_memrange_node *child; -	child = (struct drm_mm_node *) +	child = (struct drm_memrange_node *)  		drm_ctl_alloc(sizeof(*child), DRM_MEM_MM);  	if (!child)  		return -ENOMEM; @@ -98,26 +98,26 @@ static int drm_mm_create_tail_node(struct drm_mm *mm,  } -int drm_mm_add_space_to_tail(struct drm_mm *mm, unsigned long size) +int drm_memrange_add_space_to_tail(struct drm_memrange *mm, unsigned long size)  {  	struct list_head *tail_node; -	struct 
drm_mm_node *entry; +	struct drm_memrange_node *entry;  	tail_node = mm->ml_entry.prev; -	entry = list_entry(tail_node, struct drm_mm_node, ml_entry); +	entry = list_entry(tail_node, struct drm_memrange_node, ml_entry);  	if (!entry->free) { -		return drm_mm_create_tail_node(mm, entry->start + entry->size, size); +		return drm_memrange_create_tail_node(mm, entry->start + entry->size, size);  	}  	entry->size += size;  	return 0;  } -static struct drm_mm_node *drm_mm_split_at_start(struct drm_mm_node *parent, +static struct drm_memrange_node *drm_memrange_split_at_start(struct drm_memrange_node *parent,  					    unsigned long size)  { -	struct drm_mm_node *child; +	struct drm_memrange_node *child; -	child = (struct drm_mm_node *) +	child = (struct drm_memrange_node *)  		drm_ctl_alloc(sizeof(*child), DRM_MEM_MM);  	if (!child)  		return NULL; @@ -137,19 +137,19 @@ static struct drm_mm_node *drm_mm_split_at_start(struct drm_mm_node *parent,  	return child;  } -struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent, +struct drm_memrange_node *drm_memrange_get_block(struct drm_memrange_node * parent,  				unsigned long size, unsigned alignment)  { -	struct drm_mm_node *align_splitoff = NULL; -	struct drm_mm_node *child; +	struct drm_memrange_node *align_splitoff = NULL; +	struct drm_memrange_node *child;  	unsigned tmp = 0;  	if (alignment)  		tmp = parent->start % alignment;  	if (tmp) { -		align_splitoff = drm_mm_split_at_start(parent, alignment - tmp); +		align_splitoff = drm_memrange_split_at_start(parent, alignment - tmp);  		if (!align_splitoff)  			return NULL;  	} @@ -159,40 +159,41 @@ struct drm_mm_node *drm_mm_get_block(struct drm_mm_node * parent,  		parent->free = 0;  		return parent;  	} else { -		child = drm_mm_split_at_start(parent, size); +		child = drm_memrange_split_at_start(parent, size);  	}  	if (align_splitoff) -		drm_mm_put_block(align_splitoff); +		drm_memrange_put_block(align_splitoff);  	return child;  } +EXPORT_SYMBOL(drm_memrange_get_block);  /*   * Put a block. Merge with the previous and / or next block if they are free.   * Otherwise add to the free stack.   
*/ -void drm_mm_put_block(struct drm_mm_node * cur) +void drm_memrange_put_block(struct drm_memrange_node * cur)  { -	struct drm_mm *mm = cur->mm; +	struct drm_memrange *mm = cur->mm;  	struct list_head *cur_head = &cur->ml_entry;  	struct list_head *root_head = &mm->ml_entry; -	struct drm_mm_node *prev_node = NULL; -	struct drm_mm_node *next_node; +	struct drm_memrange_node *prev_node = NULL; +	struct drm_memrange_node *next_node;  	int merged = 0;  	if (cur_head->prev != root_head) { -		prev_node = list_entry(cur_head->prev, struct drm_mm_node, ml_entry); +		prev_node = list_entry(cur_head->prev, struct drm_memrange_node, ml_entry);  		if (prev_node->free) {  			prev_node->size += cur->size;  			merged = 1;  		}  	}  	if (cur_head->next != root_head) { -		next_node = list_entry(cur_head->next, struct drm_mm_node, ml_entry); +		next_node = list_entry(cur_head->next, struct drm_memrange_node, ml_entry);  		if (next_node->free) {  			if (merged) {  				prev_node->size += next_node->size; @@ -215,16 +216,16 @@ void drm_mm_put_block(struct drm_mm_node * cur)  		drm_ctl_free(cur, sizeof(*cur), DRM_MEM_MM);  	}  } -EXPORT_SYMBOL(drm_mm_put_block); +EXPORT_SYMBOL(drm_memrange_put_block); -struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm, +struct drm_memrange_node *drm_memrange_search_free(const struct drm_memrange * mm,  				  unsigned long size,  				  unsigned alignment, int best_match)  {  	struct list_head *list;  	const struct list_head *free_stack = &mm->fl_entry; -	struct drm_mm_node *entry; -	struct drm_mm_node *best; +	struct drm_memrange_node *entry; +	struct drm_memrange_node *best;  	unsigned long best_size;  	unsigned wasted; @@ -232,7 +233,7 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm,  	best_size = ~0UL;  	list_for_each(list, free_stack) { -		entry = list_entry(list, struct drm_mm_node, fl_entry); +		entry = list_entry(list, struct drm_memrange_node, fl_entry);  		wasted = 0;  		if (entry->size < size) @@ -257,30 +258,31 @@ struct drm_mm_node *drm_mm_search_free(const struct drm_mm * mm,  	return best;  } +EXPORT_SYMBOL(drm_memrange_search_free); -int drm_mm_clean(struct drm_mm * mm) +int drm_memrange_clean(struct drm_memrange * mm)  {  	struct list_head *head = &mm->ml_entry;  	return (head->next->next == head);  } -int drm_mm_init(struct drm_mm * mm, unsigned long start, unsigned long size) +int drm_memrange_init(struct drm_memrange * mm, unsigned long start, unsigned long size)  {  	INIT_LIST_HEAD(&mm->ml_entry);  	INIT_LIST_HEAD(&mm->fl_entry); -	return drm_mm_create_tail_node(mm, start, size); +	return drm_memrange_create_tail_node(mm, start, size);  } -EXPORT_SYMBOL(drm_mm_init); +EXPORT_SYMBOL(drm_memrange_init); -void drm_mm_takedown(struct drm_mm * mm) +void drm_memrange_takedown(struct drm_memrange * mm)  {  	struct list_head *bnode = mm->fl_entry.next; -	struct drm_mm_node *entry; +	struct drm_memrange_node *entry; -	entry = list_entry(bnode, struct drm_mm_node, fl_entry); +	entry = list_entry(bnode, struct drm_memrange_node, fl_entry);  	if (entry->ml_entry.next != &mm->ml_entry ||  	    entry->fl_entry.next != &mm->fl_entry) { @@ -293,4 +295,4 @@ void drm_mm_takedown(struct drm_mm * mm)  	drm_ctl_free(entry, sizeof(*entry), DRM_MEM_MM);  } -EXPORT_SYMBOL(drm_mm_takedown); +EXPORT_SYMBOL(drm_memrange_takedown); diff --git a/linux-core/drm_objects.h b/linux-core/drm_objects.h index 770fbc56..6ec09ef8 100644 --- a/linux-core/drm_objects.h +++ b/linux-core/drm_objects.h @@ -301,7 +301,12 @@ struct drm_ttm_backend_func {  	void 
(*destroy) (struct drm_ttm_backend *backend);  }; - +/** + * This structure associates a set of flags and methods with a drm_ttm + * object, and will also be subclassed by the particular backend. + * + * \sa #drm_agp_ttm_backend + */  struct drm_ttm_backend {  	struct drm_device *dev;  	uint32_t flags; @@ -413,7 +418,7 @@ extern int drm_ttm_destroy(struct drm_ttm *ttm);   */  struct drm_bo_mem_reg { -	struct drm_mm_node *mm_node; +	struct drm_memrange_node *mm_node;  	unsigned long size;  	unsigned long num_pages;  	uint32_t page_alignment; @@ -494,7 +499,7 @@ struct drm_buffer_object {  	unsigned long num_pages;  	/* For pinned buffers */ -	struct drm_mm_node *pinned_node; +	struct drm_memrange_node *pinned_node;  	uint32_t pinned_mem_type;  	struct list_head pinned_lru; @@ -529,7 +534,7 @@ struct drm_mem_type_manager {  	int has_type;  	int use_type;  	int kern_init_type; -	struct drm_mm manager; +	struct drm_memrange manager;  	struct list_head lru;  	struct list_head pinned;  	uint32_t flags; diff --git a/linux-core/drm_proc.c b/linux-core/drm_proc.c index 42da5c69..b1b976b2 100644 --- a/linux-core/drm_proc.c +++ b/linux-core/drm_proc.c @@ -51,6 +51,10 @@ static int drm_bufs_info(char *buf, char **start, off_t offset,  			 int request, int *eof, void *data);  static int drm_objects_info(char *buf, char **start, off_t offset,  			 int request, int *eof, void *data); +static int drm_gem_name_info(char *buf, char **start, off_t offset, +			     int request, int *eof, void *data); +static int drm_gem_object_info(char *buf, char **start, off_t offset, +			       int request, int *eof, void *data);  #if DRM_DEBUG_CODE  static int drm_vma_info(char *buf, char **start, off_t offset,  			int request, int *eof, void *data); @@ -70,6 +74,8 @@ static struct drm_proc_list {  	{"queues", drm_queues_info},  	{"bufs", drm_bufs_info},  	{"objects", drm_objects_info}, +	{"gem_names", drm_gem_name_info}, +	{"gem_objects", drm_gem_object_info},  #if DRM_DEBUG_CODE  	{"vma", drm_vma_info},  #endif @@ -582,6 +588,79 @@ static int drm_clients_info(char *buf, char **start, off_t offset,  	return ret;  } +struct drm_gem_name_info_data { +	int			len; +	char			*buf; +	int			eof; +}; + +static int drm_gem_one_name_info (int id, void *ptr, void *data) +{ +	struct drm_gem_object *obj = ptr; +	struct drm_gem_name_info_data	*nid = data; + +	DRM_INFO ("name %d size %d\n", obj->name, obj->size); +	if (nid->eof) +		return 0; +	 +	nid->len += sprintf (&nid->buf[nid->len], +			     "%6d%9d%8d%9d\n", +			     obj->name, obj->size, +			     atomic_read(&obj->handlecount.refcount), +			     atomic_read(&obj->refcount.refcount)); +	if (nid->len > DRM_PROC_LIMIT) { +		nid->eof = 1; +		return 0; +	} +	return 0; +} + +static int drm_gem_name_info(char *buf, char **start, off_t offset, +			     int request, int *eof, void *data) +{ +	struct drm_minor *minor = (struct drm_minor *) data;  +	struct drm_device *dev = minor->dev; +	struct drm_gem_name_info_data nid; +	 +	if (offset > DRM_PROC_LIMIT) { +		*eof = 1; +		return 0; +	} + +	nid.len = sprintf (buf, "  name     size handles refcount\n"); +	nid.buf = buf; +	nid.eof = 0; +	idr_for_each (&dev->object_name_idr, drm_gem_one_name_info, &nid); +	 +	*start = &buf[offset]; +	*eof = 0; +	if (nid.len > request + offset) +		return request; +	*eof = 1; +	return nid.len - offset; +} + +static int drm_gem_object_info(char *buf, char **start, off_t offset, +			       int request, int *eof, void *data) +{ +	struct drm_minor *minor = (struct drm_minor *) data;  +	struct drm_device *dev = 
minor->dev; +	int len = 0; +	 +	if (offset > DRM_PROC_LIMIT) { +		*eof = 1; +		return 0; +	} + +	*start = &buf[offset]; +	*eof = 0; +	DRM_PROC_PRINT ("%d objects\n", atomic_read (&dev->object_count)); +	if (len > request + offset) +		return request; +	*eof = 1; +	return len - offset; +} +  #if DRM_DEBUG_CODE  static int drm__vma_info(char *buf, char **start, off_t offset, int request, diff --git a/linux-core/drm_sman.c b/linux-core/drm_sman.c index 8421a939..7c16f685 100644 --- a/linux-core/drm_sman.c +++ b/linux-core/drm_sman.c @@ -88,34 +88,34 @@ EXPORT_SYMBOL(drm_sman_init);  static void *drm_sman_mm_allocate(void *private, unsigned long size,  				  unsigned alignment)  { -	struct drm_mm *mm = (struct drm_mm *) private; -	struct drm_mm_node *tmp; +	struct drm_memrange *mm = (struct drm_memrange *) private; +	struct drm_memrange_node *tmp; -	tmp = drm_mm_search_free(mm, size, alignment, 1); +	tmp = drm_memrange_search_free(mm, size, alignment, 1);  	if (!tmp) {  		return NULL;  	} -	tmp = drm_mm_get_block(tmp, size, alignment); +	tmp = drm_memrange_get_block(tmp, size, alignment);  	return tmp;  }  static void drm_sman_mm_free(void *private, void *ref)  { -	struct drm_mm_node *node = (struct drm_mm_node *) ref; +	struct drm_memrange_node *node = (struct drm_memrange_node *) ref; -	drm_mm_put_block(node); +	drm_memrange_put_block(node);  }  static void drm_sman_mm_destroy(void *private)  { -	struct drm_mm *mm = (struct drm_mm *) private; -	drm_mm_takedown(mm); +	struct drm_memrange *mm = (struct drm_memrange *) private; +	drm_memrange_takedown(mm);  	drm_free(mm, sizeof(*mm), DRM_MEM_MM);  }  static unsigned long drm_sman_mm_offset(void *private, void *ref)  { -	struct drm_mm_node *node = (struct drm_mm_node *) ref; +	struct drm_memrange_node *node = (struct drm_memrange_node *) ref;  	return node->start;  } @@ -124,7 +124,7 @@ drm_sman_set_range(struct drm_sman * sman, unsigned int manager,  		   unsigned long start, unsigned long size)  {  	struct drm_sman_mm *sman_mm; -	struct drm_mm *mm; +	struct drm_memrange *mm;  	int ret;  	BUG_ON(manager >= sman->num_managers); @@ -135,7 +135,7 @@ drm_sman_set_range(struct drm_sman * sman, unsigned int manager,  		return -ENOMEM;  	}  	sman_mm->private = mm; -	ret = drm_mm_init(mm, start, size); +	ret = drm_memrange_init(mm, start, size);  	if (ret) {  		drm_free(mm, sizeof(*mm), DRM_MEM_MM); diff --git a/linux-core/drm_sman.h b/linux-core/drm_sman.h index 39a39fef..0299776c 100644 --- a/linux-core/drm_sman.h +++ b/linux-core/drm_sman.h @@ -45,7 +45,7 @@  /*   * A class that is an abstration of a simple memory allocator.   * The sman implementation provides a default such allocator - * using the drm_mm.c implementation. But the user can replace it. + * using the drm_memrange.c implementation. But the user can replace it.   * See the SiS implementation, which may use the SiS FB kernel module   * for memory management.   */ @@ -116,7 +116,7 @@ extern int drm_sman_init(struct drm_sman * sman, unsigned int num_managers,  			 unsigned int user_order, unsigned int owner_order);  /* - * Initialize a drm_mm.c allocator. Should be called only once for each + * Initialize a drm_memrange.c allocator. Should be called only once for each   * manager unless a customized allogator is used.   
*/ diff --git a/linux-core/drm_stub.c b/linux-core/drm_stub.c index c68adbaf..55841826 100644 --- a/linux-core/drm_stub.c +++ b/linux-core/drm_stub.c @@ -115,15 +115,15 @@ static int drm_fill_in_dev(struct drm_device * dev, struct pci_dev *pdev,  	if (drm_ht_create(&dev->map_hash, DRM_MAP_HASH_ORDER)) {  		return -ENOMEM;  	} -	if (drm_mm_init(&dev->offset_manager, DRM_FILE_PAGE_OFFSET_START, -			DRM_FILE_PAGE_OFFSET_SIZE)) { +	if (drm_memrange_init(&dev->offset_manager, DRM_FILE_PAGE_OFFSET_START, +			      DRM_FILE_PAGE_OFFSET_SIZE)) {  		drm_ht_remove(&dev->map_hash);  		return -ENOMEM;  	}  	if (drm_ht_create(&dev->object_hash, DRM_OBJECT_HASH_ORDER)) {  		drm_ht_remove(&dev->map_hash); -		drm_mm_takedown(&dev->offset_manager); +		drm_memrange_takedown(&dev->offset_manager);  		return -ENOMEM;  	} @@ -163,7 +163,16 @@ static int drm_fill_in_dev(struct drm_device * dev, struct pci_dev *pdev,  		goto error_out_unreg;  	} +	if (driver->driver_features & DRIVER_GEM) { +		retcode = drm_gem_init (dev); +		if (retcode) { +			DRM_ERROR("Cannot initialize graphics execution manager (GEM)\n"); +			goto error_out_unreg; +		} +	} +  	drm_fence_manager_init(dev); +  	return 0;  error_out_unreg: diff --git a/linux-core/i915_drv.c b/linux-core/i915_drv.c index 91c2da23..3f246a08 100644 --- a/linux-core/i915_drv.c +++ b/linux-core/i915_drv.c @@ -566,7 +566,7 @@ static struct drm_driver driver = {  	 */  	.driver_features =  	    DRIVER_USE_AGP | DRIVER_REQUIRE_AGP | /* DRIVER_USE_MTRR | */ -	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED, +	    DRIVER_HAVE_IRQ | DRIVER_IRQ_SHARED | DRIVER_GEM,  	.load = i915_driver_load,  	.unload = i915_driver_unload,  	.firstopen = i915_driver_firstopen, @@ -586,6 +586,10 @@ static struct drm_driver driver = {  	.get_map_ofs = drm_core_get_map_ofs,  	.get_reg_ofs = drm_core_get_reg_ofs,  	.ioctls = i915_ioctls, +	.gem_init_object = i915_gem_init_object, +	.gem_free_object = i915_gem_free_object, +	.gem_set_domain = i915_gem_set_domain, +	.gem_flush_pwrite = i915_gem_flush_pwrite,  	.fops = {  		.owner = THIS_MODULE,  		.open = drm_open, diff --git a/linux-core/i915_gem.c b/linux-core/i915_gem.c new file mode 100644 index 00000000..bcc15dd3 --- /dev/null +++ b/linux-core/i915_gem.c @@ -0,0 +1,1693 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + *    Eric Anholt <eric@anholt.net> + * + */ + +#include "drmP.h" +#include "drm.h" +#include "i915_drm.h" +#include "i915_drv.h" + +#define WATCH_COHERENCY	0 +#define WATCH_BUF	0 +#define WATCH_EXEC	0 +#define WATCH_LRU	0 +#define WATCH_RELOC	0 + +static void +i915_gem_object_set_domain(struct drm_gem_object *obj, +			    uint32_t read_domains, +			    uint32_t write_domain); + +int +i915_gem_init_ioctl(struct drm_device *dev, void *data, +		    struct drm_file *file_priv) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	struct drm_i915_gem_init *args = data; + +	mutex_lock(&dev->struct_mutex); + +	if (args->gtt_start >= args->gtt_end || +	    (args->gtt_start & (PAGE_SIZE - 1)) != 0 || +	    (args->gtt_end & (PAGE_SIZE - 1)) != 0) { +		mutex_unlock(&dev->struct_mutex); +		return -EINVAL; +	} + +	drm_memrange_init(&dev_priv->mm.gtt_space, args->gtt_start, +	    args->gtt_end - args->gtt_start); + +	mutex_unlock(&dev->struct_mutex); + +	return 0; +} + +static void +i915_gem_object_free_page_list(struct drm_gem_object *obj) +{ +	struct drm_i915_gem_object *obj_priv = obj->driver_private; +	int page_count = obj->size / PAGE_SIZE; +	int i; + +	if (obj_priv->page_list == NULL) +		return; + + +	for (i = 0; i < page_count; i++) +		if (obj_priv->page_list[i] != NULL) +			page_cache_release(obj_priv->page_list[i]); + +	drm_free(obj_priv->page_list, +		 page_count * sizeof(struct page *), +		 DRM_MEM_DRIVER); +	obj_priv->page_list = NULL; +} + +static void +i915_gem_object_move_to_active(struct drm_gem_object *obj) +{ +	struct drm_device *dev = obj->dev; +	drm_i915_private_t *dev_priv = dev->dev_private; +	struct drm_i915_gem_object *obj_priv = obj->driver_private; + +	/* Add a reference if we're newly entering the active list. */ +	if (!obj_priv->active) { +		drm_gem_object_reference(obj); +		obj_priv->active = 1; +	} +	/* Move from whatever list we were on to the tail of execution. */ +	list_move_tail(&obj_priv->list, +		       &dev_priv->mm.active_list); +} + +static void +i915_gem_object_move_to_inactive(struct drm_gem_object *obj) +{ +	struct drm_device *dev = obj->dev; +	drm_i915_private_t *dev_priv = dev->dev_private; +	struct drm_i915_gem_object *obj_priv = obj->driver_private; + +	if (obj_priv->pin_count != 0) +		list_del_init(&obj_priv->list); +	else +		list_move_tail(&obj_priv->list, &dev_priv->mm.inactive_list); + +	if (obj_priv->active) { +		obj_priv->active = 0; +		drm_gem_object_unreference(obj); +	} +} + +/** + * Creates a new sequence number, emitting a write of it to the status page + * plus an interrupt, which will trigger i915_user_interrupt_handler. + * + * Must be called with struct_lock held. + * + * Returned sequence numbers are nonzero on success. + */ +static uint32_t +i915_add_request(struct drm_device *dev, uint32_t flush_domains) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	struct drm_i915_gem_request *request; +	uint32_t seqno; +	RING_LOCALS; + +	request = drm_calloc(1, sizeof(*request), DRM_MEM_DRIVER); +	if (request == NULL) +		return 0; + +	/* Grab the seqno we're going to make this request be, and bump the +	 * next (skipping 0 so it can be the reserved no-seqno value). 
+	 */ +	seqno = dev_priv->mm.next_gem_seqno; +	dev_priv->mm.next_gem_seqno++; +	if (dev_priv->mm.next_gem_seqno == 0) +		dev_priv->mm.next_gem_seqno++; + +	BEGIN_LP_RING(4); +	OUT_RING(CMD_STORE_DWORD_IDX); +	OUT_RING(I915_GEM_HWS_INDEX << STORE_DWORD_INDEX_SHIFT); +	OUT_RING(seqno); + +	OUT_RING(GFX_OP_USER_INTERRUPT); +	ADVANCE_LP_RING(); + +	DRM_DEBUG("%d\n", seqno); + +	request->seqno = seqno; +	request->emitted_jiffies = jiffies; +	request->flush_domains = flush_domains; +	list_add_tail(&request->list, &dev_priv->mm.request_list); + +	return seqno; +} + +/** + * Command execution barrier + * + * Ensures that all commands in the ring are finished + * before signalling the CPU + */ + +uint32_t +i915_retire_commands(struct drm_device *dev) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	uint32_t cmd = CMD_MI_FLUSH | MI_NO_WRITE_FLUSH; +	uint32_t flush_domains = 0; +	RING_LOCALS; + +	/* The sampler always gets flushed on i965 (sigh) */ +	if (IS_I965G(dev)) +		flush_domains |= DRM_GEM_DOMAIN_I915_SAMPLER; +	BEGIN_LP_RING(2); +	OUT_RING(cmd); +	OUT_RING(0); /* noop */ +	ADVANCE_LP_RING(); +	return flush_domains; +} + +/** + * Moves buffers associated only with the given active seqno from the active + * to inactive list, potentially freeing them. + */ +static void +i915_gem_retire_request(struct drm_device *dev, +			struct drm_i915_gem_request *request) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; + +	if (request->flush_domains != 0) { +		struct drm_i915_gem_object *obj_priv, *next; + +		/* First clear any buffers that were only waiting for a flush +		 * matching the one just retired. +		 */ + +		list_for_each_entry_safe(obj_priv, next, +					 &dev_priv->mm.flushing_list, list) { +			struct drm_gem_object *obj = obj_priv->obj; + +			if (obj->write_domain & request->flush_domains) { +				obj->write_domain = 0; +				i915_gem_object_move_to_inactive(obj); +			} +		} + +	} + +	/* Move any buffers on the active list that are no longer referenced +	 * by the ringbuffer to the flushing/inactive lists as appropriate. +	 */ +	while (!list_empty(&dev_priv->mm.active_list)) { +		struct drm_gem_object *obj; +		struct drm_i915_gem_object *obj_priv; + +		obj_priv = list_first_entry(&dev_priv->mm.active_list, +					    struct drm_i915_gem_object, +					    list); +		obj = obj_priv->obj; + +		/* If the seqno being retired doesn't match the oldest in the +		 * list, then the oldest in the list must still be newer than +		 * this seqno. +		 */ +		if (obj_priv->last_rendering_seqno != request->seqno) +			return; +#if WATCH_LRU +		DRM_INFO("%s: retire %d moves to inactive list %p\n", +			 __func__, request->seqno, obj); +#endif + +		if (obj->write_domain != 0) { +			list_move_tail(&obj_priv->list, +				       &dev_priv->mm.flushing_list); +		} else { +			i915_gem_object_move_to_inactive(obj); +		} +	} +} + +/** + * Returns true if seq1 is later than seq2. + */ +static int +i915_seqno_passed(uint32_t seq1, uint32_t seq2) +{ +	return (int32_t)(seq1 - seq2) >= 0; +} + +static uint32_t +i915_get_gem_seqno(struct drm_device *dev) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; + +	return READ_HWSP(dev_priv, I915_GEM_HWS_INDEX); +} + +/** + * This function clears the request list as sequence numbers are passed. 
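+ *
+ * Note that i915_seqno_passed() above compares sequence numbers by
+ * signed 32-bit difference, so the ordering test keeps working across
+ * seqno wrap-around: for example seq1 = 0x00000002 still counts as
+ * later than seq2 = 0xfffffffe, because (int32_t)(seq1 - seq2) == 4.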
+ */ +void +i915_gem_retire_requests(struct drm_device *dev) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	uint32_t seqno; + +	seqno = i915_get_gem_seqno(dev); + +	while (!list_empty(&dev_priv->mm.request_list)) { +		struct drm_i915_gem_request *request; +		uint32_t retiring_seqno; + +		request = list_first_entry(&dev_priv->mm.request_list, +					   struct drm_i915_gem_request, +					   list); +		retiring_seqno = request->seqno; + +		if (i915_seqno_passed(seqno, retiring_seqno)) { +			i915_gem_retire_request(dev, request); + +			list_del(&request->list); +			drm_free(request, sizeof(*request), DRM_MEM_DRIVER); +		} else +		    break; +	} +} + +/** + * Waits for a sequence number to be signaled, and cleans up the + * request and object lists appropriately for that event. + */ +int +i915_wait_request(struct drm_device *dev, uint32_t seqno) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	int ret = 0; + +	BUG_ON(seqno == 0); + +	if (!i915_seqno_passed(i915_get_gem_seqno(dev), seqno)) { +		i915_user_irq_on(dev_priv); +		ret = wait_event_interruptible(dev_priv->irq_queue, +					       i915_seqno_passed(i915_get_gem_seqno(dev), +								 seqno)); +		i915_user_irq_off(dev_priv); +	} + +	/* Directly dispatch request retiring.  While we have the work queue +	 * to handle this, the waiter on a request often wants an associated +	 * buffer to have made it to the inactive list, and we would need +	 * a separate wait queue to handle that. +	 */ +	if (ret == 0) +		i915_gem_retire_requests(dev); + +	return ret; +} + +static void +i915_gem_flush(struct drm_device *dev, +	       uint32_t invalidate_domains, +	       uint32_t flush_domains) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	uint32_t cmd; +	RING_LOCALS; + +#if WATCH_EXEC +	DRM_INFO("%s: invalidate %08x flush %08x\n", __func__, +		  invalidate_domains, flush_domains); +#endif + +	if (flush_domains & DRM_GEM_DOMAIN_CPU) +		drm_agp_chipset_flush(dev); + +	if ((invalidate_domains|flush_domains) & ~DRM_GEM_DOMAIN_CPU) { +		/* +		 * read/write caches: +		 * +		 * DRM_GEM_DOMAIN_I915_RENDER is always invalidated, but is +		 * only flushed if MI_NO_WRITE_FLUSH is unset.  On 965, it is +		 * also flushed at 2d versus 3d pipeline switches. +		 * +		 * read-only caches: +		 * +		 * DRM_GEM_DOMAIN_I915_SAMPLER is flushed on pre-965 if +		 * MI_READ_FLUSH is set, and is always flushed on 965. +		 * +		 * DRM_GEM_DOMAIN_I915_COMMAND may not exist? +		 * +		 * DRM_GEM_DOMAIN_I915_INSTRUCTION, which exists on 965, is +		 * invalidated when MI_EXE_FLUSH is set. +		 * +		 * DRM_GEM_DOMAIN_I915_VERTEX, which exists on 965, is +		 * invalidated with every MI_FLUSH. +		 * +		 * TLBs: +		 * +		 * On 965, TLBs associated with DRM_GEM_DOMAIN_I915_COMMAND +		 * and DRM_GEM_DOMAIN_CPU in are invalidated at PTE write and +		 * DRM_GEM_DOMAIN_I915_RENDER and DRM_GEM_DOMAIN_I915_SAMPLER +		 * are flushed at any MI_FLUSH. +		 */ + +		cmd = CMD_MI_FLUSH | MI_NO_WRITE_FLUSH; +		if ((invalidate_domains|flush_domains) & +		    DRM_GEM_DOMAIN_I915_RENDER) +			cmd &= ~MI_NO_WRITE_FLUSH; +		if (!IS_I965G(dev)) { +			/* +			 * On the 965, the sampler cache always gets flushed +			 * and this bit is reserved. 
+			 */ +			if (invalidate_domains & DRM_GEM_DOMAIN_I915_SAMPLER) +				cmd |= MI_READ_FLUSH; +		} +		if (invalidate_domains & DRM_GEM_DOMAIN_I915_INSTRUCTION) +			cmd |= MI_EXE_FLUSH; + +#if WATCH_EXEC +		DRM_INFO("%s: queue flush %08x to ring\n", __func__, cmd); +#endif +		BEGIN_LP_RING(2); +		OUT_RING(cmd); +		OUT_RING(0); /* noop */ +		ADVANCE_LP_RING(); +	} +} + +/** + * Ensures that all rendering to the object has completed and the object is + * safe to unbind from the GTT or access from the CPU. + */ +static int +i915_gem_object_wait_rendering(struct drm_gem_object *obj) +{ +	struct drm_device *dev = obj->dev; +	struct drm_i915_gem_object *obj_priv = obj->driver_private; +	int ret; + +	/* If there are writes queued to the buffer, flush and +	 * create a new seqno to wait for. +	 */ +	if (obj->write_domain & ~(DRM_GEM_DOMAIN_CPU)) { +		uint32_t write_domain = obj->write_domain; +#if WATCH_BUF +		DRM_INFO("%s: flushing object %p from write domain %08x\n", +			  __func__, obj, write_domain); +#endif +		i915_gem_flush(dev, 0, write_domain); +		obj->write_domain = 0; + +		i915_gem_object_move_to_active(obj); +		obj_priv->last_rendering_seqno = i915_add_request(dev, +								  write_domain); +		BUG_ON(obj_priv->last_rendering_seqno == 0); +#if WATCH_LRU +		DRM_INFO("%s: flush moves to exec list %p\n", __func__, obj); +#endif +	} +	/* If there is rendering queued on the buffer being evicted, wait for +	 * it. +	 */ +	if (obj_priv->active) { +#if WATCH_BUF +		DRM_INFO("%s: object %p wait for seqno %08x\n", +			  __func__, obj, obj_priv->last_rendering_seqno); +#endif +		ret = i915_wait_request(dev, obj_priv->last_rendering_seqno); +		if (ret != 0) +			return ret; +	} + +	return 0; +} + +/** + * Unbinds an object from the GTT aperture. + */ +static void +i915_gem_object_unbind(struct drm_gem_object *obj) +{ +	struct drm_i915_gem_object *obj_priv = obj->driver_private; + +#if WATCH_BUF +	DRM_INFO("%s:%d %p\n", __func__, __LINE__, obj); +	DRM_INFO("gtt_space %p\n", obj_priv->gtt_space); +#endif +	if (obj_priv->gtt_space == NULL) +		return; + +	/* Move the object to the CPU domain to ensure that +	 * any possible CPU writes while it's not in the GTT +	 * are flushed when we go to remap it. This will +	 * also ensure that all pending GPU writes are finished +	 * before we unbind. +	 */ +	i915_gem_object_set_domain (obj, DRM_GEM_DOMAIN_CPU, +				    DRM_GEM_DOMAIN_CPU); + +	if (obj_priv->agp_mem != NULL) { +		drm_unbind_agp(obj_priv->agp_mem); +		drm_free_agp(obj_priv->agp_mem, obj->size / PAGE_SIZE); +		obj_priv->agp_mem = NULL; +	} + +	i915_gem_object_free_page_list(obj); + +	drm_memrange_put_block(obj_priv->gtt_space); +	obj_priv->gtt_space = NULL; + +	/* Remove ourselves from the LRU list if present. */ +	if (!list_empty(&obj_priv->list)) { +		list_del_init(&obj_priv->list); +		if (obj_priv->active) { +			DRM_ERROR("Failed to wait on buffer when unbinding, " +				  "continued anyway.\n"); +			obj_priv->active = 0; +			drm_gem_object_unreference(obj); +		} +	} +} + +#if WATCH_BUF | WATCH_EXEC +static void +i915_gem_dump_page(struct page *page, uint32_t start, uint32_t end, +		   uint32_t bias, uint32_t mark) +{ +	uint32_t *mem = kmap_atomic(page, KM_USER0); +	int i; +	for (i = start; i < end; i += 4) +		DRM_INFO("%08x: %08x%s\n", +			  (int) (bias + i), mem[i / 4], +			  (bias + i == mark) ? 
" ********" : ""); +	kunmap_atomic(mem, KM_USER0); +	/* give syslog time to catch up */ +	msleep(1); +} + +static void +i915_gem_dump_object(struct drm_gem_object *obj, int len, +		     const char *where, uint32_t mark) +{ +	struct drm_i915_gem_object *obj_priv = obj->driver_private; +	int page; + +	DRM_INFO("%s: object at offset %08x\n", where, obj_priv->gtt_offset); +	for (page = 0; page < (len + PAGE_SIZE-1) / PAGE_SIZE; page++) { +		int page_len, chunk, chunk_len; + +		page_len = len - page * PAGE_SIZE; +		if (page_len > PAGE_SIZE) +			page_len = PAGE_SIZE; + +		for (chunk = 0; chunk < page_len; chunk += 128) { +			chunk_len = page_len - chunk; +			if (chunk_len > 128) +				chunk_len = 128; +			i915_gem_dump_page(obj_priv->page_list[page], +					   chunk, chunk + chunk_len, +					   obj_priv->gtt_offset + +					   page * PAGE_SIZE, +					   mark); +		} +	} +} +#endif + +#if WATCH_LRU +static void +i915_dump_lru(struct drm_device *dev, const char *where) +{ +	drm_i915_private_t		*dev_priv = dev->dev_private; +	struct drm_i915_gem_object	*obj_priv; + +	DRM_INFO("active list %s {\n", where); +	list_for_each_entry(obj_priv, &dev_priv->mm.active_list, +			    list) +	{ +		DRM_INFO("    %p: %08x\n", obj_priv, +			 obj_priv->last_rendering_seqno); +	} +	DRM_INFO("}\n"); +	DRM_INFO("flushing list %s {\n", where); +	list_for_each_entry(obj_priv, &dev_priv->mm.flushing_list, +			    list) +	{ +		DRM_INFO("    %p: %08x\n", obj_priv, +			 obj_priv->last_rendering_seqno); +	} +	DRM_INFO("}\n"); +	DRM_INFO("inactive %s {\n", where); +	list_for_each_entry(obj_priv, &dev_priv->mm.inactive_list, list) { +		DRM_INFO("    %p: %08x\n", obj_priv, +			 obj_priv->last_rendering_seqno); +	} +	DRM_INFO("}\n"); +} +#endif + +static int +i915_gem_evict_something(struct drm_device *dev) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	struct drm_gem_object *obj; +	struct drm_i915_gem_object *obj_priv; + +	for (;;) { +		/* If there's an inactive buffer available now, grab it +		 * and be done. +		 */ +		if (!list_empty(&dev_priv->mm.inactive_list)) { +			obj_priv = list_first_entry(&dev_priv->mm.inactive_list, +						    struct drm_i915_gem_object, +						    list); +			obj = obj_priv->obj; +			BUG_ON(obj_priv->pin_count != 0); +			break; +		} + +		/* If we didn't get anything, but the ring is still processing +		 * things, wait for one of those things to finish and hopefully +		 * leave us a buffer to evict. +		 */ +		if (!list_empty(&dev_priv->mm.request_list)) { +			struct drm_i915_gem_request *request; +			int ret; + +			request = list_first_entry(&dev_priv->mm.request_list, +						   struct drm_i915_gem_request, +						   list); + +			ret = i915_wait_request(dev, request->seqno); +			if (ret != 0) +				return ret; + +			continue; +		} + +		/* If we didn't have anything on the request list but there +		 * are buffers awaiting a flush, emit one and try again. +		 * When we wait on it, those buffers waiting for that flush +		 * will get moved to inactive. +		 */ +		if (!list_empty(&dev_priv->mm.flushing_list)) { +			obj_priv = list_first_entry(&dev_priv->mm.flushing_list, +						    struct drm_i915_gem_object, +						    list); +			obj = obj_priv->obj; + +			i915_gem_flush(dev, +				       obj->write_domain, +				       obj->write_domain); +			i915_add_request(dev, obj->write_domain); + +			obj = NULL; +			continue; +		} + +		/* If we didn't do any of the above, there's nothing to be done +		 * and we just can't fit it in. 
+		 */ +		return -ENOMEM; +	} + +#if WATCH_LRU +	DRM_INFO("%s: evicting %p\n", __func__, obj); +#endif + +	BUG_ON(obj_priv->active); + +	/* Wait on the rendering and unbind the buffer. */ +	i915_gem_object_unbind(obj); + +	return 0; +} + +static int +i915_gem_object_get_page_list(struct drm_gem_object *obj) +{ +	struct drm_i915_gem_object *obj_priv = obj->driver_private; +	int page_count, i; +	if (obj_priv->page_list) +		return 0; + +	/* Get the list of pages out of our struct file.  They'll be pinned +	 * at this point until we release them. +	 */ +	page_count = obj->size / PAGE_SIZE; +	BUG_ON(obj_priv->page_list != NULL); +	obj_priv->page_list = drm_calloc(page_count, sizeof(struct page *), +					 DRM_MEM_DRIVER); +	if (obj_priv->page_list == NULL) +		return -ENOMEM; + +	for (i = 0; i < page_count; i++) { +		obj_priv->page_list[i] = +		    find_or_create_page(obj->filp->f_mapping, i, GFP_HIGHUSER); + +		if (obj_priv->page_list[i] == NULL) { +			i915_gem_object_free_page_list(obj); +			return -ENOMEM; +		} +		unlock_page(obj_priv->page_list[i]); +	} +	return 0; +} + +/** + * Finds free space in the GTT aperture and binds the object there. + */ +static int +i915_gem_object_bind_to_gtt(struct drm_gem_object *obj, unsigned alignment) +{ +	struct drm_device *dev = obj->dev; +	drm_i915_private_t *dev_priv = dev->dev_private; +	struct drm_i915_gem_object *obj_priv = obj->driver_private; +	struct drm_memrange_node *free_space; +	int page_count, ret; + +	if (alignment == 0) +		alignment = PAGE_SIZE; +	if (alignment & (PAGE_SIZE - 1)) { +		DRM_ERROR("Invalid object alignment requested %u\n", alignment); +		return -EINVAL; +	} + + search_free: +	free_space = drm_memrange_search_free(&dev_priv->mm.gtt_space, +					      obj->size, +					      alignment, 0); +	if (free_space != NULL) { +		obj_priv->gtt_space = +			drm_memrange_get_block(free_space, obj->size, +					       alignment); +		if (obj_priv->gtt_space != NULL) { +			obj_priv->gtt_space->private = obj; +			obj_priv->gtt_offset = obj_priv->gtt_space->start; +		} +	} +	if (obj_priv->gtt_space == NULL) { +		/* If the gtt is empty and we're still having trouble +		 * fitting our object in, we're out of memory. +		 */ +#if WATCH_LRU +		DRM_INFO("%s: GTT full, evicting something\n", __func__); +#endif +		if (list_empty(&dev_priv->mm.inactive_list) && +		    list_empty(&dev_priv->mm.active_list)) { +			DRM_ERROR("GTT full, but LRU list empty\n"); +			return -ENOMEM; +		} + +		ret = i915_gem_evict_something(dev); +		if (ret != 0) +			return ret; +		goto search_free; +	} + +#if WATCH_BUF +	DRM_INFO("Binding object of size %d at 0x%08x\n", +		 obj->size, obj_priv->gtt_offset); +#endif +	ret = i915_gem_object_get_page_list(obj); +	if (ret) { +		drm_memrange_put_block(obj_priv->gtt_space); +		obj_priv->gtt_space = NULL; +		return ret; +	} + +	page_count = obj->size / PAGE_SIZE; +	/* Create an AGP memory structure pointing at our pages, and bind it +	 * into the GTT. +	 */ +	obj_priv->agp_mem = drm_agp_bind_pages(dev, +					       obj_priv->page_list, +					       page_count, +					       obj_priv->gtt_offset); +	if (obj_priv->agp_mem == NULL) { +		i915_gem_object_free_page_list(obj); +		drm_memrange_put_block(obj_priv->gtt_space); +		obj_priv->gtt_space = NULL; +		return -ENOMEM; +	} + +	/* Assert that the object is not currently in any GPU domain. 
As it
+	 * wasn't in the GTT, there shouldn't be any way it could have been in
+	 * a GPU cache
+	 */
+	BUG_ON(obj->read_domains & ~DRM_GEM_DOMAIN_CPU);
+	BUG_ON(obj->write_domain & ~DRM_GEM_DOMAIN_CPU);
+
+	return 0;
+}
+
+static void
+i915_gem_clflush_object(struct drm_gem_object *obj)
+{
+	struct drm_i915_gem_object	*obj_priv = obj->driver_private;
+
+	/* If we don't have a page list set up, then we're not pinned
+	 * to GPU, and we can ignore the cache flush because it'll happen
+	 * again at bind time.
+	 */
+	if (obj_priv->page_list == NULL)
+		return;
+
+	drm_ttm_cache_flush(obj_priv->page_list, obj->size / PAGE_SIZE);
+}
+
+/*
+ * Set the next domain for the specified object. This
+ * may not actually perform the necessary flushing/invalidating though,
+ * as that may want to be batched with other set_domain operations
+ *
+ * This is (we hope) the only really tricky part of gem. The goal
+ * is fairly simple -- track which caches hold bits of the object
+ * and make sure they remain coherent. A few concrete examples may
+ * help to explain how it works. For shorthand, we use the notation
+ * (read_domains, write_domain), e.g. (CPU, CPU) to indicate
+ * a pair of read and write domain masks.
+ *
+ * Case 1: the batch buffer
+ *
+ *	1. Allocated
+ *	2. Written by CPU
+ *	3. Mapped to GTT
+ *	4. Read by GPU
+ *	5. Unmapped from GTT
+ *	6. Freed
+ *
+ *	Let's take these a step at a time
+ *
+ *	1. Allocated
+ *		Pages allocated from the kernel may still have
+ *		cache contents, so we set them to (CPU, CPU) always.
+ *	2. Written by CPU (using pwrite)
+ *		The pwrite function calls set_domain (CPU, CPU) and
+ *		this function does nothing (as nothing changes)
+ *	3. Mapped to GTT
+ *		This function asserts that the object is not
+ *		currently in any GPU-based read or write domains
+ *	4. Read by GPU
+ *		i915_gem_execbuffer calls set_domain (COMMAND, 0).
+ *		As write_domain is zero, this function adds in the
+ *		current read domains (CPU+COMMAND, 0).
+ *		flush_domains is set to CPU.
+ *		invalidate_domains is set to COMMAND
+ *		clflush is run to get data out of the CPU caches
+ *		then i915_gem_dev_set_domain calls i915_gem_flush to
+ *		emit an MI_FLUSH and drm_agp_chipset_flush
+ *	5. Unmapped from GTT
+ *		i915_gem_object_unbind calls set_domain (CPU, CPU)
+ *		flush_domains and invalidate_domains end up both zero
+ *		so no flushing/invalidating happens
+ *	6. Freed
+ *		yay, done
+ *
+ * Case 2: The shared render buffer
+ *
+ *	1. Allocated
+ *	2. Mapped to GTT
+ *	3. Read/written by GPU
+ *	4. set_domain to (CPU,CPU)
+ *	5. Read/written by CPU
+ *	6. Read/written by GPU
+ *
+ *	1. Allocated
+ *		Same as last example, (CPU, CPU)
+ *	2. Mapped to GTT
+ *		Nothing changes (assertions find that it is not in the GPU)
+ *	3. Read/written by GPU
+ *		execbuffer calls set_domain (RENDER, RENDER)
+ *		flush_domains gets CPU
+ *		invalidate_domains gets GPU
+ *		clflush (obj)
+ *		MI_FLUSH and drm_agp_chipset_flush
+ *	4. set_domain (CPU, CPU)
+ *		flush_domains gets GPU
+ *		invalidate_domains gets CPU
+ *		wait_rendering (obj) to make sure all drawing is complete.
+ *		This will include an MI_FLUSH to get the data from GPU
+ *		to memory
+ *		clflush (obj) to invalidate the CPU cache
+ *		Another MI_FLUSH in i915_gem_flush (eliminate this somehow?)
+ *	5. Read/written by CPU
+ *		cache lines are loaded and dirtied
+ *	6. Read/written by GPU
+ *		Same as last GPU access
+ *
+ * Case 3: The constant buffer
+ *
+ *	1. Allocated
+ *	2. Written by CPU
+ *	3. Read by GPU
+ *	4. Updated (written) by CPU again
+ *	5. Read by GPU
+ *
+ *	1. Allocated
+ *		(CPU, CPU)
+ *	2. Written by CPU
+ *		(CPU, CPU)
+ *	3. Read by GPU
+ *		(CPU+RENDER, 0)
+ *		flush_domains = CPU
+ *		invalidate_domains = RENDER
+ *		clflush (obj)
+ *		MI_FLUSH
+ *		drm_agp_chipset_flush
+ *	4. Updated (written) by CPU again
+ *		(CPU, CPU)
+ *		flush_domains = 0 (no previous write domain)
+ *		invalidate_domains = 0 (no new read domains)
+ *	5. Read by GPU
+ *		(CPU+RENDER, 0)
+ *		flush_domains = CPU
+ *		invalidate_domains = RENDER
+ *		clflush (obj)
+ *		MI_FLUSH
+ *		drm_agp_chipset_flush
+ */
+static void
+i915_gem_object_set_domain(struct drm_gem_object *obj,
+			    uint32_t read_domains,
+			    uint32_t write_domain)
+{
+	struct drm_device		*dev = obj->dev;
+	uint32_t			invalidate_domains = 0;
+	uint32_t			flush_domains = 0;
+
+#if WATCH_BUF
+	DRM_INFO("%s: object %p read %08x write %08x\n",
+		 __func__, obj, read_domains, write_domain);
+#endif
+	/*
+	 * If the object isn't moving to a new write domain,
+	 * let the object stay in multiple read domains
+	 */
+	if (write_domain == 0)
+		read_domains |= obj->read_domains;
+
+	/*
+	 * Flush the current write domain if
+	 * the new read domains don't match. Invalidate
+	 * any read domains which differ from the old
+	 * write domain
+	 */
+	if (obj->write_domain && obj->write_domain != read_domains) {
+		flush_domains |= obj->write_domain;
+		invalidate_domains |= read_domains & ~obj->write_domain;
+	}
+	/*
+	 * Invalidate any read caches which may have
+	 * stale data. That is, any new read domains.
+	 */
+	invalidate_domains |= read_domains & ~obj->read_domains;
+	if ((flush_domains | invalidate_domains) & DRM_GEM_DOMAIN_CPU) {
+#if WATCH_BUF
+		DRM_INFO("%s: CPU domain flush %08x invalidate %08x\n",
+			 __func__, flush_domains, invalidate_domains);
+#endif
+		/*
+		 * If we're invalidating the CPU cache and flushing a GPU cache,
+		 * then pause for rendering so that the GPU caches will be
+		 * flushed before the CPU cache is invalidated
+		 */
+		if ((invalidate_domains & DRM_GEM_DOMAIN_CPU) &&
+		    (flush_domains & ~DRM_GEM_DOMAIN_CPU))
+			i915_gem_object_wait_rendering(obj);
+		i915_gem_clflush_object(obj);
+	}
+
+	if ((write_domain | flush_domains) != 0)
+		obj->write_domain = write_domain;
+	obj->read_domains = read_domains;
+	dev->invalidate_domains |= invalidate_domains;
+	dev->flush_domains |= flush_domains;
+}
+
+/**
+ * Once all of the objects have been set in the proper domain,
+ * perform the necessary flush and invalidate operations.
+ *
+ * Returns the write domains flushed, for use in flush tracking.
+ */ +static uint32_t +i915_gem_dev_set_domain(struct drm_device *dev) +{ +	uint32_t flush_domains = dev->flush_domains; + +	/* +	 * Now that all the buffers are synced to the proper domains, +	 * flush and invalidate the collected domains +	 */ +	if (dev->invalidate_domains | dev->flush_domains) { +#if WATCH_EXEC +		DRM_INFO("%s: invalidate_domains %08x flush_domains %08x\n", +			  __func__, +			 dev->invalidate_domains, +			 dev->flush_domains); +#endif +		i915_gem_flush(dev, +			       dev->invalidate_domains, +			       dev->flush_domains); +		dev->invalidate_domains = 0; +		dev->flush_domains = 0; +	} + +	return flush_domains; +} + +#if WATCH_COHERENCY +static void +i915_gem_object_check_coherency(struct drm_gem_object *obj, int handle) +{ +	struct drm_device *dev = obj->dev; +	struct drm_i915_gem_object *obj_priv = obj->driver_private; +	int page; +	uint32_t *gtt_mapping; +	uint32_t *backing_map = NULL; +	int bad_count = 0; + +	DRM_INFO("%s: checking coherency of object %p@0x%08x (%d, %dkb):\n", +		 __FUNCTION__, obj, obj_priv->gtt_offset, handle, +		 obj->size / 1024); + +	gtt_mapping = ioremap(dev->agp->base + obj_priv->gtt_offset, +			      obj->size); +	if (gtt_mapping == NULL) { +		DRM_ERROR("failed to map GTT space\n"); +		return; +	} + +	for (page = 0; page < obj->size / PAGE_SIZE; page++) { +		int i; + +		backing_map = kmap_atomic(obj_priv->page_list[page], KM_USER0); + +		if (backing_map == NULL) { +			DRM_ERROR("failed to map backing page\n"); +			goto out; +		} + +		for (i = 0; i < PAGE_SIZE / 4; i++) { +			uint32_t cpuval = backing_map[i]; +			uint32_t gttval = readl(gtt_mapping + +						page * 1024 + i); + +			if (cpuval != gttval) { +				DRM_INFO("incoherent CPU vs GPU at 0x%08x: " +					 "0x%08x vs 0x%08x\n", +					 (int)(obj_priv->gtt_offset + +					       page * PAGE_SIZE + i * 4), +					 cpuval, gttval); +				if (bad_count++ >= 8) { +					DRM_INFO("...\n"); +					goto out; +				} +			} +		} +		kunmap_atomic(backing_map, KM_USER0); +		backing_map = NULL; +	} + + out: +	if (backing_map != NULL) +		kunmap_atomic(backing_map, KM_USER0); +	iounmap(gtt_mapping); + +	/* give syslog time to catch up */ +	msleep(1); + +	/* Directly flush the object, since we just loaded values with the CPU +	 * from thebacking pages and we don't want to disturb the cache +	 * management that we're trying to observe. +	 */ + +	i915_gem_clflush_object(obj); +} +#endif + +static int +i915_gem_reloc_and_validate_object(struct drm_gem_object *obj, +				   struct drm_file *file_priv, +				   struct drm_i915_gem_exec_object *entry) +{ +	struct drm_device *dev = obj->dev; +	struct drm_i915_gem_relocation_entry reloc; +	struct drm_i915_gem_relocation_entry __user *relocs; +	struct drm_i915_gem_object *obj_priv = obj->driver_private; +	int i; +	uint32_t last_reloc_offset = -1; +	void *reloc_page = NULL; + +	/* Choose the GTT offset for our buffer and put it there. */ +	if (obj_priv->gtt_space == NULL) { +		i915_gem_object_bind_to_gtt(obj, (unsigned) entry->alignment); +		if (obj_priv->gtt_space == NULL) +			return -ENOMEM; +	} + +	entry->offset = obj_priv->gtt_offset; + +	relocs = (struct drm_i915_gem_relocation_entry __user *) +		 (uintptr_t) entry->relocs_ptr; +	/* Apply the relocations, using the GTT aperture to avoid cache +	 * flushing requirements. 
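+	 * Each relocation amounts to a single dword write through the GTT
+	 * mapping, roughly:
+	 *
+	 *	*(uint32_t *)(obj_gtt_mapping + reloc.offset) =
+	 *		target_obj_priv->gtt_offset + reloc.delta;
+	 *
+	 * so no CPU cache flushing is needed for the dwords we patch.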
+	 */ +	for (i = 0; i < entry->relocation_count; i++) { +		struct drm_gem_object *target_obj; +		struct drm_i915_gem_object *target_obj_priv; +		uint32_t reloc_val, reloc_offset, *reloc_entry; +		int ret; + +		ret = copy_from_user(&reloc, relocs + i, sizeof(reloc)); +		if (ret != 0) +			return ret; + +		target_obj = drm_gem_object_lookup(obj->dev, file_priv, +						   reloc.target_handle); +		if (target_obj == NULL) +			return -EINVAL; +		target_obj_priv = target_obj->driver_private; + +		/* The target buffer should have appeared before us in the +		 * validate list, so it should have a GTT space bound by now. +		 */ +		if (target_obj_priv->gtt_space == NULL) { +			DRM_ERROR("No GTT space found for object %d\n", +				  reloc.target_handle); +			drm_gem_object_unreference(target_obj); +			return -EINVAL; +		} + +		if (reloc.offset > obj->size - 4) { +			DRM_ERROR("Relocation beyond object bounds: " +				  "obj %p target %d offset %d size %d.\n", +				  obj, reloc.target_handle, +				  (int) reloc.offset, (int) obj->size); +			drm_gem_object_unreference(target_obj); +			return -EINVAL; +		} +		if (reloc.offset & 3) { +			DRM_ERROR("Relocation not 4-byte aligned: " +				  "obj %p target %d offset %d.\n", +				  obj, reloc.target_handle, +				  (int) reloc.offset); +			drm_gem_object_unreference(target_obj); +			return -EINVAL; +		} + +		if (reloc.write_domain && target_obj->pending_write_domain && +		    reloc.write_domain != target_obj->pending_write_domain) { +			DRM_ERROR("Write domain conflict: " +				  "obj %p target %d offset %d " +				  "new %08x old %08x\n", +				  obj, reloc.target_handle, +				  (int) reloc.offset, +				  reloc.write_domain, +				  target_obj->pending_write_domain); +			drm_gem_object_unreference(target_obj); +			return -EINVAL; +		} + +#if WATCH_RELOC +		DRM_INFO("%s: obj %p offset %08x target %d " +			 "read %08x write %08x gtt %08x " +			 "presumed %08x delta %08x\n", +			 __func__, +			 obj, +			 (int) reloc.offset, +			 (int) reloc.target_handle, +			 (int) reloc.read_domains, +			 (int) reloc.write_domain, +			 (int) target_obj_priv->gtt_offset, +			 (int) reloc.presumed_offset, +			 reloc.delta); +#endif + +		target_obj->pending_read_domains |= reloc.read_domains; +		target_obj->pending_write_domain |= reloc.write_domain; + +		/* If the relocation already has the right value in it, no +		 * more work needs to be done. +		 */ +		if (target_obj_priv->gtt_offset == reloc.presumed_offset) { +			drm_gem_object_unreference(target_obj); +			continue; +		} + +		/* Now that we're going to actually write some data in, +		 * make sure that any rendering using this buffer's contents +		 * is completed. +		 */ +		i915_gem_object_wait_rendering(obj); + +		/* As we're writing through the gtt, flush +		 * any CPU writes before we write the relocations +		 */ +		if (obj->write_domain & DRM_GEM_DOMAIN_CPU) { +			i915_gem_clflush_object(obj); +			drm_agp_chipset_flush(dev); +			obj->write_domain = 0; +		} + +		/* Map the page containing the relocation we're going to +		 * perform. 
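+		 * The mapping is reused across iterations: as long as the next
+		 * relocation lands in the same page (reloc_offset masked by
+		 * ~(PAGE_SIZE - 1) is unchanged), we skip the iounmap/ioremap.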
+		 */ +		reloc_offset = obj_priv->gtt_offset + reloc.offset; +		if (reloc_page == NULL || +		    (last_reloc_offset & ~(PAGE_SIZE - 1)) != +		    (reloc_offset & ~(PAGE_SIZE - 1))) { +			if (reloc_page != NULL) +				iounmap(reloc_page); + +			reloc_page = ioremap(dev->agp->base + +					     (reloc_offset & ~(PAGE_SIZE - 1)), +					     PAGE_SIZE); +			last_reloc_offset = reloc_offset; +			if (reloc_page == NULL) { +				drm_gem_object_unreference(target_obj); +				return -ENOMEM; +			} +		} + +		reloc_entry = (uint32_t *)((char *)reloc_page + +					   (reloc_offset & (PAGE_SIZE - 1))); +		reloc_val = target_obj_priv->gtt_offset + reloc.delta; + +#if WATCH_BUF +		DRM_INFO("Applied relocation: %p@0x%08x %08x -> %08x\n", +			  obj, (unsigned int) reloc.offset, +			  readl(reloc_entry), reloc_val); +#endif +		writel(reloc_val, reloc_entry); + +		/* Write the updated presumed offset for this entry back out +		 * to the user. +		 */ +		reloc.presumed_offset = target_obj_priv->gtt_offset; +		ret = copy_to_user(relocs + i, &reloc, sizeof(reloc)); +		if (ret != 0) { +			drm_gem_object_unreference(target_obj); +			return ret; +		} + +		drm_gem_object_unreference(target_obj); +	} + +	if (reloc_page != NULL) +		iounmap(reloc_page); + +#if WATCH_BUF +	if (0) +		i915_gem_dump_object(obj, 128, __func__, ~0); +#endif +	return 0; +} + +static int +i915_dispatch_gem_execbuffer(struct drm_device *dev, +			      struct drm_i915_gem_execbuffer *exec, +			      uint64_t exec_offset) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	struct drm_clip_rect __user *boxes = (struct drm_clip_rect __user *) +					     (uintptr_t) exec->cliprects_ptr; +	int nbox = exec->num_cliprects; +	int i = 0, count; +	uint32_t	exec_start, exec_len; +	RING_LOCALS; + +	exec_start = (uint32_t) exec_offset + exec->batch_start_offset; +	exec_len = (uint32_t) exec->batch_len; + +	if ((exec_start | exec_len) & 0x7) { +		DRM_ERROR("alignment\n"); +		return -EINVAL; +	} + +	if (!exec_start) +		return -EINVAL; + +	count = nbox ? nbox : 1; + +	for (i = 0; i < count; i++) { +		if (i < nbox) { +			int ret = i915_emit_box(dev, boxes, i, +						exec->DR1, exec->DR4); +			if (ret) +				return ret; +		} + +		if (dev_priv->use_mi_batchbuffer_start) { +			BEGIN_LP_RING(2); +			if (IS_I965G(dev)) { +				OUT_RING(MI_BATCH_BUFFER_START | +					 (2 << 6) | +					 MI_BATCH_NON_SECURE_I965); +				OUT_RING(exec_start); +			} else { +				OUT_RING(MI_BATCH_BUFFER_START | +					 (2 << 6)); +				OUT_RING(exec_start | MI_BATCH_NON_SECURE); +			} +			ADVANCE_LP_RING(); + +		} else { +			BEGIN_LP_RING(4); +			OUT_RING(MI_BATCH_BUFFER); +			OUT_RING(exec_start | MI_BATCH_NON_SECURE); +			OUT_RING(exec_start + exec_len - 4); +			OUT_RING(0); +			ADVANCE_LP_RING(); +		} +	} + +	/* XXX breadcrumb */ +	return 0; +} + +/* Throttle our rendering by waiting until the ring has completed our requests + * emitted over 20 msec ago. + * + * This should get us reasonable parallelism between CPU and GPU but also + * relatively low latency when blocking on a particular request to finish. + */ +static int +i915_gem_ring_throttle(struct drm_device *dev) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	int ret = 0; + +	mutex_lock(&dev->struct_mutex); +	while (!list_empty(&dev_priv->mm.request_list)) { +		struct drm_i915_gem_request *request; + +		request = list_first_entry(&dev_priv->mm.request_list, +					   struct drm_i915_gem_request, +					   list); + +		/* Break out if we're close enough. 
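+		 * "Close enough" means the oldest outstanding request was
+		 * emitted less than 20 msec ago, i.e.
+		 * jiffies - emitted_jiffies <= 20 * HZ / 1000.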
*/ +		if ((long) (jiffies - request->emitted_jiffies) <= (20 * HZ) / 1000) { +			mutex_unlock(&dev->struct_mutex); +			return 0; +		} + +		/* Wait on the last request if not. */ +		ret = i915_wait_request(dev, request->seqno); +		if (ret != 0) { +			mutex_unlock(&dev->struct_mutex); +			return ret; +		} +	} +	mutex_unlock(&dev->struct_mutex); +	return ret; +} + +int +i915_gem_execbuffer(struct drm_device *dev, void *data, +		    struct drm_file *file_priv) +{ +	struct drm_i915_gem_execbuffer *args = data; +	struct drm_i915_gem_exec_object *validate_list = NULL; +	struct drm_gem_object **object_list = NULL; +	struct drm_gem_object *batch_obj; +	int ret, i; +	uint64_t exec_offset; +	uint32_t seqno, flush_domains; + +	LOCK_TEST_WITH_RETURN(dev, file_priv); + +#if WATCH_EXEC +	DRM_INFO("buffers_ptr %d buffer_count %d len %08x\n", +		  (int) args->buffers_ptr, args->buffer_count, args->batch_len); +#endif +	i915_kernel_lost_context(dev); + +	ret = i915_gem_ring_throttle(dev); +	if (ret) +		return ret; + +	/* Copy in the validate list from userland */ +	validate_list = drm_calloc(sizeof(*validate_list), args->buffer_count, +				   DRM_MEM_DRIVER); +	object_list = drm_calloc(sizeof(*object_list), args->buffer_count, +				 DRM_MEM_DRIVER); +	if (validate_list == NULL || object_list == NULL) { +		DRM_ERROR("Failed to allocate validate or object list " +			  "for %d buffers\n", +			  args->buffer_count); +		ret = -ENOMEM; +		goto err; +	} +	ret = copy_from_user(validate_list, +			     (struct drm_i915_relocation_entry __user *) +			     (uintptr_t) args->buffers_ptr, +			     sizeof(*validate_list) * args->buffer_count); +	if (ret != 0) { +		DRM_ERROR("copy %d validate entries failed %d\n", +			  args->buffer_count, ret); +		goto err; +	} + +	mutex_lock(&dev->struct_mutex); +	/* Look up object handles and perform the relocations */ +	for (i = 0; i < args->buffer_count; i++) { +		object_list[i] = drm_gem_object_lookup(dev, file_priv, +						       validate_list[i].handle); +		if (object_list[i] == NULL) { +			DRM_ERROR("Invalid object handle %d at index %d\n", +				   validate_list[i].handle, i); +			ret = -EINVAL; +			goto err; +		} + +		ret = i915_gem_reloc_and_validate_object(object_list[i], +							 file_priv, +							 &validate_list[i]); +		if (ret) { +			DRM_ERROR("reloc and validate failed %d\n", ret); +			goto err; +		} +	} + +	/* Set the pending read domains for the batch buffer to COMMAND */ +	batch_obj = object_list[args->buffer_count-1]; +	batch_obj->pending_read_domains = DRM_GEM_DOMAIN_I915_COMMAND; +	batch_obj->pending_write_domain = 0; + +	for (i = 0; i < args->buffer_count; i++) { +		struct drm_gem_object *obj = object_list[i]; +		struct drm_i915_gem_object *obj_priv = obj->driver_private; + +		if (obj_priv->gtt_space == NULL) { +			/* We evicted the buffer in the process of validating +			 * our set of buffers in.  We could try to recover by +			 * kicking them everything out and trying again from +			 * the start. 
+			 */ +			ret = -ENOMEM; +			goto err; +		} + +		/* make sure all previous memory operations have passed */ +		i915_gem_object_set_domain(obj, +					    obj->pending_read_domains, +					    obj->pending_write_domain); +		obj->pending_read_domains = 0; +		obj->pending_write_domain = 0; +	} + +	/* Flush/invalidate caches and chipset buffer */ +	flush_domains = i915_gem_dev_set_domain(dev); + +#if WATCH_COHERENCY +	for (i = 0; i < args->buffer_count; i++) { +		i915_gem_object_check_coherency(object_list[i], +						validate_list[i].handle); +	} +#endif + +	exec_offset = validate_list[args->buffer_count - 1].offset; + +#if WATCH_EXEC +	i915_gem_dump_object(object_list[args->buffer_count - 1], +			      args->batch_len, +			      __func__, +			      ~0); +#endif + +	/* Exec the batchbuffer */ +	ret = i915_dispatch_gem_execbuffer(dev, args, exec_offset); +	if (ret) { +		DRM_ERROR("dispatch failed %d\n", ret); +		goto err; +	} + +	/* +	 * Ensure that the commands in the batch buffer are +	 * finished before the interrupt fires +	 */ +	flush_domains |= i915_retire_commands(dev); + +	/* +	 * Get a seqno representing the execution of the current buffer, +	 * which we can wait on.  We would like to mitigate these interrupts, +	 * likely by only creating seqnos occasionally (so that we have +	 * *some* interrupts representing completion of buffers that we can +	 * wait on when trying to clear up gtt space). +	 */ +	seqno = i915_add_request(dev, flush_domains); +	BUG_ON(seqno == 0); +	for (i = 0; i < args->buffer_count; i++) { +		struct drm_gem_object *obj = object_list[i]; +		struct drm_i915_gem_object *obj_priv = obj->driver_private; + +		i915_gem_object_move_to_active(obj); +		obj_priv->last_rendering_seqno = seqno; +#if WATCH_LRU +		DRM_INFO("%s: move to exec list %p\n", __func__, obj); +#endif +	} +#if WATCH_LRU +	i915_dump_lru(dev, __func__); +#endif + +	/* Copy the new buffer offsets back to the user's validate list. 
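+	 * Userspace records these as presumed_offset values for its next
+	 * execbuffer; any buffer that has not moved since can then have its
+	 * relocations skipped by the check above.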
*/ +	ret = copy_to_user((struct drm_i915_relocation_entry __user *) +			   (uintptr_t) args->buffers_ptr, +			   validate_list, +			   sizeof(*validate_list) * args->buffer_count); +	if (ret) +		DRM_ERROR("failed to copy %d validate entries " +			  "back to user (%d)\n", +			   args->buffer_count, ret); +err: +	if (object_list != NULL) { +		for (i = 0; i < args->buffer_count; i++) +			drm_gem_object_unreference(object_list[i]); +	} +	mutex_unlock(&dev->struct_mutex); + +	drm_free(object_list, sizeof(*object_list) * args->buffer_count, +		 DRM_MEM_DRIVER); +	drm_free(validate_list, sizeof(*validate_list) * args->buffer_count, +		 DRM_MEM_DRIVER); + +	return ret; +} + +int +i915_gem_pin_ioctl(struct drm_device *dev, void *data, +		   struct drm_file *file_priv) +{ +	struct drm_i915_gem_pin *args = data; +	struct drm_gem_object *obj; +	struct drm_i915_gem_object *obj_priv; +	int ret; + +	mutex_lock(&dev->struct_mutex); + +	i915_kernel_lost_context(dev); +	obj = drm_gem_object_lookup(dev, file_priv, args->handle); +	if (obj == NULL) { +		DRM_ERROR("Bad handle in i915_gem_pin_ioctl(): %d\n", +			  args->handle); +		mutex_unlock(&dev->struct_mutex); +		return -EINVAL; +	} + +	obj_priv = obj->driver_private; +	if (obj_priv->gtt_space == NULL) { +		ret = i915_gem_object_bind_to_gtt(obj, +						  (unsigned) args->alignment); +		if (ret != 0) { +			DRM_ERROR("Failure to bind in " +				  "i915_gem_pin_ioctl(): %d\n", +				  ret); +			drm_gem_object_unreference(obj); +			mutex_unlock(&dev->struct_mutex); +			return ret; +		} +	} + +	obj_priv->pin_count++; +	args->offset = obj_priv->gtt_offset; +	drm_gem_object_unreference(obj); +	mutex_unlock(&dev->struct_mutex); + +	return 0; +} + +int +i915_gem_unpin_ioctl(struct drm_device *dev, void *data, +		     struct drm_file *file_priv) +{ +	struct drm_i915_gem_pin *args = data; +	struct drm_gem_object *obj; +	struct drm_i915_gem_object *obj_priv; + +	mutex_lock(&dev->struct_mutex); + +	i915_kernel_lost_context(dev); +	obj = drm_gem_object_lookup(dev, file_priv, args->handle); +	if (obj == NULL) { +		DRM_ERROR("Bad handle in i915_gem_unpin_ioctl(): %d\n", +			  args->handle); +		mutex_unlock(&dev->struct_mutex); +		return -EINVAL; +	} + +	obj_priv = obj->driver_private; +	obj_priv->pin_count--; +	drm_gem_object_unreference(obj); +	mutex_unlock(&dev->struct_mutex); +	return 0; +} + +int +i915_gem_busy_ioctl(struct drm_device *dev, void *data, +		    struct drm_file *file_priv) +{ +	struct drm_i915_gem_busy *args = data; +	struct drm_gem_object *obj; +	struct drm_i915_gem_object *obj_priv; + +	mutex_lock(&dev->struct_mutex); +	obj = drm_gem_object_lookup(dev, file_priv, args->handle); +	if (obj == NULL) { +		DRM_ERROR("Bad handle in i915_gem_busy_ioctl(): %d\n", +			  args->handle); +		mutex_unlock(&dev->struct_mutex); +		return -EINVAL; +	} + +	obj_priv = obj->driver_private; +	args->busy = obj_priv->active; +	 +	drm_gem_object_unreference(obj); +	mutex_unlock(&dev->struct_mutex); +	return 0; +} + +int i915_gem_init_object(struct drm_gem_object *obj) +{ +	struct drm_i915_gem_object *obj_priv; + +	obj_priv = drm_calloc(1, sizeof(*obj_priv), DRM_MEM_DRIVER); +	if (obj_priv == NULL) +		return -ENOMEM; + +	obj->driver_private = obj_priv; +	obj_priv->obj = obj; +	INIT_LIST_HEAD(&obj_priv->list); +	return 0; +} + +void i915_gem_free_object(struct drm_gem_object *obj) +{ +	i915_kernel_lost_context(obj->dev); +	i915_gem_object_unbind(obj); + +	drm_free(obj->driver_private, 1, DRM_MEM_DRIVER); +} + +int +i915_gem_set_domain(struct drm_gem_object *obj, +		    struct 
drm_file *file_priv, +		    uint32_t read_domains, +		    uint32_t write_domain) +{ +	struct drm_device *dev = obj->dev; + +	BUG_ON(!mutex_is_locked(&dev->struct_mutex)); + +	drm_client_lock_take(dev, file_priv); +	i915_kernel_lost_context(dev); +	i915_gem_object_set_domain(obj, read_domains, write_domain); +	i915_gem_dev_set_domain(obj->dev); +	drm_client_lock_release(dev); + +	return 0; +} + +int +i915_gem_flush_pwrite(struct drm_gem_object *obj, +		      uint64_t offset, uint64_t size) +{ +#if 0 +	struct drm_device *dev = obj->dev; +	struct drm_i915_gem_object *obj_priv = obj->driver_private; + +	/* +	 * For writes much less than the size of the object and +	 * which are already pinned in memory, do the flush right now +	 */ + +	if ((size < obj->size >> 1) && obj_priv->page_list != NULL) { +		unsigned long first_page = offset / PAGE_SIZE; +		unsigned long beyond_page = roundup(offset + size, PAGE_SIZE) / PAGE_SIZE; + +		drm_ttm_cache_flush(obj_priv->page_list + first_page, +				    beyond_page - first_page); +		drm_agp_chipset_flush(dev); +		obj->write_domain = 0; +	} +#endif +	return 0; +} + +void +i915_gem_lastclose(struct drm_device *dev) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; + +	mutex_lock(&dev->struct_mutex); + +	/* Assume that the chip has been idled at this point. Just pull them +	 * off the execution list and unref them.  Since this is the last +	 * close, this is also the last ref and they'll go away. +	 */ + +	while (!list_empty(&dev_priv->mm.active_list)) { +		struct drm_i915_gem_object *obj_priv; + +		obj_priv = list_first_entry(&dev_priv->mm.active_list, +					    struct drm_i915_gem_object, +					    list); + +		list_del_init(&obj_priv->list); +		obj_priv->active = 0; +		obj_priv->obj->write_domain = 0; +		drm_gem_object_unreference(obj_priv->obj); +	} + +	mutex_unlock(&dev->struct_mutex); +} diff --git a/linux-core/nouveau_bo.c b/linux-core/nouveau_bo.c index ab3b23a4..86347e03 100644 --- a/linux-core/nouveau_bo.c +++ b/linux-core/nouveau_bo.c @@ -229,7 +229,7 @@ out_cleanup:          if (tmp_mem.mm_node) {                  mutex_lock(&dev->struct_mutex);                  if (tmp_mem.mm_node != bo->pinned_node) -                        drm_mm_put_block(tmp_mem.mm_node); +                        drm_memrange_put_block(tmp_mem.mm_node);                  tmp_mem.mm_node = NULL;                  mutex_unlock(&dev->struct_mutex);          } diff --git a/linux-core/nouveau_sgdma.c b/linux-core/nouveau_sgdma.c index cc4d5a92..81704ea1 100644 --- a/linux-core/nouveau_sgdma.c +++ b/linux-core/nouveau_sgdma.c @@ -280,7 +280,7 @@ nouveau_sgdma_nottm_hack_init(struct drm_device *dev)  	struct drm_nouveau_private *dev_priv = dev->dev_private;  	struct drm_ttm_backend *be;  	struct drm_scatter_gather sgreq; -	struct drm_mm_node mm_node; +	struct drm_memrange_node mm_node;  	struct drm_bo_mem_reg mem;  	int ret; diff --git a/shared-core/drm.h b/shared-core/drm.h index 52b01cd1..2373a22e 100644 --- a/shared-core/drm.h +++ b/shared-core/drm.h @@ -959,6 +959,101 @@ struct drm_mm_info_arg {  	uint64_t p_size;  }; + +struct drm_gem_create { +	/** +	 * Requested size for the object. +	 * +	 * The (page-aligned) allocated size for the object will be returned. +	 */ +	uint64_t size; +	/** +	 * Returned handle for the object. +	 * +	 * Object handles are nonzero. +	 */ +	uint32_t handle; +	uint32_t pad; +}; + +struct drm_gem_close { +	/** Handle of the object to be closed. */ +	uint32_t handle; +	uint32_t pad; +}; + +struct drm_gem_pread { +	/** Handle for the object being read. 
*/ +	uint32_t handle; +	uint32_t pad; +	/** Offset into the object to read from */ +	uint64_t offset; +	/** Length of data to read */ +	uint64_t size; +	/** Pointer to write the data into. */ +	uint64_t data_ptr;	/* void *, but pointers are not 32/64 compatible */ +}; + +struct drm_gem_pwrite { +	/** Handle for the object being written to. */ +	uint32_t handle; +	uint32_t pad; +	/** Offset into the object to write to */ +	uint64_t offset; +	/** Length of data to write */ +	uint64_t size; +	/** Pointer to read the data from. */ +	uint64_t data_ptr;	/* void *, but pointers are not 32/64 compatible */ +}; + +struct drm_gem_mmap { +	/** Handle for the object being mapped. */ +	uint32_t handle; +	uint32_t pad; +	/** Offset in the object to map. */ +	uint64_t offset; +	/** +	 * Length of data to map. +	 * +	 * The value will be page-aligned. +	 */ +	uint64_t size; +	/** Returned pointer the data was mapped at */ +	uint64_t addr_ptr;	/* void *, but pointers are not 32/64 compatible */ +}; + +struct drm_gem_flink { +	/** Handle for the object being named */ +	uint32_t handle; + +	/** Returned global name */ +	uint32_t name; +}; + +struct drm_gem_open { +	/** Name of object being opened */ +	uint32_t name; + +	/** Returned handle for the object */ +	uint32_t handle; +	 +	/** Returned size of the object */ +	uint64_t size; +}; + +struct drm_gem_set_domain { +	/** Handle for the object */ +	uint32_t handle; + +	/** New read domains */ +	uint32_t read_domains; + +	/** New write domain */ +	uint32_t write_domain; +}; + +#define DRM_GEM_DOMAIN_CPU		0x00000001 +  /**   * \name Ioctls Definitions   */ @@ -978,7 +1073,7 @@ struct drm_mm_info_arg {  #define DRM_IOCTL_GET_CLIENT            DRM_IOWR(0x05, struct drm_client)  #define DRM_IOCTL_GET_STATS             DRM_IOR( 0x06, struct drm_stats)  #define DRM_IOCTL_SET_VERSION		DRM_IOWR(0x07, struct drm_set_version) -#define DRM_IOCTL_MODESET_CTL           DRM_IOW(0x08, struct drm_modeset_ctl) +#define DRM_IOCTL_MODESET_CTL           DRM_IOW(0x08,  struct drm_modeset_ctl)  #define DRM_IOCTL_SET_UNIQUE		DRM_IOW( 0x10, struct drm_unique)  #define DRM_IOCTL_AUTH_MAGIC		DRM_IOW( 0x11, struct drm_auth) @@ -1027,6 +1122,15 @@ struct drm_mm_info_arg {  #define DRM_IOCTL_UPDATE_DRAW           DRM_IOW(0x3f, struct drm_update_draw) +#define DRM_IOCTL_GEM_CREATE		DRM_IOWR(0x09, struct drm_gem_create) +#define DRM_IOCTL_GEM_CLOSE		DRM_IOW (0x0a, struct drm_gem_close) +#define DRM_IOCTL_GEM_PREAD		DRM_IOW (0x0b, struct drm_gem_pread) +#define DRM_IOCTL_GEM_PWRITE		DRM_IOW (0x0c, struct drm_gem_pwrite) +#define DRM_IOCTL_GEM_MMAP		DRM_IOWR(0x0d, struct drm_gem_mmap) +#define DRM_IOCTL_GEM_FLINK		DRM_IOWR(0x0e, struct drm_gem_flink) +#define DRM_IOCTL_GEM_OPEN		DRM_IOWR(0x0f, struct drm_gem_open) +#define DRM_IOCTL_GEM_SET_DOMAIN	DRM_IOW (0xb7, struct drm_gem_set_domain) +  #define DRM_IOCTL_MM_INIT               DRM_IOWR(0xc0, struct drm_mm_init_arg)  #define DRM_IOCTL_MM_TAKEDOWN           DRM_IOWR(0xc1, struct drm_mm_type_arg)  #define DRM_IOCTL_MM_LOCK               DRM_IOWR(0xc2, struct drm_mm_type_arg) diff --git a/shared-core/i915_dma.c b/shared-core/i915_dma.c index 60b405d4..a948834a 100644 --- a/shared-core/i915_dma.c +++ b/shared-core/i915_dma.c @@ -41,10 +41,14 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)  	drm_i915_private_t *dev_priv = dev->dev_private;  	drm_i915_ring_buffer_t *ring = &(dev_priv->ring);  	u32 last_head = I915_READ(LP_RING + RING_HEAD) & HEAD_ADDR; +	u32 acthd_reg = IS_I965G(dev) ? 
I965REG_ACTHD : I915REG_ACTHD; +	u32 last_acthd = I915_READ(acthd_reg); +	u32 acthd;  	int i; -	for (i = 0; i < 10000; i++) { +	for (i = 0; i < 100000; i++) {  		ring->head = I915_READ(LP_RING + RING_HEAD) & HEAD_ADDR; +		acthd = I915_READ(acthd_reg);  		ring->space = ring->head - (ring->tail + 8);  		if (ring->space < 0)  			ring->space += ring->Size; @@ -54,13 +58,41 @@ int i915_wait_ring(struct drm_device * dev, int n, const char *caller)  		if (ring->head != last_head)  			i = 0; +		if (acthd != last_acthd) +			i = 0; +  		last_head = ring->head; -		DRM_UDELAY(1); +		last_acthd = acthd; +		msleep_interruptible (10);  	}  	return -EBUSY;  } +#if I915_RING_VALIDATE +/** + * Validate the cached ring tail value + * + * If the X server writes to the ring and DRM doesn't + * reload the head and tail pointers, it will end up writing + * data to the wrong place in the ring, causing havoc. + */ +void i915_ring_validate(struct drm_device *dev, const char *func, int line) +{ +	drm_i915_private_t *dev_priv = dev->dev_private; +	drm_i915_ring_buffer_t *ring = &(dev_priv->ring); +	u32	tail = I915_READ(LP_RING+RING_TAIL) & HEAD_ADDR; +	u32	head = I915_READ(LP_RING+RING_HEAD) & HEAD_ADDR; + +	if (tail != ring->tail) { +		DRM_ERROR("%s:%d head sw %x, hw %x. tail sw %x hw %x\n", +			  func, line, +			  ring->head, head, ring->tail, tail); +		BUG_ON(1); +	} +} +#endif +  void i915_kernel_lost_context(struct drm_device * dev)  {  	drm_i915_private_t *dev_priv = dev->dev_private; @@ -459,9 +491,9 @@ static int i915_emit_cmds(struct drm_device *dev, int __user *buffer,  	return 0;  } -static int i915_emit_box(struct drm_device * dev, -			 struct drm_clip_rect __user * boxes, -			 int i, int DR1, int DR4) +int i915_emit_box(struct drm_device * dev, +		  struct drm_clip_rect __user * boxes, +		  int i, int DR1, int DR4)  {  	drm_i915_private_t *dev_priv = dev->dev_private;  	struct drm_clip_rect box; @@ -517,7 +549,7 @@ void i915_emit_breadcrumb(struct drm_device *dev)  	BEGIN_LP_RING(4);  	OUT_RING(CMD_STORE_DWORD_IDX); -	OUT_RING(20); +	OUT_RING(5 << STORE_DWORD_INDEX_SHIFT);  	OUT_RING(dev_priv->counter);  	OUT_RING(0);  	ADVANCE_LP_RING(); @@ -715,9 +747,19 @@ void i915_dispatch_flip(struct drm_device * dev, int planes, int sync)  int i915_quiescent(struct drm_device *dev)  {  	drm_i915_private_t *dev_priv = dev->dev_private; +	int ret;  	i915_kernel_lost_context(dev); -	return i915_wait_ring(dev, dev_priv->ring.Size - 8, __FUNCTION__); +	ret = i915_wait_ring(dev, dev_priv->ring.Size - 8, __FUNCTION__); +	if (ret) +	{ +		i915_kernel_lost_context (dev); +		DRM_ERROR ("not quiescent head %08x tail %08x space %08x\n", +			   dev_priv->ring.head, +			   dev_priv->ring.tail, +			   dev_priv->ring.space); +	} +	return ret;  }  static int i915_flush_ioctl(struct drm_device *dev, void *data, @@ -1026,6 +1068,7 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)  	memset(dev_priv, 0, sizeof(drm_i915_private_t));  	dev->dev_private = (void *)dev_priv; +	dev_priv->dev = dev;  	/* Add register map (needed for suspend/resume) */  	base = drm_get_resource_start(dev, mmio_bar); @@ -1034,6 +1077,14 @@ int i915_driver_load(struct drm_device *dev, unsigned long flags)  	ret = drm_addmap(dev, base, size, _DRM_REGISTERS,  		_DRM_KERNEL | _DRM_DRIVER, &dev_priv->mmio_map); +	INIT_LIST_HEAD(&dev_priv->mm.active_list); +	INIT_LIST_HEAD(&dev_priv->mm.flushing_list); +	INIT_LIST_HEAD(&dev_priv->mm.inactive_list); +	INIT_LIST_HEAD(&dev_priv->mm.request_list); +	INIT_WORK(&dev_priv->user_interrupt_task, +		  
i915_user_interrupt_handler); +	dev_priv->mm.next_gem_seqno = 1; +  #ifdef __linux__  #if LINUX_VERSION_CODE < KERNEL_VERSION(2,6,25)  	intel_init_chipset_flush_compat(dev); @@ -1074,6 +1125,7 @@ void i915_driver_lastclose(struct drm_device * dev)  		dev_priv->val_bufs = NULL;  	}  #endif +	i915_gem_lastclose(dev);  	if (drm_getsarea(dev) && dev_priv->sarea_priv)  		i915_do_cleanup_pageflip(dev); @@ -1125,6 +1177,11 @@ struct drm_ioctl_desc i915_ioctls[] = {  #ifdef I915_HAVE_BUFFER  	DRM_IOCTL_DEF(DRM_I915_EXECBUFFER, i915_execbuffer, DRM_AUTH),  #endif +	DRM_IOCTL_DEF(DRM_I915_GEM_INIT, i915_gem_init_ioctl, DRM_AUTH), +	DRM_IOCTL_DEF(DRM_I915_GEM_EXECBUFFER, i915_gem_execbuffer, DRM_AUTH), +	DRM_IOCTL_DEF(DRM_I915_GEM_PIN, i915_gem_pin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), +	DRM_IOCTL_DEF(DRM_I915_GEM_UNPIN, i915_gem_unpin_ioctl, DRM_AUTH|DRM_ROOT_ONLY), +	DRM_IOCTL_DEF(DRM_I915_GEM_BUSY, i915_gem_busy_ioctl, DRM_AUTH),  };  int i915_max_ioctl = DRM_ARRAY_SIZE(i915_ioctls); diff --git a/shared-core/i915_drm.h b/shared-core/i915_drm.h index 97e77428..4712ea4f 100644 --- a/shared-core/i915_drm.h +++ b/shared-core/i915_drm.h @@ -176,6 +176,11 @@ typedef struct drm_i915_sarea {  #define DRM_I915_MMIO		0x10  #define DRM_I915_HWS_ADDR	0x11  #define DRM_I915_EXECBUFFER	0x12 +#define DRM_I915_GEM_INIT	0x13 +#define DRM_I915_GEM_EXECBUFFER	0x14 +#define DRM_I915_GEM_PIN	0x15 +#define DRM_I915_GEM_UNPIN	0x16 +#define DRM_I915_GEM_BUSY	0x17  #define DRM_IOCTL_I915_INIT		DRM_IOW( DRM_COMMAND_BASE + DRM_I915_INIT, drm_i915_init_t)  #define DRM_IOCTL_I915_FLUSH		DRM_IO ( DRM_COMMAND_BASE + DRM_I915_FLUSH) @@ -195,6 +200,11 @@ typedef struct drm_i915_sarea {  #define DRM_IOCTL_I915_VBLANK_SWAP	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_VBLANK_SWAP, drm_i915_vblank_swap_t)  #define DRM_IOCTL_I915_MMIO             DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_MMIO, drm_i915_mmio)  #define DRM_IOCTL_I915_EXECBUFFER	DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_EXECBUFFER, struct drm_i915_execbuffer) +#define DRM_IOCTL_I915_GEM_INIT		DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_INIT, struct drm_i915_gem_init) +#define DRM_IOCTL_I915_GEM_EXECBUFFER	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_EXECBUFFER, struct drm_i915_gem_execbuffer) +#define DRM_IOCTL_I915_GEM_PIN		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_PIN, struct drm_i915_gem_pin) +#define DRM_IOCTL_I915_GEM_UNPIN	DRM_IOW(DRM_COMMAND_BASE + DRM_I915_GEM_UNPIN, struct drm_i915_gem_unpin) +#define DRM_IOCTL_I915_GEM_BUSY		DRM_IOWR(DRM_COMMAND_BASE + DRM_I915_GEM_BUSY, struct drm_i915_gem_busy)  /* Asynchronous page flipping:   */ @@ -395,4 +405,146 @@ struct drm_i915_execbuffer {  	struct drm_fence_arg fence_arg;  }; +struct drm_i915_gem_init { +	/** +	 * Beginning offset in the GTT to be managed by the DRM memory +	 * manager. +	 */ +	uint64_t gtt_start; +	/** +	 * Ending offset in the GTT to be managed by the DRM memory +	 * manager. +	 */ +	uint64_t gtt_end; +}; + +struct drm_i915_gem_relocation_entry { +	/** +	 * Handle of the buffer being pointed to by this relocation entry. +	 * +	 * It's appealing to make this be an index into the mm_validate_entry +	 * list to refer to the buffer, but this allows the driver to create +	 * a relocation list for state buffers and not re-write it per +	 * exec using the buffer. +	 */ +	uint32_t target_handle; + +	/** +	 * Value to be added to the offset of the target buffer to make up +	 * the relocation entry. 
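+	 *
+	 * The dword written into the buffer at 'offset' ends up being the
+	 * target object's final GTT offset plus this delta.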
+	 */ +	uint32_t delta; + +	/** Offset in the buffer the relocation entry will be written into */ +	uint64_t offset; + +	/** +	 * Offset value of the target buffer that the relocation entry was last +	 * written as. +	 * +	 * If the buffer has the same offset as last time, we can skip syncing +	 * and writing the relocation.  This value is written back out by +	 * the execbuffer ioctl when the relocation is written. +	 */ +	uint64_t presumed_offset; + +	/** +	 * Target memory domains read by this operation. +	 */ +	uint32_t read_domains; + +	/** +	 * Target memory domains written by this operation. +	 * +	 * Note that only one domain may be written by the whole +	 * execbuffer operation, so that where there are conflicts, +	 * the application will get -EINVAL back. +	 */ +	uint32_t write_domain; +}; + +/** + * Intel memory domains + * + * Most of these just align with the various caches in + * the system and are used to flush and invalidate as + * objects end up cached in different domains. + */ + +/* 0x00000001 is DRM_GEM_DOMAIN_CPU */ +#define DRM_GEM_DOMAIN_I915_RENDER	0x00000002	/* Render cache, used by 2D and 3D drawing */ +#define DRM_GEM_DOMAIN_I915_SAMPLER	0x00000004	/* Sampler cache, used by texture engine */ +#define DRM_GEM_DOMAIN_I915_COMMAND	0x00000008	/* Command queue, used to load batch buffers */ +#define DRM_GEM_DOMAIN_I915_INSTRUCTION	0x00000010	/* Instruction cache, used by shader programs */ +#define DRM_GEM_DOMAIN_I915_VERTEX	0x00000020	/* Vertex address cache */ + +struct drm_i915_gem_exec_object { +	/** +	 * User's handle for a buffer to be bound into the GTT for this +	 * operation. +	 */ +	uint32_t handle; +	 +	/** List of relocations to be performed on this buffer */ +	uint32_t relocation_count; +	uint64_t relocs_ptr;	/* struct drm_i915_gem_relocation_entry *relocs */ +	 +	/** Required alignment in graphics aperture */ +	uint64_t alignment; + +	/** +	 * Returned value of the updated offset of the object, for future +	 * presumed_offset writes. +	 */ +	uint64_t offset; +}; + +struct drm_i915_gem_execbuffer { +	/** +	 * List of buffers to be validated with their relocations to be +	 * performend on them. +	 * +	 * These buffers must be listed in an order such that all relocations +	 * a buffer is performing refer to buffers that have already appeared +	 * in the validate list. +	 */ +	uint64_t buffers_ptr;	/* struct drm_i915_gem_validate_entry *buffers */ +	uint32_t buffer_count; + +	/** Offset in the batchbuffer to start execution from. */ +	uint32_t batch_start_offset; +	/** Bytes used in batchbuffer from batch_start_offset */ +	uint32_t batch_len; +	uint32_t DR1; +	uint32_t DR4; +	uint32_t num_cliprects; +	uint64_t cliprects_ptr;	/* struct drm_clip_rect *cliprects */ +}; + +struct drm_i915_gem_pin { +	/** Handle of the buffer to be pinned. */ +	uint32_t handle; +	uint32_t pad; +	 +	/** alignment required within the aperture */ +	uint64_t alignment; + +	/** Returned GTT offset of the buffer. */ +	uint64_t offset; +}; + +struct drm_i915_gem_unpin { +	/** Handle of the buffer to be unpinned. 
*/ +	uint32_t handle; +	uint32_t pad; +}; + +struct drm_i915_gem_busy { +	/** Handle of the buffer to check for busy */ +	uint32_t handle; +	 +	/** Return busy status (1 if busy, 0 if idle) */ +	uint32_t busy; +}; +  #endif				/* _I915_DRM_H_ */ diff --git a/shared-core/i915_drv.h b/shared-core/i915_drv.h index 6421f689..d646177b 100644 --- a/shared-core/i915_drv.h +++ b/shared-core/i915_drv.h @@ -101,6 +101,8 @@ typedef struct _drm_i915_vbl_swap {  } drm_i915_vbl_swap_t;  typedef struct drm_i915_private { +	struct drm_device *dev; +  	drm_local_map_t *sarea;  	drm_local_map_t *mmio_map; @@ -129,7 +131,7 @@ typedef struct drm_i915_private {  	DRM_SPINTYPE user_irq_lock;  	int user_irq_refcount;  	int fence_irq_on; -	uint32_t irq_enable_reg; +	uint32_t irq_mask_reg;  	int irq_enabled;  #ifdef I915_HAVE_FENCE @@ -241,6 +243,47 @@ typedef struct drm_i915_private {  	u8 saveDACMASK;  	u8 saveDACDATA[256*3]; /* 256 3-byte colors */  	u8 saveCR[37]; + +	struct { +		struct drm_memrange gtt_space; + +		/** +		 * List of objects currently involved in rendering from the +		 * ringbuffer. +		 * +		 * A reference is held on the buffer while on this list. +		 */ +		struct list_head active_list; + +		/** +		 * List of objects which are not in the ringbuffer but which +		 * still have a write_domain which needs to be flushed before +		 * unbinding. +		 * +		 * A reference is held on the buffer while on this list. +		 */ +		struct list_head flushing_list; + +		/** +		 * LRU list of objects which are not in the ringbuffer and +		 * are ready to unbind, but are still in the GTT. +		 * +		 * A reference is not held on the buffer while on this list, +		 * as merely being GTT-bound shouldn't prevent its being +		 * freed, and we'll pull it off the list in the free path. +		 */ +		struct list_head inactive_list; + +		/** +		 * List of breadcrumbs associated with GPU requests currently +		 * outstanding. +		 */ +		struct list_head request_list; + +		uint32_t next_gem_seqno; +	} mm; + +	struct work_struct user_interrupt_task;  } drm_i915_private_t;  enum intel_chip_family { @@ -250,6 +293,68 @@ enum intel_chip_family {  	CHIP_I965 = 0x08,  }; +/** driver private structure attached to each drm_gem_object */ +struct drm_i915_gem_object { +	struct drm_gem_object *obj; + +	/** Current space allocated to this object in the GTT, if any. */ +	struct drm_memrange_node *gtt_space; + +	/** This object's place on the active/flushing/inactive lists */ +	struct list_head list; + +	/** +	 * This is set if the object is on the active or flushing lists +	 * (has pending rendering), and is not set if it's on inactive (ready +	 * to be unbound). +	 */ +	int active; + +	/** AGP memory structure for our GTT binding. */ +	DRM_AGP_MEM *agp_mem; + +	struct page **page_list; + +	/** +	 * Current offset of the object in GTT space. +	 * +	 * This is the same as gtt_space->start +	 */ +	uint32_t gtt_offset; + +	/** Boolean whether this object has a valid gtt offset. */ +	int gtt_bound; + +	/** How many users have pinned this object in GTT space */ +	int pin_count; + +	/** Breadcrumb of last rendering to the buffer. */ +	uint32_t last_rendering_seqno; +}; + +/** + * Request queue structure. + * + * The request queue allows us to note sequence numbers that have been emitted + * and may be associated with active buffers to be retired. 
+ * + * By keeping this list, we can avoid having to do questionable + * sequence-number comparisons on buffer last_rendering_seqnos, and associate + * an emission time with seqnos for tracking how far ahead of the GPU we are. + */ +struct drm_i915_gem_request { +	/** GEM sequence number associated with this request. */ +	uint32_t seqno; + +	/** Time at which this request was emitted, in jiffies. */ +	unsigned long emitted_jiffies; + +	/** Cache domains that were flushed at the start of the request. */ +	uint32_t flush_domains; + +	struct list_head list; +}; +  extern struct drm_ioctl_desc i915_ioctls[];  extern int i915_max_ioctl; @@ -271,6 +376,10 @@ extern int i915_dispatch_batchbuffer(struct drm_device * dev,  				     drm_i915_batchbuffer_t * batch);  extern int i915_quiescent(struct drm_device *dev); +int i915_emit_box(struct drm_device * dev, +		  struct drm_clip_rect __user * boxes, +		  int i, int DR1, int DR4); +  /* i915_irq.c */  extern int i915_irq_emit(struct drm_device *dev, void *data,  			 struct drm_file *file_priv); @@ -286,6 +395,7 @@ extern int i915_vblank_pipe_set(struct drm_device *dev, void *data,  extern int i915_vblank_pipe_get(struct drm_device *dev, void *data,  				struct drm_file *file_priv);  extern int i915_emit_irq(struct drm_device * dev); +extern int i915_wait_irq(struct drm_device * dev, int irq_nr);  extern int i915_enable_vblank(struct drm_device *dev, int crtc);  extern void i915_disable_vblank(struct drm_device *dev, int crtc);  extern u32 i915_get_vblank_counter(struct drm_device *dev, int crtc); @@ -293,6 +403,7 @@ extern int i915_vblank_swap(struct drm_device *dev, void *data,  			    struct drm_file *file_priv);  extern void i915_user_irq_on(drm_i915_private_t *dev_priv);  extern void i915_user_irq_off(drm_i915_private_t *dev_priv); +extern void i915_user_interrupt_handler(struct work_struct *work);  /* i915_mem.c */  extern int i915_mem_alloc(struct drm_device *dev, void *data, @@ -329,7 +440,27 @@ void i915_flush_ttm(struct drm_ttm *ttm);  /* i915_execbuf.c */  int i915_execbuffer(struct drm_device *dev, void *data,  				   struct drm_file *file_priv); - +/* i915_gem.c */ +int i915_gem_init_ioctl(struct drm_device *dev, void *data, +			struct drm_file *file_priv); +int i915_gem_execbuffer(struct drm_device *dev, void *data, +			struct drm_file *file_priv); +int i915_gem_pin_ioctl(struct drm_device *dev, void *data, +		       struct drm_file *file_priv); +int i915_gem_unpin_ioctl(struct drm_device *dev, void *data, +			 struct drm_file *file_priv); +int i915_gem_busy_ioctl(struct drm_device *dev, void *data, +			struct drm_file *file_priv); +int i915_gem_init_object(struct drm_gem_object *obj); +void i915_gem_free_object(struct drm_gem_object *obj); +int i915_gem_set_domain(struct drm_gem_object *obj, +			struct drm_file *file_priv, +			uint32_t read_domains, +			uint32_t write_domain); +int i915_gem_flush_pwrite(struct drm_gem_object *obj, +			  uint64_t offset, uint64_t size); +void i915_gem_lastclose(struct drm_device *dev); +void i915_gem_retire_requests(struct drm_device *dev);  #endif  #ifdef __linux__ @@ -345,14 +476,23 @@ extern void intel_fini_chipset_flush_compat(struct drm_device *dev);  #define I915_WRITE16(reg,val)	DRM_WRITE16(dev_priv->mmio_map, (reg), (val))  #define I915_VERBOSE 0 +#define I915_RING_VALIDATE 0  #define RING_LOCALS	unsigned int outring, ringmask, outcount; \  			volatile char *virt; +#if I915_RING_VALIDATE +void i915_ring_validate(struct drm_device *dev, const char *func, int line); +#define 
I915_RING_DO_VALIDATE(dev) i915_ring_validate(dev, __FUNCTION__, __LINE__) +#else +#define I915_RING_DO_VALIDATE(dev) +#endif +  #define BEGIN_LP_RING(n) do {				\  	if (I915_VERBOSE)				\  		DRM_DEBUG("BEGIN_LP_RING(%d)\n",	\  	                         (n));		        \ +	I915_RING_DO_VALIDATE(dev);			\  	if (dev_priv->ring.space < (n)*4)                      \  		i915_wait_ring(dev, (n)*4, __FUNCTION__);      \  	outcount = 0;					\ @@ -371,6 +511,7 @@ extern void intel_fini_chipset_flush_compat(struct drm_device *dev);  #define ADVANCE_LP_RING() do {						\  	if (I915_VERBOSE) DRM_DEBUG("ADVANCE_LP_RING %x\n", outring);	\ +	I915_RING_DO_VALIDATE(dev);					\  	dev_priv->ring.tail = outring;					\  	dev_priv->ring.space -= outcount * 4;				\  	I915_WRITE(LP_RING + RING_TAIL, outring);			\ @@ -426,7 +567,12 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);  #define GFX_OP_BREAKPOINT_INTERRUPT	((0<<29)|(1<<23))  #define CMD_REPORT_HEAD			(7<<23)  #define CMD_STORE_DWORD_IMM             ((0x20<<23) | (0x1 << 22) | 0x1) +/** + * Stores a 32-bit integer to the status page at the dword index given. + */  #define CMD_STORE_DWORD_IDX		((0x21<<23) | 0x1) +# define STORE_DWORD_INDEX_SHIFT		2 +  #define CMD_OP_BATCH_BUFFER  ((0x0<<29)|(0x30<<23)|0x1)  #define CMD_MI_FLUSH         (0x04 << 23) @@ -501,6 +647,8 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);  #define I915REG_INT_MASK_R	0x020a8  #define I915REG_INT_ENABLE_R	0x020a0  #define I915REG_INSTPM	        0x020c0 +#define I965REG_ACTHD		0x02074 +#define I915REG_ACTHD		0x020C8  #define PIPEADSL		0x70000  #define PIPEBDSL		0x71000 @@ -773,7 +921,22 @@ extern int i915_wait_ring(struct drm_device * dev, int n, const char *caller);  #define BREADCRUMB_MASK ((1U << BREADCRUMB_BITS) - 1)  #define READ_BREADCRUMB(dev_priv)  (((volatile u32*)(dev_priv->hw_status_page))[5]) + +/** + * Reads a dword out of the status page, which is written to from the command + * queue by automatic updates, MI_REPORT_HEAD, MI_STORE_DATA_INDEX, or + * MI_STORE_DATA_IMM. + * + * The following dwords have a reserved meaning: + * 0: ISR copy, updated when an ISR bit not set in the HWSTAM changes. + * 4: ring 0 head pointer + * 5: ring 1 head pointer (915-class) + * 6: ring 2 head pointer (915-class) + * + * The area from dword 0x10 to 0x3ff is available for driver usage. + */  #define READ_HWSP(dev_priv, reg)  (((volatile u32*)(dev_priv->hw_status_page))[reg]) +#define I915_GEM_HWS_INDEX		0x10  #define BLC_PWM_CTL		0x61254  #define BACKLIGHT_MODULATION_FREQ_SHIFT		(17) diff --git a/shared-core/i915_irq.c b/shared-core/i915_irq.c index 2287cd0c..37d85f40 100644 --- a/shared-core/i915_irq.c +++ b/shared-core/i915_irq.c @@ -435,6 +435,28 @@ u32 i915_get_vblank_counter(struct drm_device *dev, int plane)  	return count;  } +/** + * Handler for user interrupts in process context (able to sleep, do VFS + * operations, etc. + * + * If another IRQ comes in while we're in this handler, it will still get put + * on the queue again to be rerun when we finish. 
+ */ +void +i915_user_interrupt_handler(struct work_struct *work) +{ +	drm_i915_private_t *dev_priv; +	struct drm_device *dev; + +	dev_priv = container_of(work, drm_i915_private_t, +				user_interrupt_task); +	dev = dev_priv->dev; + +	mutex_lock(&dev->struct_mutex); +	i915_gem_retire_requests(dev); +	mutex_unlock(&dev->struct_mutex); +} +  irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)  {  	struct drm_device *dev = (struct drm_device *) arg; @@ -485,13 +507,14 @@ irqreturn_t i915_driver_irq_handler(DRM_IRQ_ARGS)  	if (dev_priv->sarea_priv)  	    dev_priv->sarea_priv->last_dispatch = READ_BREADCRUMB(dev_priv); -	I915_WRITE(I915REG_INT_IDENTITY_R, iir); +	I915_WRITE(I915REG_INT_IDENTITY_R, iir | I915_USER_INTERRUPT);  	(void) I915_READ(I915REG_INT_IDENTITY_R); /* Flush posted write */  	if (iir & I915_USER_INTERRUPT) {  		DRM_WAKEUP(&dev_priv->irq_queue);  #ifdef I915_HAVE_FENCE  		i915_fence_handler(dev); +		schedule_work(&dev_priv->user_interrupt_task);  #endif  	} @@ -526,8 +549,9 @@ void i915_user_irq_on(drm_i915_private_t *dev_priv)  {  	DRM_SPINLOCK(&dev_priv->user_irq_lock);  	if (dev_priv->irq_enabled && (++dev_priv->user_irq_refcount == 1)){ -		dev_priv->irq_enable_reg |= I915_USER_INTERRUPT; -		I915_WRITE(I915REG_INT_ENABLE_R, dev_priv->irq_enable_reg); +		dev_priv->irq_mask_reg &= ~I915_USER_INTERRUPT; +		I915_WRITE(I915REG_INT_MASK_R, dev_priv->irq_mask_reg); +		(void) I915_READ (I915REG_INT_ENABLE_R);  	}  	DRM_SPINUNLOCK(&dev_priv->user_irq_lock); @@ -536,15 +560,17 @@ void i915_user_irq_on(drm_i915_private_t *dev_priv)  void i915_user_irq_off(drm_i915_private_t *dev_priv)  {  	DRM_SPINLOCK(&dev_priv->user_irq_lock); +	BUG_ON(dev_priv->user_irq_refcount <= 0);  	if (dev_priv->irq_enabled && (--dev_priv->user_irq_refcount == 0)) { -		//		dev_priv->irq_enable_reg &= ~USER_INT_FLAG; -		//		I915_WRITE(I915REG_INT_ENABLE_R, dev_priv->irq_enable_reg); +		dev_priv->irq_mask_reg |= I915_USER_INTERRUPT; +		I915_WRITE(I915REG_INT_MASK_R, dev_priv->irq_mask_reg); +		(void) I915_READ(I915REG_INT_MASK_R);  	}  	DRM_SPINUNLOCK(&dev_priv->user_irq_lock);  } -static int i915_wait_irq(struct drm_device * dev, int irq_nr) +int i915_wait_irq(struct drm_device * dev, int irq_nr)  {  	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;  	int ret = 0; @@ -618,16 +644,17 @@ int i915_enable_vblank(struct drm_device *dev, int plane)  	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;  	int pipe = i915_get_pipe(dev, plane);  	u32	pipestat_reg = 0; +	u32	mask_reg = 0;  	u32	pipestat;  	switch (pipe) {  	case 0:  		pipestat_reg = I915REG_PIPEASTAT; -		dev_priv->irq_enable_reg |= I915_DISPLAY_PIPE_A_EVENT_INTERRUPT; +		mask_reg |= I915_DISPLAY_PIPE_A_EVENT_INTERRUPT;  		break;  	case 1:  		pipestat_reg = I915REG_PIPEBSTAT; -		dev_priv->irq_enable_reg |= I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; +		mask_reg |= I915_DISPLAY_PIPE_B_EVENT_INTERRUPT;  		break;  	default:  		DRM_ERROR("tried to enable vblank on non-existent pipe %d\n", @@ -653,7 +680,11 @@ int i915_enable_vblank(struct drm_device *dev, int plane)  			     I915_VBLANK_INTERRUPT_STATUS);  		I915_WRITE(pipestat_reg, pipestat);  	} -	I915_WRITE(I915REG_INT_ENABLE_R, dev_priv->irq_enable_reg); +	DRM_SPINLOCK(&dev_priv->user_irq_lock); +	dev_priv->irq_mask_reg &= ~mask_reg; +	I915_WRITE(I915REG_INT_MASK_R, dev_priv->irq_mask_reg); +	I915_READ(I915REG_INT_MASK_R); +	DRM_SPINUNLOCK(&dev_priv->user_irq_lock);  	return 0;  } @@ -663,16 +694,17 @@ void i915_disable_vblank(struct drm_device *dev, int plane)  	
drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private;  	int pipe = i915_get_pipe(dev, plane);  	u32	pipestat_reg = 0; +	u32	mask_reg = 0;  	u32	pipestat;  	switch (pipe) {  	case 0:  		pipestat_reg = I915REG_PIPEASTAT; -		dev_priv->irq_enable_reg &= ~I915_DISPLAY_PIPE_A_EVENT_INTERRUPT; +		mask_reg |= I915_DISPLAY_PIPE_A_EVENT_INTERRUPT;  		break;  	case 1:  		pipestat_reg = I915REG_PIPEBSTAT; -		dev_priv->irq_enable_reg &= ~I915_DISPLAY_PIPE_B_EVENT_INTERRUPT; +		mask_reg |= I915_DISPLAY_PIPE_B_EVENT_INTERRUPT;  		break;  	default:  		DRM_ERROR("tried to disable vblank on non-existent pipe %d\n", @@ -680,7 +712,11 @@ void i915_disable_vblank(struct drm_device *dev, int plane)  		break;  	} -	I915_WRITE(I915REG_INT_ENABLE_R, dev_priv->irq_enable_reg); +	DRM_SPINLOCK(&dev_priv->user_irq_lock); +	dev_priv->irq_mask_reg |= mask_reg; +	I915_WRITE(I915REG_INT_MASK_R, dev_priv->irq_mask_reg); +	(void) I915_READ (I915REG_INT_MASK_R); +	DRM_SPINUNLOCK(&dev_priv->user_irq_lock);  	if (pipestat_reg)  	{  		pipestat = I915_READ (pipestat_reg); @@ -692,6 +728,7 @@ void i915_disable_vblank(struct drm_device *dev, int plane)  		pipestat |= (I915_START_VBLANK_INTERRUPT_STATUS |  			     I915_VBLANK_INTERRUPT_STATUS);  		I915_WRITE(pipestat_reg, pipestat); +		(void) I915_READ(pipestat_reg);  	}  } @@ -699,12 +736,27 @@ static void i915_enable_interrupt (struct drm_device *dev)  {  	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; -	dev_priv->irq_enable_reg |= I915_USER_INTERRUPT; - -	I915_WRITE(I915REG_INT_ENABLE_R, dev_priv->irq_enable_reg); +	dev_priv->irq_mask_reg = (I915_USER_INTERRUPT | +				  I915_DISPLAY_PIPE_A_EVENT_INTERRUPT | +				  I915_DISPLAY_PIPE_B_EVENT_INTERRUPT); +	I915_WRITE(I915REG_INT_MASK_R, dev_priv->irq_mask_reg); +	I915_WRITE(I915REG_INT_ENABLE_R, dev_priv->irq_mask_reg); +	(void) I915_READ (I915REG_INT_ENABLE_R);  	dev_priv->irq_enabled = 1;  } +static void i915_disable_interrupt (struct drm_device *dev) +{ +	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; +	 +	I915_WRITE(I915REG_HWSTAM, 0xffffffff); +	I915_WRITE(I915REG_INT_MASK_R, 0xffffffff); +	I915_WRITE(I915REG_INT_ENABLE_R, 0); +	I915_WRITE(I915REG_INT_IDENTITY_R, 0xffffffff); +	(void) I915_READ (I915REG_INT_IDENTITY_R); +	dev_priv->irq_enabled = 0; +} +  /* Set the vblank monitor pipe   */  int i915_vblank_pipe_set(struct drm_device *dev, void *data, @@ -910,9 +962,11 @@ void i915_driver_irq_preinstall(struct drm_device * dev)  {  	drm_i915_private_t *dev_priv = (drm_i915_private_t *) dev->dev_private; -	I915_WRITE16(I915REG_HWSTAM, 0xeffe); -	I915_WRITE16(I915REG_INT_MASK_R, 0x0); -	I915_WRITE16(I915REG_INT_ENABLE_R, 0x0); +	I915_WRITE(I915REG_HWSTAM, 0xffff); +	I915_WRITE(I915REG_INT_ENABLE_R, 0x0); +	I915_WRITE(I915REG_INT_MASK_R, 0xffffffff); +	I915_WRITE(I915REG_INT_IDENTITY_R, 0xffffffff); +	(void) I915_READ(I915REG_INT_IDENTITY_R);  }  int i915_driver_irq_postinstall(struct drm_device * dev) @@ -926,7 +980,7 @@ int i915_driver_irq_postinstall(struct drm_device * dev)  	DRM_SPININIT(&dev_priv->user_irq_lock, "userirq");  	dev_priv->user_irq_refcount = 0; -	dev_priv->irq_enable_reg = 0; +	dev_priv->irq_mask_reg = 0;  	ret = drm_vblank_init(dev, num_pipes);  	if (ret) @@ -953,15 +1007,10 @@ void i915_driver_irq_uninstall(struct drm_device * dev)  	if (!dev_priv)  		return; -	dev_priv->irq_enabled = 0; -	I915_WRITE(I915REG_HWSTAM, 0xffffffff); -	I915_WRITE(I915REG_INT_MASK_R, 0xffffffff); -	I915_WRITE(I915REG_INT_ENABLE_R, 0x0); +	i915_disable_interrupt 
(dev);  	temp = I915_READ(I915REG_PIPEASTAT);  	I915_WRITE(I915REG_PIPEASTAT, temp);  	temp = I915_READ(I915REG_PIPEBSTAT);  	I915_WRITE(I915REG_PIPEBSTAT, temp); -	temp = I915_READ(I915REG_INT_IDENTITY_R); -	I915_WRITE(I915REG_INT_IDENTITY_R, temp);  } diff --git a/tests/Makefile.am b/tests/Makefile.am index dce1754e..718cc436 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -22,7 +22,10 @@ TESTS = auth \  	getstats \  	lock \  	setversion \ -	updatedraw +	updatedraw \ +	gem_basic \ +	gem_readwrite \ +	gem_mmap  EXTRA_PROGRAMS = $(TESTS)  CLEANFILES = $(EXTRA_PROGRAMS) $(EXTRA_LTLIBRARIES) diff --git a/tests/drmtest.c b/tests/drmtest.c index cae99a0c..5453b105 100644 --- a/tests/drmtest.c +++ b/tests/drmtest.c @@ -26,6 +26,7 @@   */  #include <fcntl.h> +#include <sys/stat.h>  #include "drmtest.h"  /** Open the first DRM device we can find, searching up to 16 device nodes */ @@ -80,4 +81,3 @@ int drm_open_any_master(void)  	fprintf(stderr, "Couldn't find an un-controlled DRM device\n");  	abort();  } - diff --git a/tests/gem_basic.c b/tests/gem_basic.c new file mode 100644 index 00000000..8b8b63d0 --- /dev/null +++ b/tests/gem_basic.c @@ -0,0 +1,97 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + *    Eric Anholt <eric@anholt.net> + * + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <fcntl.h> +#include <inttypes.h> +#include <errno.h> +#include <sys/stat.h> +#include "drm.h" + +static void +test_bad_close(int fd) +{ +	struct drm_gem_close close; +	int ret; + +	printf("Testing error return on bad close ioctl.\n"); + +	close.handle = 0x10101010; +	ret = ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); + +	assert(ret == -1 && errno == EINVAL); +} + +static void +test_create_close(int fd) +{ +	struct drm_gem_create create; +	struct drm_gem_close close; +	int ret; + +	printf("Testing creating and closing an object.\n"); + +	memset(&create, 0, sizeof(create)); +	create.size = 16 * 1024; +	ret = ioctl(fd, DRM_IOCTL_GEM_CREATE, &create); +	assert(ret == 0); + +	close.handle = create.handle; +	ret = ioctl(fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +static void +test_create_fd_close(int fd) +{ +	struct drm_gem_create create; +	int ret; + +	printf("Testing closing with an object allocated.\n"); + +	memset(&create, 0, sizeof(create)); +	create.size = 16 * 1024; +	ret = ioctl(fd, DRM_IOCTL_GEM_CREATE, &create); +	assert(ret == 0); + +	close(fd); +} + +int main(int argc, char **argv) +{ +	int fd; + +	fd = drm_open_any(); + +	test_bad_close(fd); +	test_create_close(fd); +	test_create_fd_close(fd); + +	return 0; +} diff --git a/tests/gem_mmap.c b/tests/gem_mmap.c new file mode 100644 index 00000000..3f8e27a0 --- /dev/null +++ b/tests/gem_mmap.c @@ -0,0 +1,131 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. + * + * Authors: + *    Eric Anholt <eric@anholt.net> + * + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <fcntl.h> +#include <inttypes.h> +#include <errno.h> +#include <sys/stat.h> +#include "drm.h" + +#define OBJECT_SIZE 16384 + +int do_read(int fd, int handle, void *buf, int offset, int size) +{ +	struct drm_gem_pread read; + +	/* Ensure that we don't have any convenient data in buf in case +	 * we fail. 
+	 */ +	memset(buf, 0xd0, size); + +	memset(&read, 0, sizeof(read)); +	read.handle = handle; +	read.data_ptr = (uintptr_t)buf; +	read.size = size; +	read.offset = offset; + +	return ioctl(fd, DRM_IOCTL_GEM_PREAD, &read); +} + +int do_write(int fd, int handle, void *buf, int offset, int size) +{ +	struct drm_gem_pwrite write; + +	memset(&write, 0, sizeof(write)); +	write.handle = handle; +	write.data_ptr = (uintptr_t)buf; +	write.size = size; +	write.offset = offset; + +	return ioctl(fd, DRM_IOCTL_GEM_PWRITE, &write); +} + +int main(int argc, char **argv) +{ +	int fd; +	struct drm_gem_create create; +	struct drm_gem_mmap mmap; +	struct drm_gem_close unref; +	uint8_t expected[OBJECT_SIZE]; +	uint8_t buf[OBJECT_SIZE]; +	uint8_t *addr; +	int ret; +	int handle; + +	fd = drm_open_any(); + +	memset(&mmap, 0, sizeof(mmap)); +	mmap.handle = 0x10101010; +	mmap.offset = 0; +	mmap.size = 4096; +	printf("Testing mmaping of bad object.\n"); +	ret = ioctl(fd, DRM_IOCTL_GEM_MMAP, &mmap); +	assert(ret == -1 && errno == EINVAL); + +	memset(&create, 0, sizeof(create)); +	create.size = OBJECT_SIZE; +	ret = ioctl(fd, DRM_IOCTL_GEM_CREATE, &create); +	assert(ret == 0); +	handle = create.handle; + +	printf("Testing mmaping of newly created object.\n"); +	mmap.handle = handle; +	mmap.offset = 0; +	mmap.size = OBJECT_SIZE; +	ret = ioctl(fd, DRM_IOCTL_GEM_MMAP, &mmap); +	assert(ret == 0); +	addr = (uint8_t *)(uintptr_t)mmap.addr_ptr; + +	printf("Testing contents of newly created object.\n"); +	memset(expected, 0, sizeof(expected)); +	assert(memcmp(addr, expected, sizeof(expected)) == 0); + +	printf("Testing coherency of writes and mmap reads.\n"); +	memset(buf, 0, sizeof(buf)); +	memset(buf + 1024, 0x01, 1024); +	memset(expected + 1024, 0x01, 1024); +	ret = do_write(fd, handle, buf, 0, OBJECT_SIZE); +	assert(ret == 0); +	assert(memcmp(buf, addr, sizeof(buf)) == 0); + +	printf("Testing that mapping stays after close\n"); +	unref.handle = handle; +	ret = ioctl(fd, DRM_IOCTL_GEM_CLOSE, &unref); +	assert(ret == 0); +	assert(memcmp(buf, addr, sizeof(buf)) == 0); + +	printf("Testing unmapping\n"); +	munmap(addr, OBJECT_SIZE); + +	close(fd); + +	return 0; +} diff --git a/tests/gem_readwrite.c b/tests/gem_readwrite.c new file mode 100644 index 00000000..a48f9847 --- /dev/null +++ b/tests/gem_readwrite.c @@ -0,0 +1,125 @@ +/* + * Copyright © 2008 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS + * IN THE SOFTWARE. 
+ * + * Authors: + *    Eric Anholt <eric@anholt.net> + * + */ + +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <fcntl.h> +#include <inttypes.h> +#include <errno.h> +#include <sys/stat.h> +#include "drm.h" + +#define OBJECT_SIZE 16384 + +int do_read(int fd, int handle, void *buf, int offset, int size) +{ +	struct drm_gem_pread read; + +	/* Ensure that we don't have any convenient data in buf in case +	 * we fail. +	 */ +	memset(buf, 0xd0, size); + +	memset(&read, 0, sizeof(read)); +	read.handle = handle; +	read.data_ptr = (uintptr_t)buf; +	read.size = size; +	read.offset = offset; + +	return ioctl(fd, DRM_IOCTL_GEM_PREAD, &read); +} + +int do_write(int fd, int handle, void *buf, int offset, int size) +{ +	struct drm_gem_pwrite write; + +	memset(&write, 0, sizeof(write)); +	write.handle = handle; +	write.data_ptr = (uintptr_t)buf; +	write.size = size; +	write.offset = offset; + +	return ioctl(fd, DRM_IOCTL_GEM_PWRITE, &write); +} + +int main(int argc, char **argv) +{ +	int fd; +	struct drm_gem_create create; +	uint8_t expected[OBJECT_SIZE]; +	uint8_t buf[OBJECT_SIZE]; +	int ret; +	int handle; + +	fd = drm_open_any(); + +	memset(&create, 0, sizeof(create)); +	create.size = OBJECT_SIZE; +	ret = ioctl(fd, DRM_IOCTL_GEM_CREATE, &create); +	assert(ret == 0); +	handle = create.handle; + +	printf("Testing contents of newly created object.\n"); +	ret = do_read(fd, handle, buf, 0, OBJECT_SIZE); +	assert(ret == 0); +	memset(&expected, 0, sizeof(expected)); +	assert(memcmp(expected, buf, sizeof(expected)) == 0); + +	printf("Testing read beyond end of buffer.\n"); +	ret = do_read(fd, handle, buf, OBJECT_SIZE / 2, OBJECT_SIZE); +	assert(ret == -1 && errno == EINVAL); + +	printf("Testing full write of buffer\n"); +	memset(buf, 0, sizeof(buf)); +	memset(buf + 1024, 0x01, 1024); +	memset(expected + 1024, 0x01, 1024); +	ret = do_write(fd, handle, buf, 0, OBJECT_SIZE); +	assert(ret == 0); +	ret = do_read(fd, handle, buf, 0, OBJECT_SIZE); +	assert(ret == 0); +	assert(memcmp(buf, expected, sizeof(buf)) == 0); + +	printf("Testing partial write of buffer\n"); +	memset(buf + 4096, 0x02, 1024); +	memset(expected + 4096, 0x02, 1024); +	ret = do_write(fd, handle, buf + 4096, 4096, 1024); +	assert(ret == 0); +	ret = do_read(fd, handle, buf, 0, OBJECT_SIZE); +	assert(ret == 0); +	assert(memcmp(buf, expected, sizeof(buf)) == 0); + +	printf("Testing partial read of buffer\n"); +	ret = do_read(fd, handle, buf, 512, 1024); +	assert(ret == 0); +	assert(memcmp(buf, expected + 512, 1024) == 0); + +	close(fd); + +	return 0; +} | 
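
For reference, the three new tests above reduce to one small pattern: create an object, move data through the pread/pwrite ioctls, optionally map it with the mmap ioctl, and drop the handle. The sketch below is not part of the patch; it simply strings those steps together, assuming the ioctl requests and structures introduced by this series (DRM_IOCTL_GEM_CREATE, _PWRITE, _PREAD, _MMAP, _CLOSE) and the drm_open_any() helper from tests/drmtest.c.

	#include <assert.h>
	#include <inttypes.h>
	#include <string.h>
	#include <sys/ioctl.h>
	#include <sys/mman.h>
	#include <unistd.h>
	#include "drm.h"
	#include "drmtest.h"

	int main(void)
	{
		struct drm_gem_create create;
		struct drm_gem_pwrite pwrite;
		struct drm_gem_pread pread;
		struct drm_gem_mmap mmap_arg;
		struct drm_gem_close unref;
		uint8_t out[4096], in[4096];
		uint8_t *map;
		int fd = drm_open_any();

		/* Allocate a page-sized object; the kernel hands back a handle. */
		memset(&create, 0, sizeof(create));
		create.size = sizeof(out);
		assert(ioctl(fd, DRM_IOCTL_GEM_CREATE, &create) == 0);

		/* Write a pattern into the object through the pwrite path. */
		memset(out, 0xaa, sizeof(out));
		memset(&pwrite, 0, sizeof(pwrite));
		pwrite.handle = create.handle;
		pwrite.offset = 0;
		pwrite.size = sizeof(out);
		pwrite.data_ptr = (uintptr_t)out;
		assert(ioctl(fd, DRM_IOCTL_GEM_PWRITE, &pwrite) == 0);

		/* Read it back with pread and check that it round-tripped. */
		memset(&pread, 0, sizeof(pread));
		pread.handle = create.handle;
		pread.offset = 0;
		pread.size = sizeof(in);
		pread.data_ptr = (uintptr_t)in;
		assert(ioctl(fd, DRM_IOCTL_GEM_PREAD, &pread) == 0);
		assert(memcmp(out, in, sizeof(out)) == 0);

		/* The mmap ioctl maps the object and returns the address. */
		memset(&mmap_arg, 0, sizeof(mmap_arg));
		mmap_arg.handle = create.handle;
		mmap_arg.offset = 0;
		mmap_arg.size = sizeof(out);
		assert(ioctl(fd, DRM_IOCTL_GEM_MMAP, &mmap_arg) == 0);
		map = (uint8_t *)(uintptr_t)mmap_arg.addr_ptr;
		assert(memcmp(map, out, sizeof(out)) == 0);
		munmap(map, sizeof(out));

		/* Drop our handle to the object. */
		memset(&unref, 0, sizeof(unref));
		unref.handle = create.handle;
		assert(ioctl(fd, DRM_IOCTL_GEM_CLOSE, &unref) == 0);

		close(fd);
		return 0;
	}

Each step asserts on the ioctl return value, mirroring the style of the tests themselves.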
