Diff between Rev 5346 and Rev 6104. Lines only in Rev 5346 are prefixed with "-"; lines only in Rev 6104 with "+".
/*
 * Copyright 2009 Jerome Glisse.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 */
/*
 * Authors:
 *    Jerome Glisse
 *    Thomas Hellstrom
 *    Dave Airlie
 */
#include <linux/list.h>
#include <linux/slab.h>
#include <drm/drmP.h>
#include <drm/radeon_drm.h>
#include "radeon.h"
#include "radeon_trace.h"


int radeon_ttm_init(struct radeon_device *rdev);
void radeon_ttm_fini(struct radeon_device *rdev);
static void radeon_bo_clear_surface_reg(struct radeon_bo *bo);

/*
 * To exclude mutual BO access we rely on bo_reserve exclusion, as all
 * functions call it.
 */
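/*
 * Illustrative sketch (not part of this revision): the bo_reserve
 * exclusion mentioned above means a caller brackets any BO state access
 * with a reserve/unreserve pair, e.g.:
 *
 *    int r = radeon_bo_reserve(bo, false);
 *    if (unlikely(r != 0))
 *        return r;
 *    radeon_bo_get_tiling_flags(bo, &tiling_flags, &pitch);
 *    radeon_bo_unreserve(bo);
 *
 * radeon_bo_reserve()/radeon_bo_unreserve() are the wrappers around
 * ttm_bo_reserve()/ttm_bo_unreserve() used elsewhere in this file.
 */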

static void radeon_update_memory_usage(struct radeon_bo *bo,
                                       unsigned mem_type, int sign)
{
    struct radeon_device *rdev = bo->rdev;
    u64 size = (u64)bo->tbo.num_pages << PAGE_SHIFT;

    switch (mem_type) {
    case TTM_PL_TT:
        if (sign > 0)
            __atomic_add_fetch(&rdev->gtt_usage.counter, size, __ATOMIC_RELAXED);
        else
            __atomic_sub_fetch(&rdev->gtt_usage.counter, size, __ATOMIC_RELAXED);
        break;
    case TTM_PL_VRAM:
        if (sign > 0)
            __atomic_add_fetch(&rdev->vram_usage.counter, size, __ATOMIC_RELAXED);
        else
            __atomic_sub_fetch(&rdev->vram_usage.counter, size, __ATOMIC_RELAXED);
        break;
    }
}

static void radeon_ttm_bo_destroy(struct ttm_buffer_object *tbo)
{
    struct radeon_bo *bo;

    bo = container_of(tbo, struct radeon_bo, tbo);

    radeon_update_memory_usage(bo, bo->tbo.mem.mem_type, -1);

    mutex_lock(&bo->rdev->gem.mutex);
    list_del_init(&bo->list);
    mutex_unlock(&bo->rdev->gem.mutex);
    radeon_bo_clear_surface_reg(bo);
    WARN_ON(!list_empty(&bo->va));
    drm_gem_object_release(&bo->gem_base);
    kfree(bo);
}

bool radeon_ttm_bo_is_radeon_bo(struct ttm_buffer_object *bo)
{
    if (bo->destroy == &radeon_ttm_bo_destroy)
        return true;
    return false;
}

void radeon_ttm_placement_from_domain(struct radeon_bo *rbo, u32 domain)
{
    u32 c = 0, i;

    rbo->placement.placement = rbo->placements;
    rbo->placement.busy_placement = rbo->placements;
    if (domain & RADEON_GEM_DOMAIN_VRAM) {
        /* Try placing BOs which don't need CPU access outside of the
         * CPU accessible part of VRAM
         */
        if ((rbo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
            rbo->rdev->mc.visible_vram_size < rbo->rdev->mc.real_vram_size) {
            rbo->placements[c].fpfn =
                rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
            rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                                         TTM_PL_FLAG_UNCACHED |
                                         TTM_PL_FLAG_VRAM;
        }

        rbo->placements[c].fpfn = 0;
        rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                                     TTM_PL_FLAG_UNCACHED |
                                     TTM_PL_FLAG_VRAM;
    }

    if (domain & RADEON_GEM_DOMAIN_GTT) {
        if (rbo->flags & RADEON_GEM_GTT_UC) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
                                         TTM_PL_FLAG_TT;

        } else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
                   (rbo->rdev->flags & RADEON_IS_AGP)) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                                         TTM_PL_FLAG_UNCACHED |
                                         TTM_PL_FLAG_TT;
        } else {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
                                         TTM_PL_FLAG_TT;
        }
    }

    if (domain & RADEON_GEM_DOMAIN_CPU) {
        if (rbo->flags & RADEON_GEM_GTT_UC) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_UNCACHED |
                                         TTM_PL_FLAG_SYSTEM;

        } else if ((rbo->flags & RADEON_GEM_GTT_WC) ||
                   rbo->rdev->flags & RADEON_IS_AGP) {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_WC |
                                         TTM_PL_FLAG_UNCACHED |
                                         TTM_PL_FLAG_SYSTEM;
        } else {
            rbo->placements[c].fpfn = 0;
            rbo->placements[c++].flags = TTM_PL_FLAG_CACHED |
                                         TTM_PL_FLAG_SYSTEM;
        }
    }
    if (!c) {
        rbo->placements[c].fpfn = 0;
        rbo->placements[c++].flags = TTM_PL_MASK_CACHING |
                                     TTM_PL_FLAG_SYSTEM;
    }

    rbo->placement.num_placement = c;
    rbo->placement.num_busy_placement = c;

    for (i = 0; i < c; ++i) {
        if ((rbo->flags & RADEON_GEM_CPU_ACCESS) &&
            (rbo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
            !rbo->placements[i].fpfn)
            rbo->placements[i].lpfn =
                rbo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
        else
            rbo->placements[i].lpfn = 0;
    }

-
-    /*
-     * Use two-ended allocation depending on the buffer size to
-     * improve fragmentation quality.
-     * 512kb was measured as the most optimal number.
-     */
-    if (rbo->tbo.mem.size > 512 * 1024) {
-        for (i = 0; i < c; i++) {
-            rbo->placements[i].flags |= TTM_PL_FLAG_TOPDOWN;
-        }
-    }
}
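/*
 * Illustrative sketch (not part of this revision): for a BO created with
 * domain = RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT and no special
 * flags, the function above fills the placement list in order:
 *
 *    placements[0] = WC | UNCACHED | VRAM   (fpfn = 0)
 *    placements[1] = CACHED | TT            (or WC | UNCACHED on AGP)
 *
 * TTM walks this list front to back, so VRAM is preferred and GTT is the
 * fallback/eviction target.
 */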

int radeon_bo_create(struct radeon_device *rdev,
                     unsigned long size, int byte_align, bool kernel,
                     u32 domain, u32 flags, struct sg_table *sg,
                     struct reservation_object *resv,
                     struct radeon_bo **bo_ptr)
{
    struct radeon_bo *bo;
    enum ttm_bo_type type;
    unsigned long page_align = roundup(byte_align, PAGE_SIZE) >> PAGE_SHIFT;
    size_t acc_size;
    int r;

    size = ALIGN(size, PAGE_SIZE);

    if (kernel) {
        type = ttm_bo_type_kernel;
    } else if (sg) {
        type = ttm_bo_type_sg;
    } else {
        type = ttm_bo_type_device;
    }
    *bo_ptr = NULL;

    acc_size = ttm_bo_dma_acc_size(&rdev->mman.bdev, size,
                                   sizeof(struct radeon_bo));

    bo = kzalloc(sizeof(struct radeon_bo), GFP_KERNEL);
    if (bo == NULL)
        return -ENOMEM;
    r = drm_gem_object_init(rdev->ddev, &bo->gem_base, size);
    if (unlikely(r)) {
        kfree(bo);
        return r;
    }
    bo->rdev = rdev;
    bo->surface_reg = -1;
    INIT_LIST_HEAD(&bo->list);
    INIT_LIST_HEAD(&bo->va);
    bo->initial_domain = domain & (RADEON_GEM_DOMAIN_VRAM |
                                   RADEON_GEM_DOMAIN_GTT |
                                   RADEON_GEM_DOMAIN_CPU);

    bo->flags = flags;
    /* PCI GART is always snooped */
    if (!(rdev->flags & RADEON_IS_PCIE))
        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+
+    /* Write-combined CPU mappings of GTT cause GPU hangs with RV6xx
+     * See https://bugs.freedesktop.org/show_bug.cgi?id=91268
+     */
+    if (rdev->family >= CHIP_RV610 && rdev->family <= CHIP_RV635)
+        bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);

#ifdef CONFIG_X86_32
    /* XXX: Write-combined CPU mappings of GTT seem broken on 32-bit
     * See https://bugs.freedesktop.org/show_bug.cgi?id=84627
     */
+    bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
+#elif defined(CONFIG_X86) && !defined(CONFIG_X86_PAT)
+    /* Don't try to enable write-combining when it can't work, or things
+     * may be slow
+     * See https://bugs.freedesktop.org/show_bug.cgi?id=88758
+     */
+
+#warning Please enable CONFIG_MTRR and CONFIG_X86_PAT for better performance \
+    thanks to write-combining
+
-    bo->flags &= ~RADEON_GEM_GTT_WC;
+    if (bo->flags & RADEON_GEM_GTT_WC)
+        DRM_INFO_ONCE("Please enable CONFIG_MTRR and CONFIG_X86_PAT for "
+                      "better performance thanks to write-combining\n");
+    bo->flags &= ~(RADEON_GEM_GTT_WC | RADEON_GEM_GTT_UC);
#endif

    radeon_ttm_placement_from_domain(bo, domain);
    /* Kernel allocations are uninterruptible */
    down_read(&rdev->pm.mclk_lock);
    r = ttm_bo_init(&rdev->mman.bdev, &bo->tbo, size, type,
                    &bo->placement, page_align, !kernel, NULL,
                    acc_size, sg, resv, &radeon_ttm_bo_destroy);
    up_read(&rdev->pm.mclk_lock);
    if (unlikely(r != 0)) {
        return r;
    }
    *bo_ptr = bo;

    trace_radeon_bo_create(bo);

    return 0;
}
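/*
 * Illustrative sketch (not part of this revision): a typical kernel
 * allocation through radeon_bo_create(), here for a small GART buffer.
 * Error handling is abbreviated.
 *
 *    struct radeon_bo *bo;
 *    u64 gpu_addr;
 *    int r;
 *
 *    r = radeon_bo_create(rdev, 4096, PAGE_SIZE, true,
 *                         RADEON_GEM_DOMAIN_GTT, 0, NULL, NULL, &bo);
 *    if (r)
 *        return r;
 *    r = radeon_bo_reserve(bo, false);
 *    if (r == 0) {
 *        r = radeon_bo_pin(bo, RADEON_GEM_DOMAIN_GTT, &gpu_addr);
 *        radeon_bo_unreserve(bo);
 *    }
 *    if (r)
 *        radeon_bo_unref(&bo);
 */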

int radeon_bo_kmap(struct radeon_bo *bo, void **ptr)
{
    bool is_iomem;
    int r;

    if (bo->kptr) {
        if (ptr) {
            *ptr = bo->kptr;
        }
        return 0;
    }
    r = ttm_bo_kmap(&bo->tbo, 0, bo->tbo.num_pages, &bo->kmap);
    if (r) {
        return r;
    }
    bo->kptr = ttm_kmap_obj_virtual(&bo->kmap, &is_iomem);
    if (ptr) {
        *ptr = bo->kptr;
    }
    radeon_bo_check_tiling(bo, 0, 0);
    return 0;
}

void radeon_bo_kunmap(struct radeon_bo *bo)
{
    if (bo->kptr == NULL)
        return;
    bo->kptr = NULL;
    radeon_bo_check_tiling(bo, 0, 0);
    ttm_bo_kunmap(&bo->kmap);
}
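/*
 * Illustrative sketch (not part of this revision): CPU access to a
 * reserved BO via the kmap pair above:
 *
 *    void *ptr;
 *
 *    r = radeon_bo_kmap(bo, &ptr);
 *    if (r == 0) {
 *        memset(ptr, 0, radeon_bo_size(bo));
 *        radeon_bo_kunmap(bo);
 *    }
 *
 * radeon_bo_kmap() caches the mapping in bo->kptr, so repeated calls are
 * cheap until radeon_bo_kunmap() clears it.
 */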

struct radeon_bo *radeon_bo_ref(struct radeon_bo *bo)
{
    if (bo == NULL)
        return NULL;

    ttm_bo_reference(&bo->tbo);
    return bo;
}

void radeon_bo_unref(struct radeon_bo **bo)
{
    struct ttm_buffer_object *tbo;
    struct radeon_device *rdev;

    if ((*bo) == NULL)
        return;
    rdev = (*bo)->rdev;
    tbo = &((*bo)->tbo);
    ttm_bo_unref(&tbo);
    if (tbo == NULL)
        *bo = NULL;
}

int radeon_bo_pin_restricted(struct radeon_bo *bo, u32 domain, u64 max_offset,
                             u64 *gpu_addr)
{
    int r, i;

    if (bo->pin_count) {
        bo->pin_count++;
        if (gpu_addr)
            *gpu_addr = radeon_bo_gpu_offset(bo);

        if (max_offset != 0) {
            u64 domain_start;

            if (domain == RADEON_GEM_DOMAIN_VRAM)
                domain_start = bo->rdev->mc.vram_start;
            else
                domain_start = bo->rdev->mc.gtt_start;
            WARN_ON_ONCE(max_offset <
                         (radeon_bo_gpu_offset(bo) - domain_start));
        }

        return 0;
    }
    radeon_ttm_placement_from_domain(bo, domain);
    for (i = 0; i < bo->placement.num_placement; i++) {
        /* force to pin into visible video ram */
        if ((bo->placements[i].flags & TTM_PL_FLAG_VRAM) &&
            !(bo->flags & RADEON_GEM_NO_CPU_ACCESS) &&
            (!max_offset || max_offset > bo->rdev->mc.visible_vram_size))
            bo->placements[i].lpfn =
                bo->rdev->mc.visible_vram_size >> PAGE_SHIFT;
        else
            bo->placements[i].lpfn = max_offset >> PAGE_SHIFT;

        bo->placements[i].flags |= TTM_PL_FLAG_NO_EVICT;
    }

    r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
    if (likely(r == 0)) {
        bo->pin_count = 1;
        if (gpu_addr != NULL)
            *gpu_addr = radeon_bo_gpu_offset(bo);
        if (domain == RADEON_GEM_DOMAIN_VRAM)
            bo->rdev->vram_pin_size += radeon_bo_size(bo);
        else
            bo->rdev->gart_pin_size += radeon_bo_size(bo);
    } else {
        dev_err(bo->rdev->dev, "%p pin failed\n", bo);
    }
    return r;
}

int radeon_bo_pin(struct radeon_bo *bo, u32 domain, u64 *gpu_addr)
{
    return radeon_bo_pin_restricted(bo, domain, 0, gpu_addr);
}

int radeon_bo_unpin(struct radeon_bo *bo)
{
    int r, i;

    if (!bo->pin_count) {
        dev_warn(bo->rdev->dev, "%p unpin not necessary\n", bo);
        return 0;
    }
    bo->pin_count--;
    if (bo->pin_count)
        return 0;
    for (i = 0; i < bo->placement.num_placement; i++) {
        bo->placements[i].lpfn = 0;
        bo->placements[i].flags &= ~TTM_PL_FLAG_NO_EVICT;
    }
    r = ttm_bo_validate(&bo->tbo, &bo->placement, false, false);
    if (likely(r == 0)) {
        if (bo->tbo.mem.mem_type == TTM_PL_VRAM)
            bo->rdev->vram_pin_size -= radeon_bo_size(bo);
        else
            bo->rdev->gart_pin_size -= radeon_bo_size(bo);
    } else {
        dev_err(bo->rdev->dev, "%p validate failed for unpin\n", bo);
    }
    return r;
}
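/*
 * Illustrative sketch (not part of this revision): pin_count makes the
 * pin/unpin pair above reference counted, so nested users must balance
 * their calls:
 *
 *    radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, &gpu_addr); // count 1
 *    radeon_bo_pin(bo, RADEON_GEM_DOMAIN_VRAM, NULL);      // count 2
 *    radeon_bo_unpin(bo);                                  // count 1
 *    radeon_bo_unpin(bo); // count 0, TTM_PL_FLAG_NO_EVICT dropped
 */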

int radeon_bo_init(struct radeon_device *rdev)
{
    /* Add an MTRR for the VRAM */
    DRM_INFO("Detected VRAM RAM=%lluM, BAR=%lluM\n",
        rdev->mc.mc_vram_size >> 20,
        (unsigned long long)rdev->mc.aper_size >> 20);
    DRM_INFO("RAM width %dbits %cDR\n",
            rdev->mc.vram_width, rdev->mc.vram_is_ddr ? 'D' : 'S');
    return radeon_ttm_init(rdev);
}

void radeon_bo_fini(struct radeon_device *rdev)
{
//    radeon_ttm_fini(rdev);
//    arch_phys_wc_del(rdev->mc.vram_mtrr);
}

/* Returns how many bytes TTM can move per IB.
 */
static u64 radeon_bo_get_threshold_for_moves(struct radeon_device *rdev)
{
    u64 real_vram_size = rdev->mc.real_vram_size;
    u64 vram_usage = atomic64_read(&rdev->vram_usage);

    /* This function is based on the current VRAM usage.
     *
     * - If all of VRAM is free, allow relocating the number of bytes that
     *   is equal to 1/4 of the size of VRAM for this IB.
     *
     * - If more than one half of VRAM is occupied, only allow relocating
     *   1 MB of data for this IB.
     *
     * - From 0 to one half of used VRAM, the threshold decreases
     *   linearly.
     *        __________________
     * 1/4 of -|\               |
     * VRAM    | \              |
     *         |  \             |
     *         |   \            |
     *         |    \           |
     *         |     \          |
     *         |      \         |
     *         |       \________|1 MB
     *         |----------------|
     *    VRAM 0 %             100 %
     *         used            used
     *
     * Note: It's a threshold, not a limit. The threshold must be crossed
     * for buffer relocations to stop, so any buffer of an arbitrary size
     * can be moved as long as the threshold isn't crossed before
     * the relocation takes place. We don't want to disable buffer
     * relocations completely.
     *
     * The idea is that buffers should be placed in VRAM at creation time
     * and TTM should only do a minimum number of relocations during
     * command submission. In practice, you need to submit at least
     * a dozen IBs to move all buffers to VRAM if they are in GTT.
     *
     * Also, things can get pretty crazy under memory pressure and actual
     * VRAM usage can change a lot, so playing safe even at 50% does
     * consistently increase performance.
     */

    u64 half_vram = real_vram_size >> 1;
    u64 half_free_vram = vram_usage >= half_vram ? 0 : half_vram - vram_usage;
    u64 bytes_moved_threshold = half_free_vram >> 1;
    return max(bytes_moved_threshold, 1024*1024ull);
}
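/*
 * Worked example (illustrative numbers): with real_vram_size = 1 GiB and
 * vram_usage = 256 MiB, half_vram = 512 MiB and half_free_vram =
 * 512 - 256 = 256 MiB, so the threshold is 256 / 2 = 128 MiB. At 0%
 * usage the threshold is 256 MiB, i.e. 1/4 of VRAM; at 50% usage or
 * more, half_free_vram is 0 and max() clamps the result to the 1 MiB
 * floor.
 */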

int radeon_bo_list_validate(struct radeon_device *rdev,
                            struct ww_acquire_ctx *ticket,
                            struct list_head *head, int ring)
{
    struct radeon_bo_list *lobj;
    struct list_head duplicates;
    int r;
    u64 bytes_moved = 0, initial_bytes_moved;
    u64 bytes_moved_threshold = radeon_bo_get_threshold_for_moves(rdev);

    INIT_LIST_HEAD(&duplicates);
    r = ttm_eu_reserve_buffers(ticket, head, true, &duplicates);
    if (unlikely(r != 0)) {
        return r;
    }

    list_for_each_entry(lobj, head, tv.head) {
        struct radeon_bo *bo = lobj->robj;
        if (!bo->pin_count) {
            u32 domain = lobj->prefered_domains;
            u32 allowed = lobj->allowed_domains;
            u32 current_domain =
                radeon_mem_type_to_domain(bo->tbo.mem.mem_type);

            /* Check if this buffer will be moved and don't move it
             * if we have moved too many buffers for this IB already.
             *
             * Note that this allows moving at least one buffer of
             * any size, because it doesn't take the current "bo"
             * into account. We don't want to disallow buffer moves
             * completely.
             */
            if ((allowed & current_domain) != 0 &&
                (domain & current_domain) == 0 && /* will be moved */
                bytes_moved > bytes_moved_threshold) {
                /* don't move it */
                domain = current_domain;
            }

        retry:
            radeon_ttm_placement_from_domain(bo, domain);
            if (ring == R600_RING_TYPE_UVD_INDEX)
                radeon_uvd_force_into_uvd_segment(bo, allowed);

            initial_bytes_moved = atomic64_read(&rdev->num_bytes_moved);
            r = ttm_bo_validate(&bo->tbo, &bo->placement, true, false);
            bytes_moved += atomic64_read(&rdev->num_bytes_moved) -
                           initial_bytes_moved;

            if (unlikely(r)) {
                if (r != -ERESTARTSYS &&
                    domain != lobj->allowed_domains) {
                    domain = lobj->allowed_domains;
                    goto retry;
                }
                ttm_eu_backoff_reservation(ticket, head);
                return r;
            }
        }
        lobj->gpu_offset = radeon_bo_gpu_offset(bo);
        lobj->tiling_flags = bo->tiling_flags;
    }

    list_for_each_entry(lobj, &duplicates, tv.head) {
        lobj->gpu_offset = radeon_bo_gpu_offset(lobj->robj);
        lobj->tiling_flags = lobj->robj->tiling_flags;
    }

    return 0;
}

int radeon_bo_get_surface_reg(struct radeon_bo *bo)
{
    struct radeon_device *rdev = bo->rdev;
    struct radeon_surface_reg *reg;
    struct radeon_bo *old_object;
    int steal;
    int i;

    lockdep_assert_held(&bo->tbo.resv->lock.base);

    if (!bo->tiling_flags)
        return 0;

    if (bo->surface_reg >= 0) {
        reg = &rdev->surface_regs[bo->surface_reg];
        i = bo->surface_reg;
        goto out;
    }

    steal = -1;
    for (i = 0; i < RADEON_GEM_MAX_SURFACES; i++) {

        reg = &rdev->surface_regs[i];
        if (!reg->bo)
            break;

        old_object = reg->bo;
        if (old_object->pin_count == 0)
            steal = i;
    }

    /* if we are all out */
    if (i == RADEON_GEM_MAX_SURFACES) {
        if (steal == -1)
            return -ENOMEM;
        /* find someone with a surface reg and nuke their BO */
        reg = &rdev->surface_regs[steal];
        old_object = reg->bo;
        /* blow away the mapping */
        DRM_DEBUG("stealing surface reg %d from %p\n", steal, old_object);
        ttm_bo_unmap_virtual(&old_object->tbo);
        old_object->surface_reg = -1;
        i = steal;
    }

    bo->surface_reg = i;
    reg->bo = bo;

out:
    radeon_set_surface_reg(rdev, i, bo->tiling_flags, bo->pitch,
                           bo->tbo.mem.start << PAGE_SHIFT,
                           bo->tbo.num_pages << PAGE_SHIFT);
    return 0;
}

static void radeon_bo_clear_surface_reg(struct radeon_bo *bo)
{
    struct radeon_device *rdev = bo->rdev;
    struct radeon_surface_reg *reg;

    if (bo->surface_reg == -1)
        return;

    reg = &rdev->surface_regs[bo->surface_reg];
    radeon_clear_surface_reg(rdev, bo->surface_reg);

    reg->bo = NULL;
    bo->surface_reg = -1;
}

int radeon_bo_set_tiling_flags(struct radeon_bo *bo,
                               uint32_t tiling_flags, uint32_t pitch)
{
    struct radeon_device *rdev = bo->rdev;
    int r;

    if (rdev->family >= CHIP_CEDAR) {
        unsigned bankw, bankh, mtaspect, tilesplit, stilesplit;

        bankw = (tiling_flags >> RADEON_TILING_EG_BANKW_SHIFT) & RADEON_TILING_EG_BANKW_MASK;
        bankh = (tiling_flags >> RADEON_TILING_EG_BANKH_SHIFT) & RADEON_TILING_EG_BANKH_MASK;
        mtaspect = (tiling_flags >> RADEON_TILING_EG_MACRO_TILE_ASPECT_SHIFT) & RADEON_TILING_EG_MACRO_TILE_ASPECT_MASK;
        tilesplit = (tiling_flags >> RADEON_TILING_EG_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_TILE_SPLIT_MASK;
        stilesplit = (tiling_flags >> RADEON_TILING_EG_STENCIL_TILE_SPLIT_SHIFT) & RADEON_TILING_EG_STENCIL_TILE_SPLIT_MASK;
        switch (bankw) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        switch (bankh) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        switch (mtaspect) {
        case 0:
        case 1:
        case 2:
        case 4:
        case 8:
            break;
        default:
            return -EINVAL;
        }
        if (tilesplit > 6) {
            return -EINVAL;
        }
        if (stilesplit > 6) {
            return -EINVAL;
        }
    }
    r = radeon_bo_reserve(bo, false);
    if (unlikely(r != 0))
        return r;
    bo->tiling_flags = tiling_flags;
    bo->pitch = pitch;
    radeon_bo_unreserve(bo);
    return 0;
}
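/*
 * Illustrative sketch (not part of this revision): the evergreen
 * tiling_flags word validated above is built from shift/mask pairs, e.g.
 * requesting macro tiling with bank width 1 and bank height 2:
 *
 *    uint32_t tiling_flags = RADEON_TILING_MACRO |
 *        (1 << RADEON_TILING_EG_BANKW_SHIFT) |
 *        (2 << RADEON_TILING_EG_BANKH_SHIFT);
 *
 *    r = radeon_bo_set_tiling_flags(bo, tiling_flags, pitch);
 *
 * The switch statements above reject any bankw/bankh/mtaspect value that
 * is not 0, 1, 2, 4 or 8.
 */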

void radeon_bo_get_tiling_flags(struct radeon_bo *bo,
                                uint32_t *tiling_flags,
                                uint32_t *pitch)
{
    lockdep_assert_held(&bo->tbo.resv->lock.base);

    if (tiling_flags)
        *tiling_flags = bo->tiling_flags;
    if (pitch)
        *pitch = bo->pitch;
}

int radeon_bo_check_tiling(struct radeon_bo *bo, bool has_moved,
                           bool force_drop)
{
    if (!force_drop)
        lockdep_assert_held(&bo->tbo.resv->lock.base);

    if (!(bo->tiling_flags & RADEON_TILING_SURFACE))
        return 0;

    if (force_drop) {
        radeon_bo_clear_surface_reg(bo);
        return 0;
    }

    if (bo->tbo.mem.mem_type != TTM_PL_VRAM) {
        if (!has_moved)
            return 0;

        if (bo->surface_reg >= 0)
            radeon_bo_clear_surface_reg(bo);
        return 0;
    }

    if ((bo->surface_reg >= 0) && !has_moved)
        return 0;

    return radeon_bo_get_surface_reg(bo);
}

void radeon_bo_move_notify(struct ttm_buffer_object *bo,
                           struct ttm_mem_reg *new_mem)
{
    struct radeon_bo *rbo;

    if (!radeon_ttm_bo_is_radeon_bo(bo))
        return;

    rbo = container_of(bo, struct radeon_bo, tbo);
    radeon_bo_check_tiling(rbo, 0, 1);
    radeon_vm_bo_invalidate(rbo->rdev, rbo);

    /* update statistics */
    if (!new_mem)
        return;

    radeon_update_memory_usage(rbo, bo->mem.mem_type, -1);
    radeon_update_memory_usage(rbo, new_mem->mem_type, 1);
}

int radeon_bo_wait(struct radeon_bo *bo, u32 *mem_type, bool no_wait)
{
    int r;

    r = ttm_bo_reserve(&bo->tbo, true, no_wait, false, NULL);
    if (unlikely(r != 0))
        return r;
    if (mem_type)
        *mem_type = bo->tbo.mem.mem_type;

    r = ttm_bo_wait(&bo->tbo, true, true, no_wait);
    ttm_bo_unreserve(&bo->tbo);
    return r;
}

/**
 * radeon_bo_fence - add fence to buffer object
 *
 * @bo: buffer object in question
 * @fence: fence to add
 * @shared: true if fence should be added shared
 *
 */
void radeon_bo_fence(struct radeon_bo *bo, struct radeon_fence *fence,
                     bool shared)
{
    struct reservation_object *resv = bo->tbo.resv;

    if (shared)
        reservation_object_add_shared_fence(resv, &fence->base);
    else
        reservation_object_add_excl_fence(resv, &fence->base);
}
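/*
 * Illustrative sketch (not part of this revision): after submitting GPU
 * work against a reserved BO, the caller attaches the fence with the
 * access mode it needs; a shared fence lets other readers proceed
 * concurrently, an exclusive fence serializes everyone behind a write:
 *
 *    radeon_bo_fence(bo, fence, true);   // read access, shared
 *    radeon_bo_fence(bo, fence, false);  // write access, exclusive
 */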