Rev Author Line No. Line
4363 Serge 1
/**************************************************************************
2
 *
3
 * Copyright © 2007 Red Hat Inc.
4
 * Copyright © 2007-2012 Intel Corporation
5
 * Copyright 2006 Tungsten Graphics, Inc., Bismarck, ND., USA
6
 * All Rights Reserved.
7
 *
8
 * Permission is hereby granted, free of charge, to any person obtaining a
9
 * copy of this software and associated documentation files (the
10
 * "Software"), to deal in the Software without restriction, including
11
 * without limitation the rights to use, copy, modify, merge, publish,
12
 * distribute, sub license, and/or sell copies of the Software, and to
13
 * permit persons to whom the Software is furnished to do so, subject to
14
 * the following conditions:
15
 *
16
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
19
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
20
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
21
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
22
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
23
 *
24
 * The above copyright notice and this permission notice (including the
25
 * next paragraph) shall be included in all copies or substantial portions
26
 * of the Software.
27
 *
28
 *
29
 **************************************************************************/
30
/*
31
 * Authors: Thomas Hellström 
32
 *          Keith Whitwell 
33
 *	    Eric Anholt 
34
 *	    Dave Airlie 
35
 */
36
 
37
#ifdef HAVE_CONFIG_H
38
#include "config.h"
39
#endif
40
 
41
#include 
42
#include 
43
#include 
44
#include 
45
#include 
46
#include 
47
#include 
48
#include 
49
//#include 
50
#include 
51
 
52
#include "errno.h"
53
#ifndef ETIME
54
#define ETIME ETIMEDOUT
55
#endif
56
#include "libdrm_lists.h"
57
#include "intel_bufmgr.h"
58
#include "intel_bufmgr_priv.h"
59
#include "intel_chipset.h"
60
#include "intel_aub.h"
61
#include "string.h"
62
 
63
#include "i915_drm.h"
64
 
65
#ifdef HAVE_VALGRIND
66
#include 
67
#include 
68
#define VG(x) x
69
#else
70
#define VG(x)
71
#endif
72
 
73
#define VG_CLEAR(s) VG(memset(&s, 0, sizeof(s)))
74
 
75
#if 0
76
#define DBG(...) do {					\
77
	if (bufmgr_gem->bufmgr.debug)			\
78
		fprintf(stderr, __VA_ARGS__);		\
79
} while (0)
80
#endif
81
 
82
//#define DBG(...) fprintf(stderr, __VA_ARGS__)
83
#define DBG(...)
84
 
85
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
86
 
87
typedef struct _drm_intel_bo_gem drm_intel_bo_gem;
88
 
89
struct drm_intel_gem_bo_bucket {
90
	drmMMListHead head;
91
	unsigned long size;
92
};
93
 
94
typedef struct _drm_intel_bufmgr_gem {
95
	drm_intel_bufmgr bufmgr;
96
 
97
	int fd;
98
 
99
	int max_relocs;
100
 
101
//	pthread_mutex_t lock;
102
 
103
	struct drm_i915_gem_exec_object *exec_objects;
104
	struct drm_i915_gem_exec_object2 *exec2_objects;
105
	drm_intel_bo **exec_bos;
106
	int exec_size;
107
	int exec_count;
108
 
109
	/** Array of lists of cached gem objects of power-of-two sizes */
110
	struct drm_intel_gem_bo_bucket cache_bucket[14 * 4];
111
	int num_buckets;
112
	time_t time;
113
 
114
	drmMMListHead named;
115
	drmMMListHead vma_cache;
116
	int vma_count, vma_open, vma_max;
117
 
118
	uint64_t gtt_size;
119
	int available_fences;
120
	int pci_device;
121
	int gen;
122
	unsigned int has_bsd : 1;
123
	unsigned int has_blt : 1;
124
	unsigned int has_relaxed_fencing : 1;
125
	unsigned int has_llc : 1;
126
	unsigned int has_wait_timeout : 1;
127
	unsigned int bo_reuse : 1;
128
	unsigned int no_exec : 1;
129
	unsigned int has_vebox : 1;
130
	bool fenced_relocs;
131
 
132
	char *aub_filename;
133
	FILE *aub_file;
134
	uint32_t aub_offset;
135
} drm_intel_bufmgr_gem;
136
 
137
#define DRM_INTEL_RELOC_FENCE (1<<0)
138
 
139
typedef struct _drm_intel_reloc_target_info {
140
	drm_intel_bo *bo;
141
	int flags;
142
} drm_intel_reloc_target;
143
 
144
struct _drm_intel_bo_gem {
145
	drm_intel_bo bo;
146
 
147
	atomic_t refcount;
148
	uint32_t gem_handle;
149
	const char *name;
150
 
151
	/**
152
	 * Kernel-assigned global name for this object
153
	 *
154
	 * List contains both flink named and prime fd'd objects
155
	 */
156
	unsigned int global_name;
157
	drmMMListHead name_list;
158
 
159
	/**
160
	 * Index of the buffer within the validation list while preparing a
161
	 * batchbuffer execution.
162
	 */
163
	int validate_index;
164
 
165
	/**
166
	 * Current tiling mode
167
	 */
168
	uint32_t tiling_mode;
169
	uint32_t swizzle_mode;
170
	unsigned long stride;
171
 
172
	time_t free_time;
173
 
174
	/** Array passed to the DRM containing relocation information. */
175
	struct drm_i915_gem_relocation_entry *relocs;
176
	/**
177
	 * Array of info structs corresponding to relocs[i].target_handle etc
178
	 */
179
	drm_intel_reloc_target *reloc_target_info;
180
	/** Number of entries in relocs */
181
	int reloc_count;
182
	/** Mapped address for the buffer, saved across map/unmap cycles */
183
	void *mem_virtual;
184
	/** GTT virtual address for the buffer, saved across map/unmap cycles */
185
	void *gtt_virtual;
186
	int map_count;
187
	drmMMListHead vma_list;
188
 
189
	/** BO cache list */
190
	drmMMListHead head;
191
 
192
	/**
193
	 * Boolean of whether this BO and its children have been included in
194
	 * the current drm_intel_bufmgr_check_aperture_space() total.
195
	 */
196
	bool included_in_check_aperture;
197
 
198
	/**
199
	 * Boolean of whether this buffer has been used as a relocation
200
	 * target and had its size accounted for, and thus can't have any
201
	 * further relocations added to it.
202
	 */
203
	bool used_as_reloc_target;
204
 
205
	/**
206
	 * Boolean of whether we have encountered an error whilst building the relocation tree.
207
	 */
208
	bool has_error;
209
 
210
	/**
211
	 * Boolean of whether this buffer can be re-used
212
	 */
213
	bool reusable;
214
 
215
	/**
5068 serge 216
	 * Boolean of whether the GPU is definitely not accessing the buffer.
217
	 *
218
	 * This is only valid when reusable, since non-reusable
219
	 * buffers are those that have been shared with other
220
	 * processes, so we don't know their state.
221
	 */
222
	bool idle;
223
 
224
	/**
4363 Serge 225
	 * Size in bytes of this buffer and its relocation descendants.
226
	 *
227
	 * Used to avoid costly tree walking in
228
	 * drm_intel_bufmgr_check_aperture in the common case.
229
	 */
230
	int reloc_tree_size;
231
 
232
	/**
233
	 * Number of potential fence registers required by this buffer and its
234
	 * relocations.
235
	 */
236
	int reloc_tree_fences;
237
 
238
	/** Flags that we may need to do the SW_FINISH ioctl on unmap. */
239
	bool mapped_cpu_write;
240
 
241
	uint32_t aub_offset;
242
 
243
	drm_intel_aub_annotation *aub_annotations;
244
	unsigned aub_annotation_count;
245
};
246
 
247
static unsigned int
248
drm_intel_gem_estimate_batch_space(drm_intel_bo ** bo_array, int count);
249
 
250
static unsigned int
251
drm_intel_gem_compute_batch_space(drm_intel_bo ** bo_array, int count);
252
 
253
static int
254
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
255
			    uint32_t * swizzle_mode);
256
 
257
static int
258
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
259
				     uint32_t tiling_mode,
260
				     uint32_t stride);
261
 
262
static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
263
						      time_t time);
264
 
265
static void drm_intel_gem_bo_unreference(drm_intel_bo *bo);
266
 
267
static void drm_intel_gem_bo_free(drm_intel_bo *bo);
268
 
269
static unsigned long
270
drm_intel_gem_bo_tile_size(drm_intel_bufmgr_gem *bufmgr_gem, unsigned long size,
271
			   uint32_t *tiling_mode)
272
{
273
	unsigned long min_size, max_size;
274
	unsigned long i;
275
 
276
	if (*tiling_mode == I915_TILING_NONE)
277
		return size;
278
 
279
	/* 965+ just need multiples of page size for tiling */
280
	if (bufmgr_gem->gen >= 4)
281
		return ROUND_UP_TO(size, 4096);
282
 
283
	/* Older chips need powers of two, of at least 512k or 1M */
284
	if (bufmgr_gem->gen == 3) {
285
		min_size = 1024*1024;
286
		max_size = 128*1024*1024;
287
	} else {
288
		min_size = 512*1024;
289
		max_size = 64*1024*1024;
290
	}
291
 
292
	if (size > max_size) {
293
		*tiling_mode = I915_TILING_NONE;
294
		return size;
295
	}
296
 
297
	/* Do we need to allocate every page for the fence? */
298
	if (bufmgr_gem->has_relaxed_fencing)
299
		return ROUND_UP_TO(size, 4096);
300
 
301
	for (i = min_size; i < size; i <<= 1)
302
		;
303
 
304
	return i;
305
}
306
 
307
/*
308
 * Round a given pitch up to the minimum required for X tiling on a
309
 * given chip.  We use 512 as the minimum to allow for a later tiling
310
 * change.
311
 */
312
static unsigned long
313
drm_intel_gem_bo_tile_pitch(drm_intel_bufmgr_gem *bufmgr_gem,
314
			    unsigned long pitch, uint32_t *tiling_mode)
315
{
316
	unsigned long tile_width;
317
	unsigned long i;
318
 
319
	/* If untiled, then just align it so that we can do rendering
320
	 * to it with the 3D engine.
321
	 */
322
	if (*tiling_mode == I915_TILING_NONE)
323
		return ALIGN(pitch, 64);
324
 
325
	if (*tiling_mode == I915_TILING_X
326
			|| (IS_915(bufmgr_gem->pci_device)
327
			    && *tiling_mode == I915_TILING_Y))
328
		tile_width = 512;
329
	else
330
		tile_width = 128;
331
 
332
	/* 965 is flexible */
333
	if (bufmgr_gem->gen >= 4)
334
		return ROUND_UP_TO(pitch, tile_width);
335
 
336
	/* The older hardware has a maximum pitch of 8192 with tiled
337
	 * surfaces, so fall back to untiled if it's too large.
338
	 */
339
	if (pitch > 8192) {
340
		*tiling_mode = I915_TILING_NONE;
341
		return ALIGN(pitch, 64);
342
	}
343
 
344
	/* Pre-965 needs power of two tile width */
345
	for (i = tile_width; i < pitch; i <<= 1)
346
		;
347
 
348
	return i;
349
}
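
/* Illustrative sketch (not part of the original file): a worked example of
 * how the two rounding helpers above interact for a hypothetical 1024x768,
 * 4-bytes-per-pixel, X-tiled surface.  The function name is invented and the
 * block is left out of the build, following this file's own #if 0 convention.
 */
#if 0
static void example_tile_rounding(drm_intel_bufmgr_gem *bufmgr_gem)
{
	uint32_t tiling = I915_TILING_X;
	unsigned long pitch = 1024 * 4;	/* 4096 bytes per row */
	unsigned long size;

	/* X tiles are 512 bytes wide, so a 4096-byte pitch is already
	 * aligned and is returned unchanged.
	 */
	pitch = drm_intel_gem_bo_tile_pitch(bufmgr_gem, pitch, &tiling);

	/* 4096 * 768 = 3 MiB.  On gen4+ (or with relaxed fencing) this is
	 * only rounded up to a page multiple; on gen3 without relaxed
	 * fencing it becomes the next power of two, 4 MiB, so that a
	 * single fence register can cover the whole object.
	 */
	size = drm_intel_gem_bo_tile_size(bufmgr_gem, pitch * 768, &tiling);
	(void)size;
}
#endif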
350
 
351
static struct drm_intel_gem_bo_bucket *
352
drm_intel_gem_bo_bucket_for_size(drm_intel_bufmgr_gem *bufmgr_gem,
353
				 unsigned long size)
354
{
355
	int i;
356
 
357
	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
358
		struct drm_intel_gem_bo_bucket *bucket =
359
		    &bufmgr_gem->cache_bucket[i];
360
		if (bucket->size >= size) {
361
			return bucket;
362
		}
363
	}
364
 
365
	return NULL;
366
}
367
 
368
static void
369
drm_intel_gem_dump_validation_list(drm_intel_bufmgr_gem *bufmgr_gem)
370
{
371
	int i, j;
372
 
373
	for (i = 0; i < bufmgr_gem->exec_count; i++) {
374
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
375
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
376
 
377
		if (bo_gem->relocs == NULL) {
378
			DBG("%2d: %d (%s)\n", i, bo_gem->gem_handle,
379
			    bo_gem->name);
380
			continue;
381
		}
382
 
383
		for (j = 0; j < bo_gem->reloc_count; j++) {
384
			drm_intel_bo *target_bo = bo_gem->reloc_target_info[j].bo;
385
			drm_intel_bo_gem *target_gem =
386
			    (drm_intel_bo_gem *) target_bo;
387
 
388
			DBG("%2d: %d (%s)@0x%08llx -> "
389
			    "%d (%s)@0x%08lx + 0x%08x\n",
390
			    i,
391
			    bo_gem->gem_handle, bo_gem->name,
392
			    (unsigned long long)bo_gem->relocs[j].offset,
393
			    target_gem->gem_handle,
394
			    target_gem->name,
5068 serge 395
			    target_bo->offset64,
4363 Serge 396
			    bo_gem->relocs[j].delta);
397
		}
398
	}
399
}
400
 
401
static inline void
402
drm_intel_gem_bo_reference(drm_intel_bo *bo)
403
{
404
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
405
 
406
	atomic_inc(&bo_gem->refcount);
407
}
408
 
409
/**
410
 * Adds the given buffer to the list of buffers to be validated (moved into the
411
 * appropriate memory type) with the next batch submission.
412
 *
413
 * If a buffer is validated multiple times in a batch submission, it ends up
414
 * with the intersection of the memory type flags and the union of the
415
 * access flags.
416
 */
417
static void
418
drm_intel_add_validate_buffer(drm_intel_bo *bo)
419
{
420
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
421
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
422
	int index;
423
 
424
	if (bo_gem->validate_index != -1)
425
		return;
426
 
427
	/* Extend the array of validation entries as necessary. */
428
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
429
		int new_size = bufmgr_gem->exec_size * 2;
430
 
431
		if (new_size == 0)
432
			new_size = 5;
433
 
434
		bufmgr_gem->exec_objects =
435
		    realloc(bufmgr_gem->exec_objects,
436
			    sizeof(*bufmgr_gem->exec_objects) * new_size);
437
		bufmgr_gem->exec_bos =
438
		    realloc(bufmgr_gem->exec_bos,
439
			    sizeof(*bufmgr_gem->exec_bos) * new_size);
440
		bufmgr_gem->exec_size = new_size;
441
	}
442
 
443
	index = bufmgr_gem->exec_count;
444
	bo_gem->validate_index = index;
445
	/* Fill in array entry */
446
	bufmgr_gem->exec_objects[index].handle = bo_gem->gem_handle;
447
	bufmgr_gem->exec_objects[index].relocation_count = bo_gem->reloc_count;
448
	bufmgr_gem->exec_objects[index].relocs_ptr = (uintptr_t) bo_gem->relocs;
449
	bufmgr_gem->exec_objects[index].alignment = 0;
450
	bufmgr_gem->exec_objects[index].offset = 0;
451
	bufmgr_gem->exec_bos[index] = bo;
452
	bufmgr_gem->exec_count++;
453
}
454
 
455
static void
456
drm_intel_add_validate_buffer2(drm_intel_bo *bo, int need_fence)
457
{
458
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
459
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
460
	int index;
461
 
462
	if (bo_gem->validate_index != -1) {
463
		if (need_fence)
464
			bufmgr_gem->exec2_objects[bo_gem->validate_index].flags |=
465
				EXEC_OBJECT_NEEDS_FENCE;
466
		return;
467
	}
468
 
469
	/* Extend the array of validation entries as necessary. */
470
	if (bufmgr_gem->exec_count == bufmgr_gem->exec_size) {
471
		int new_size = bufmgr_gem->exec_size * 2;
472
 
473
		if (new_size == 0)
474
			new_size = 5;
475
 
476
		bufmgr_gem->exec2_objects =
477
			realloc(bufmgr_gem->exec2_objects,
478
				sizeof(*bufmgr_gem->exec2_objects) * new_size);
479
		bufmgr_gem->exec_bos =
480
			realloc(bufmgr_gem->exec_bos,
481
				sizeof(*bufmgr_gem->exec_bos) * new_size);
482
		bufmgr_gem->exec_size = new_size;
483
	}
484
 
485
	index = bufmgr_gem->exec_count;
486
	bo_gem->validate_index = index;
487
	/* Fill in array entry */
488
	bufmgr_gem->exec2_objects[index].handle = bo_gem->gem_handle;
489
	bufmgr_gem->exec2_objects[index].relocation_count = bo_gem->reloc_count;
490
	bufmgr_gem->exec2_objects[index].relocs_ptr = (uintptr_t)bo_gem->relocs;
491
	bufmgr_gem->exec2_objects[index].alignment = 0;
492
	bufmgr_gem->exec2_objects[index].offset = 0;
493
	bufmgr_gem->exec_bos[index] = bo;
494
	bufmgr_gem->exec2_objects[index].flags = 0;
495
	bufmgr_gem->exec2_objects[index].rsvd1 = 0;
496
	bufmgr_gem->exec2_objects[index].rsvd2 = 0;
497
	if (need_fence) {
498
		bufmgr_gem->exec2_objects[index].flags |=
499
			EXEC_OBJECT_NEEDS_FENCE;
500
	}
501
	bufmgr_gem->exec_count++;
502
}
503
 
504
#define RELOC_BUF_SIZE(x) ((I915_RELOC_HEADER + x * I915_RELOC0_STRIDE) * \
505
	sizeof(uint32_t))
506
 
507
static void
508
drm_intel_bo_gem_set_in_aperture_size(drm_intel_bufmgr_gem *bufmgr_gem,
509
				      drm_intel_bo_gem *bo_gem)
510
{
511
	int size;
512
 
513
	assert(!bo_gem->used_as_reloc_target);
514
 
515
	/* The older chipsets are far less flexible in terms of tiling,
516
	 * and require tiled buffers to be size-aligned in the aperture.
517
	 * This means that in the worst possible case we will need a hole
518
	 * twice as large as the object in order for it to fit into the
519
	 * aperture. Optimal packing is for wimps.
520
	 */
521
	size = bo_gem->bo.size;
522
	if (bufmgr_gem->gen < 4 && bo_gem->tiling_mode != I915_TILING_NONE) {
523
		int min_size;
524
 
525
		if (bufmgr_gem->has_relaxed_fencing) {
526
			if (bufmgr_gem->gen == 3)
527
				min_size = 1024*1024;
528
			else
529
				min_size = 512*1024;
530
 
531
			while (min_size < size)
532
				min_size *= 2;
533
		} else
534
			min_size = size;
535
 
536
		/* Account for worst-case alignment. */
537
		size = 2 * min_size;
538
	}
539
 
540
	bo_gem->reloc_tree_size = size;
541
}
542
 
543
static int
544
drm_intel_setup_reloc_list(drm_intel_bo *bo)
545
{
546
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
547
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
548
	unsigned int max_relocs = bufmgr_gem->max_relocs;
549
 
550
	if (bo->size / 4 < max_relocs)
551
		max_relocs = bo->size / 4;
552
 
553
	bo_gem->relocs = malloc(max_relocs *
554
				sizeof(struct drm_i915_gem_relocation_entry));
555
	bo_gem->reloc_target_info = malloc(max_relocs *
556
					   sizeof(drm_intel_reloc_target));
557
	if (bo_gem->relocs == NULL || bo_gem->reloc_target_info == NULL) {
558
		bo_gem->has_error = true;
559
 
560
		free (bo_gem->relocs);
561
		bo_gem->relocs = NULL;
562
 
563
		free (bo_gem->reloc_target_info);
564
		bo_gem->reloc_target_info = NULL;
565
 
566
		return 1;
567
	}
568
 
569
	return 0;
570
}
571
 
572
static int
573
drm_intel_gem_bo_busy(drm_intel_bo *bo)
574
{
575
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
576
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
577
	struct drm_i915_gem_busy busy;
578
	int ret;
579
 
5068 serge 580
	if (bo_gem->reusable && bo_gem->idle)
581
		return false;
582
 
4363 Serge 583
	VG_CLEAR(busy);
584
	busy.handle = bo_gem->gem_handle;
585
 
586
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_BUSY, &busy);
5068 serge 587
	if (ret == 0) {
588
		bo_gem->idle = !busy.busy;
589
		return busy.busy;
590
	} else {
591
		return false;
592
	}
4363 Serge 593
	return (ret == 0 && busy.busy);
594
}
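
/* Illustrative sketch (not part of the original file): using the busy check
 * above through the public drm_intel_bo_busy() wrapper to decide between
 * reusing a buffer and allocating a fresh one, mirroring the cache logic in
 * the allocator below.  The helper name is invented for this sketch.
 */
#if 0
static bool example_can_reuse_now(drm_intel_bo *bo)
{
	/* Non-blocking: true only if the GPU no longer references the
	 * buffer, so the caller can reuse it without stalling.
	 */
	return !drm_intel_bo_busy(bo);
}
#endif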
595
 
596
static int
597
drm_intel_gem_bo_madvise_internal(drm_intel_bufmgr_gem *bufmgr_gem,
598
				  drm_intel_bo_gem *bo_gem, int state)
599
{
600
	struct drm_i915_gem_madvise madv;
601
 
602
	VG_CLEAR(madv);
603
	madv.handle = bo_gem->gem_handle;
604
	madv.madv = state;
605
	madv.retained = 1;
606
//	drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_MADVISE, &madv);
607
 
608
	return madv.retained;
609
}
610
 
611
static int
612
drm_intel_gem_bo_madvise(drm_intel_bo *bo, int madv)
613
{
614
	return drm_intel_gem_bo_madvise_internal
615
		((drm_intel_bufmgr_gem *) bo->bufmgr,
616
		 (drm_intel_bo_gem *) bo,
617
		 madv);
618
}
619
 
620
/* drop the oldest entries that have been purged by the kernel */
621
static void
622
drm_intel_gem_bo_cache_purge_bucket(drm_intel_bufmgr_gem *bufmgr_gem,
623
				    struct drm_intel_gem_bo_bucket *bucket)
624
{
625
	while (!DRMLISTEMPTY(&bucket->head)) {
626
		drm_intel_bo_gem *bo_gem;
627
 
628
		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
629
				      bucket->head.next, head);
630
		if (drm_intel_gem_bo_madvise_internal
631
		    (bufmgr_gem, bo_gem, I915_MADV_DONTNEED))
632
			break;
633
 
634
		DRMLISTDEL(&bo_gem->head);
635
		drm_intel_gem_bo_free(&bo_gem->bo);
636
	}
637
}
638
 
639
static drm_intel_bo *
640
drm_intel_gem_bo_alloc_internal(drm_intel_bufmgr *bufmgr,
641
				const char *name,
642
				unsigned long size,
643
				unsigned long flags,
644
				uint32_t tiling_mode,
645
				unsigned long stride)
646
{
647
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
648
	drm_intel_bo_gem *bo_gem;
649
	unsigned int page_size = 4096;
650
	int ret;
651
	struct drm_intel_gem_bo_bucket *bucket;
652
	bool alloc_from_cache;
653
	unsigned long bo_size;
654
	bool for_render = false;
655
 
656
	if (flags & BO_ALLOC_FOR_RENDER)
657
		for_render = true;
658
 
659
	/* Round the allocated size up to a power of two number of pages. */
660
	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, size);
661
 
662
	/* If we don't have caching at this size, don't actually round the
663
	 * allocation up.
664
	 */
665
	if (bucket == NULL) {
666
		bo_size = size;
667
		if (bo_size < page_size)
668
			bo_size = page_size;
669
	} else {
670
		bo_size = bucket->size;
671
	}
672
 
673
//	pthread_mutex_lock(&bufmgr_gem->lock);
674
	/* Get a buffer out of the cache if available */
675
retry:
676
	alloc_from_cache = false;
677
	if (bucket != NULL && !DRMLISTEMPTY(&bucket->head)) {
678
		if (for_render) {
679
			/* Allocate new render-target BOs from the tail (MRU)
680
			 * of the list, as it will likely be hot in the GPU
681
			 * cache and in the aperture for us.
682
			 */
683
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
684
					      bucket->head.prev, head);
685
			DRMLISTDEL(&bo_gem->head);
686
			alloc_from_cache = true;
687
		} else {
688
			/* For non-render-target BOs (where we're probably
689
			 * going to map it first thing in order to fill it
690
			 * with data), check if the last BO in the cache is
691
			 * unbusy, and only reuse in that case. Otherwise,
692
			 * allocating a new buffer is probably faster than
693
			 * waiting for the GPU to finish.
694
			 */
695
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
696
					      bucket->head.next, head);
697
			if (!drm_intel_gem_bo_busy(&bo_gem->bo)) {
698
				alloc_from_cache = true;
699
				DRMLISTDEL(&bo_gem->head);
700
			}
701
		}
702
 
703
		if (alloc_from_cache) {
704
			if (!drm_intel_gem_bo_madvise_internal
705
			    (bufmgr_gem, bo_gem, I915_MADV_WILLNEED)) {
706
				drm_intel_gem_bo_free(&bo_gem->bo);
707
				drm_intel_gem_bo_cache_purge_bucket(bufmgr_gem,
708
								    bucket);
709
				goto retry;
710
			}
711
 
712
			if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
713
								 tiling_mode,
714
								 stride)) {
715
				drm_intel_gem_bo_free(&bo_gem->bo);
716
				goto retry;
717
			}
718
		}
719
	}
720
//	pthread_mutex_unlock(&bufmgr_gem->lock);
721
 
722
	if (!alloc_from_cache) {
723
		struct drm_i915_gem_create create;
724
 
725
		bo_gem = calloc(1, sizeof(*bo_gem));
726
		if (!bo_gem)
727
			return NULL;
728
 
729
		bo_gem->bo.size = bo_size;
730
 
731
		VG_CLEAR(create);
732
		create.size = bo_size;
733
 
734
		ret = drmIoctl(bufmgr_gem->fd,
735
			       DRM_IOCTL_I915_GEM_CREATE,
736
			       &create);
737
		bo_gem->gem_handle = create.handle;
738
		bo_gem->bo.handle = bo_gem->gem_handle;
739
		if (ret != 0) {
740
			free(bo_gem);
741
			return NULL;
742
		}
743
		bo_gem->bo.bufmgr = bufmgr;
744
 
745
		bo_gem->tiling_mode = I915_TILING_NONE;
746
		bo_gem->swizzle_mode = I915_BIT_6_SWIZZLE_NONE;
747
		bo_gem->stride = 0;
748
 
749
		if (drm_intel_gem_bo_set_tiling_internal(&bo_gem->bo,
750
							 tiling_mode,
751
							 stride)) {
752
		    drm_intel_gem_bo_free(&bo_gem->bo);
753
		    return NULL;
754
		}
755
 
756
		DRMINITLISTHEAD(&bo_gem->name_list);
757
		DRMINITLISTHEAD(&bo_gem->vma_list);
758
	}
759
 
760
	bo_gem->name = name;
761
	atomic_set(&bo_gem->refcount, 1);
762
	bo_gem->validate_index = -1;
763
	bo_gem->reloc_tree_fences = 0;
764
	bo_gem->used_as_reloc_target = false;
765
	bo_gem->has_error = false;
766
	bo_gem->reusable = true;
767
	bo_gem->aub_annotations = NULL;
768
	bo_gem->aub_annotation_count = 0;
769
 
770
	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
771
 
772
	DBG("bo_create: buf %d (%s) %ldb\n",
773
	    bo_gem->gem_handle, bo_gem->name, size);
774
 
775
	return &bo_gem->bo;
776
}
777
 
778
static drm_intel_bo *
779
drm_intel_gem_bo_alloc_for_render(drm_intel_bufmgr *bufmgr,
780
				  const char *name,
781
				  unsigned long size,
782
				  unsigned int alignment)
783
{
784
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size,
785
					       BO_ALLOC_FOR_RENDER,
786
					       I915_TILING_NONE, 0);
787
}
788
 
789
static drm_intel_bo *
790
drm_intel_gem_bo_alloc(drm_intel_bufmgr *bufmgr,
791
		       const char *name,
792
		       unsigned long size,
793
		       unsigned int alignment)
794
{
795
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, 0,
796
					       I915_TILING_NONE, 0);
797
}
798
 
799
static drm_intel_bo *
800
drm_intel_gem_bo_alloc_tiled(drm_intel_bufmgr *bufmgr, const char *name,
801
			     int x, int y, int cpp, uint32_t *tiling_mode,
802
			     unsigned long *pitch, unsigned long flags)
803
{
804
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
805
	unsigned long size, stride;
806
	uint32_t tiling;
807
 
808
	do {
809
		unsigned long aligned_y, height_alignment;
810
 
811
		tiling = *tiling_mode;
812
 
813
		/* If we're tiled, our allocations are in 8 or 32-row blocks,
814
		 * so failure to align our height means that we won't allocate
815
		 * enough pages.
816
		 *
817
		 * If we're untiled, we still have to align to 2 rows high
818
		 * because the data port accesses 2x2 blocks even if the
819
		 * bottom row isn't to be rendered, so failure to align means
820
		 * we could walk off the end of the GTT and fault.  This is
821
		 * documented on 965, and may be the case on older chipsets
822
		 * too so we try to be careful.
823
		 */
824
		aligned_y = y;
825
		height_alignment = 2;
826
 
827
		if ((bufmgr_gem->gen == 2) && tiling != I915_TILING_NONE)
828
			height_alignment = 16;
829
		else if (tiling == I915_TILING_X
830
			|| (IS_915(bufmgr_gem->pci_device)
831
			    && tiling == I915_TILING_Y))
832
			height_alignment = 8;
833
		else if (tiling == I915_TILING_Y)
834
			height_alignment = 32;
835
		aligned_y = ALIGN(y, height_alignment);
836
 
837
		stride = x * cpp;
838
		stride = drm_intel_gem_bo_tile_pitch(bufmgr_gem, stride, tiling_mode);
839
		size = stride * aligned_y;
840
		size = drm_intel_gem_bo_tile_size(bufmgr_gem, size, tiling_mode);
841
	} while (*tiling_mode != tiling);
842
	*pitch = stride;
843
 
844
	if (tiling == I915_TILING_NONE)
845
		stride = 0;
846
 
847
	return drm_intel_gem_bo_alloc_internal(bufmgr, name, size, flags,
848
					       tiling, stride);
849
}
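
/* Illustrative sketch (not part of the original file): allocating a tiled
 * buffer through the public drm_intel_bo_alloc_tiled() entry point, which is
 * backed by the allocator above.  The bufmgr is assumed to have been created
 * elsewhere (e.g. with drm_intel_bufmgr_gem_init()); the function name is
 * invented for this sketch.
 */
#if 0
static drm_intel_bo *example_alloc_tiled(drm_intel_bufmgr *bufmgr)
{
	uint32_t tiling = I915_TILING_X;
	unsigned long pitch = 0;
	drm_intel_bo *bo;

	/* 1024x768 pixels, 4 bytes per pixel.  On return, tiling and pitch
	 * report what was actually chosen, which may differ from the
	 * request (e.g. a fallback to I915_TILING_NONE for huge pitches).
	 */
	bo = drm_intel_bo_alloc_tiled(bufmgr, "example surface",
				      1024, 768, 4, &tiling, &pitch, 0);
	if (bo == NULL)
		return NULL;

	if (tiling != I915_TILING_X) {
		/* The allocator could not honour X tiling; the caller must
		 * program its surface state with the returned values.
		 */
	}
	return bo;
}
#endif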
850
 
851
/**
852
 * Returns a drm_intel_bo wrapping the given buffer object handle.
853
 *
854
 * This can be used when one application needs to pass a buffer object
855
 * to another.
856
 */
857
drm_intel_bo *
858
drm_intel_bo_gem_create_from_name(drm_intel_bufmgr *bufmgr,
859
				  const char *name,
860
				  unsigned int handle)
861
{
862
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
863
	drm_intel_bo_gem *bo_gem;
864
	int ret;
865
	struct drm_gem_open open_arg;
866
	struct drm_i915_gem_get_tiling get_tiling;
867
	drmMMListHead *list;
868
 
869
	/* At the moment most applications only have a few named bos.
870
	 * For instance, in a DRI client only the render buffers passed
871
	 * between X and the client are named. And since X returns the
872
	 * alternating names for the front/back buffer a linear search
873
	 * provides a sufficiently fast match.
874
	 */
875
	for (list = bufmgr_gem->named.next;
876
	     list != &bufmgr_gem->named;
877
	     list = list->next) {
878
		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
879
		if (bo_gem->global_name == handle) {
880
			drm_intel_gem_bo_reference(&bo_gem->bo);
881
			return &bo_gem->bo;
882
		}
883
	}
884
 
885
	VG_CLEAR(open_arg);
886
	open_arg.name = handle;
887
	ret = drmIoctl(bufmgr_gem->fd,
888
		       DRM_IOCTL_GEM_OPEN,
889
		       &open_arg);
890
	if (ret != 0) {
891
		DBG("Couldn't reference %s handle 0x%08x: %s\n",
892
		    name, handle, strerror(errno));
893
		return NULL;
894
	}
5068 serge 895
        /* Now see if someone has used a prime handle to get this
896
         * object from the kernel before by looking through the list
897
         * again for a matching gem_handle
898
         */
899
	for (list = bufmgr_gem->named.next;
900
	     list != &bufmgr_gem->named;
901
	     list = list->next) {
902
		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
903
		if (bo_gem->gem_handle == open_arg.handle) {
904
			drm_intel_gem_bo_reference(&bo_gem->bo);
905
			return &bo_gem->bo;
906
		}
907
	}
908
 
909
	bo_gem = calloc(1, sizeof(*bo_gem));
910
	if (!bo_gem)
911
		return NULL;
912
 
4363 Serge 913
	bo_gem->bo.size = open_arg.size;
914
	bo_gem->bo.offset = 0;
5068 serge 915
	bo_gem->bo.offset64 = 0;
4363 Serge 916
	bo_gem->bo.virtual = NULL;
917
	bo_gem->bo.bufmgr = bufmgr;
918
	bo_gem->name = name;
919
	atomic_set(&bo_gem->refcount, 1);
920
	bo_gem->validate_index = -1;
921
	bo_gem->gem_handle = open_arg.handle;
922
	bo_gem->bo.handle = open_arg.handle;
923
	bo_gem->global_name = handle;
924
	bo_gem->reusable = false;
925
 
926
	VG_CLEAR(get_tiling);
927
	get_tiling.handle = bo_gem->gem_handle;
928
	ret = drmIoctl(bufmgr_gem->fd,
929
		       DRM_IOCTL_I915_GEM_GET_TILING,
930
		       &get_tiling);
931
	if (ret != 0) {
932
		drm_intel_gem_bo_unreference(&bo_gem->bo);
933
		return NULL;
934
	}
935
	bo_gem->tiling_mode = get_tiling.tiling_mode;
936
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
937
	/* XXX stride is unknown */
938
	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
939
 
940
	DRMINITLISTHEAD(&bo_gem->vma_list);
941
	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
942
	DBG("bo_create_from_handle: %d (%s)\n", handle, bo_gem->name);
943
 
944
	return &bo_gem->bo;
945
}
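
/* Illustrative sketch (not part of the original file): the flink/open pair
 * that drm_intel_bo_gem_create_from_name() above is meant for.  One side
 * publishes a global name for its buffer, the other opens it by that name.
 * Both buffer managers are assumed to wrap the same device; the helper name
 * is invented for this sketch.
 */
#if 0
static int example_share_buffer(drm_intel_bo *bo, drm_intel_bufmgr *other_bufmgr)
{
	uint32_t name;
	drm_intel_bo *imported;

	/* Exporting side: ask the kernel for a global (flink) name. */
	if (drm_intel_bo_flink(bo, &name) != 0)
		return -1;

	/* Importing side: wrap the named object.  The function above reuses
	 * an existing drm_intel_bo if the name or handle is already known.
	 */
	imported = drm_intel_bo_gem_create_from_name(other_bufmgr,
						     "shared buffer", name);
	if (imported == NULL)
		return -1;

	drm_intel_bo_unreference(imported);
	return 0;
}
#endif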
946
 
947
static void
948
drm_intel_gem_bo_free(drm_intel_bo *bo)
949
{
950
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
951
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
952
	struct drm_gem_close close;
953
	int ret;
954
 
955
	DRMLISTDEL(&bo_gem->vma_list);
956
	if (bo_gem->mem_virtual) {
957
		VG(VALGRIND_FREELIKE_BLOCK(bo_gem->mem_virtual, 0));
958
		bufmgr_gem->vma_count--;
959
	}
960
	if (bo_gem->gtt_virtual) {
961
		bufmgr_gem->vma_count--;
962
	}
963
 
964
	/* Close this object */
965
	VG_CLEAR(close);
966
	close.handle = bo_gem->gem_handle;
967
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_CLOSE, &close);
968
	if (ret != 0) {
969
		DBG("DRM_IOCTL_GEM_CLOSE %d failed (%s): %s\n",
970
		    bo_gem->gem_handle, bo_gem->name, strerror(errno));
971
	}
972
	free(bo_gem->aub_annotations);
973
	free(bo);
974
}
975
 
976
static void
977
drm_intel_gem_bo_mark_mmaps_incoherent(drm_intel_bo *bo)
978
{
979
#if HAVE_VALGRIND
980
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
981
 
982
	if (bo_gem->mem_virtual)
983
		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->mem_virtual, bo->size);
984
 
985
	if (bo_gem->gtt_virtual)
986
		VALGRIND_MAKE_MEM_NOACCESS(bo_gem->gtt_virtual, bo->size);
987
#endif
988
}
989
 
990
/** Frees all cached buffers significantly older than @time. */
991
static void
992
drm_intel_gem_cleanup_bo_cache(drm_intel_bufmgr_gem *bufmgr_gem, time_t time)
993
{
994
	int i;
995
 
996
	if (bufmgr_gem->time == time)
997
		return;
998
 
999
	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1000
		struct drm_intel_gem_bo_bucket *bucket =
1001
		    &bufmgr_gem->cache_bucket[i];
1002
 
1003
		while (!DRMLISTEMPTY(&bucket->head)) {
1004
			drm_intel_bo_gem *bo_gem;
1005
 
1006
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1007
					      bucket->head.next, head);
1008
			if (time - bo_gem->free_time <= 1)
1009
				break;
1010
 
1011
			DRMLISTDEL(&bo_gem->head);
1012
 
1013
			drm_intel_gem_bo_free(&bo_gem->bo);
1014
		}
1015
	}
1016
 
1017
	bufmgr_gem->time = time;
1018
}
1019
 
1020
static void drm_intel_gem_bo_purge_vma_cache(drm_intel_bufmgr_gem *bufmgr_gem)
1021
{
1022
	int limit;
1023
 
1024
	DBG("%s: cached=%d, open=%d, limit=%d\n", __FUNCTION__,
1025
	    bufmgr_gem->vma_count, bufmgr_gem->vma_open, bufmgr_gem->vma_max);
1026
 
1027
	if (bufmgr_gem->vma_max < 0)
1028
		return;
1029
 
1030
	/* We may need to evict a few entries in order to create new mmaps */
1031
	limit = bufmgr_gem->vma_max - 2*bufmgr_gem->vma_open;
1032
	if (limit < 0)
1033
		limit = 0;
1034
 
1035
	while (bufmgr_gem->vma_count > limit) {
1036
		drm_intel_bo_gem *bo_gem;
1037
 
1038
		bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1039
				      bufmgr_gem->vma_cache.next,
1040
				      vma_list);
1041
		assert(bo_gem->map_count == 0);
1042
		DRMLISTDELINIT(&bo_gem->vma_list);
1043
 
1044
		if (bo_gem->mem_virtual) {
1045
//			munmap(bo_gem->mem_virtual, bo_gem->bo.size);
1046
			bo_gem->mem_virtual = NULL;
1047
			bufmgr_gem->vma_count--;
1048
		}
1049
		if (bo_gem->gtt_virtual) {
1050
//			munmap(bo_gem->gtt_virtual, bo_gem->bo.size);
1051
			bo_gem->gtt_virtual = NULL;
1052
			bufmgr_gem->vma_count--;
1053
		}
1054
	}
1055
}
1056
 
1057
static void drm_intel_gem_bo_close_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1058
				       drm_intel_bo_gem *bo_gem)
1059
{
1060
	bufmgr_gem->vma_open--;
1061
	DRMLISTADDTAIL(&bo_gem->vma_list, &bufmgr_gem->vma_cache);
1062
	if (bo_gem->mem_virtual)
1063
		bufmgr_gem->vma_count++;
1064
	if (bo_gem->gtt_virtual)
1065
		bufmgr_gem->vma_count++;
1066
	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1067
}
1068
 
1069
static void drm_intel_gem_bo_open_vma(drm_intel_bufmgr_gem *bufmgr_gem,
1070
				      drm_intel_bo_gem *bo_gem)
1071
{
1072
	bufmgr_gem->vma_open++;
1073
	DRMLISTDEL(&bo_gem->vma_list);
1074
	if (bo_gem->mem_virtual)
1075
		bufmgr_gem->vma_count--;
1076
	if (bo_gem->gtt_virtual)
1077
		bufmgr_gem->vma_count--;
1078
	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
1079
}
1080
 
1081
static void
1082
drm_intel_gem_bo_unreference_final(drm_intel_bo *bo, time_t time)
1083
{
1084
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1085
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1086
	struct drm_intel_gem_bo_bucket *bucket;
1087
	int i;
1088
 
1089
	/* Unreference all the target buffers */
1090
	for (i = 0; i < bo_gem->reloc_count; i++) {
1091
		if (bo_gem->reloc_target_info[i].bo != bo) {
1092
			drm_intel_gem_bo_unreference_locked_timed(bo_gem->
1093
								  reloc_target_info[i].bo,
1094
								  time);
1095
		}
1096
	}
1097
	bo_gem->reloc_count = 0;
1098
	bo_gem->used_as_reloc_target = false;
1099
 
1100
	DBG("bo_unreference final: %d (%s)\n",
1101
	    bo_gem->gem_handle, bo_gem->name);
1102
 
1103
	/* release memory associated with this object */
1104
	if (bo_gem->reloc_target_info) {
1105
		free(bo_gem->reloc_target_info);
1106
		bo_gem->reloc_target_info = NULL;
1107
	}
1108
	if (bo_gem->relocs) {
1109
		free(bo_gem->relocs);
1110
		bo_gem->relocs = NULL;
1111
	}
1112
 
1113
	/* Clear any left-over mappings */
1114
	if (bo_gem->map_count) {
1115
		DBG("bo freed with non-zero map-count %d\n", bo_gem->map_count);
1116
		bo_gem->map_count = 0;
1117
		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1118
		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1119
	}
1120
 
1121
	DRMLISTDEL(&bo_gem->name_list);
1122
 
1123
	bucket = drm_intel_gem_bo_bucket_for_size(bufmgr_gem, bo->size);
1124
	/* Put the buffer into our internal cache for reuse if we can. */
1125
	if (bufmgr_gem->bo_reuse && bo_gem->reusable && bucket != NULL &&
1126
	    drm_intel_gem_bo_madvise_internal(bufmgr_gem, bo_gem,
1127
					      I915_MADV_DONTNEED)) {
1128
		bo_gem->free_time = time;
1129
 
1130
		bo_gem->name = NULL;
1131
		bo_gem->validate_index = -1;
1132
 
1133
		DRMLISTADDTAIL(&bo_gem->head, &bucket->head);
1134
	} else {
1135
		drm_intel_gem_bo_free(bo);
1136
	}
1137
}
1138
 
1139
static void drm_intel_gem_bo_unreference_locked_timed(drm_intel_bo *bo,
1140
						      time_t time)
1141
{
1142
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1143
 
1144
	assert(atomic_read(&bo_gem->refcount) > 0);
1145
	if (atomic_dec_and_test(&bo_gem->refcount))
1146
		drm_intel_gem_bo_unreference_final(bo, time);
1147
}
1148
 
1149
static void drm_intel_gem_bo_unreference(drm_intel_bo *bo)
1150
{
1151
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1152
 
1153
	assert(atomic_read(&bo_gem->refcount) > 0);
1154
	if (atomic_dec_and_test(&bo_gem->refcount)) {
1155
		drm_intel_bufmgr_gem *bufmgr_gem =
1156
		    (drm_intel_bufmgr_gem *) bo->bufmgr;
1157
//		struct timespec time;
1158
 
1159
//		clock_gettime(CLOCK_MONOTONIC, &time);
1160
 
1161
//		pthread_mutex_lock(&bufmgr_gem->lock);
1162
		drm_intel_gem_bo_unreference_final(bo, 0);
1163
		drm_intel_gem_cleanup_bo_cache(bufmgr_gem, 0);
1164
//		pthread_mutex_unlock(&bufmgr_gem->lock);
1165
	}
1166
}
1167
 
1168
static int drm_intel_gem_bo_map(drm_intel_bo *bo, int write_enable)
1169
{
1170
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1171
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1172
	struct drm_i915_gem_set_domain set_domain;
1173
	int ret;
1174
 
1175
//	pthread_mutex_lock(&bufmgr_gem->lock);
1176
 
1177
	if (bo_gem->map_count++ == 0)
1178
		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1179
 
1180
	if (!bo_gem->mem_virtual) {
1181
		struct drm_i915_gem_mmap mmap_arg;
1182
 
1183
		DBG("bo_map: %d (%s), map_count=%d\n",
1184
		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1185
 
1186
		VG_CLEAR(mmap_arg);
1187
		mmap_arg.handle = bo_gem->gem_handle;
1188
		mmap_arg.offset = 0;
1189
		mmap_arg.size = bo->size;
1190
		ret = drmIoctl(bufmgr_gem->fd,
1191
			       DRM_IOCTL_I915_GEM_MMAP,
1192
			       &mmap_arg);
1193
		if (ret != 0) {
1194
			ret = -errno;
1195
			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1196
			    __FILE__, __LINE__, bo_gem->gem_handle,
1197
			    bo_gem->name, strerror(errno));
1198
			if (--bo_gem->map_count == 0)
1199
				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1200
//			pthread_mutex_unlock(&bufmgr_gem->lock);
1201
			return ret;
1202
		}
1203
		VG(VALGRIND_MALLOCLIKE_BLOCK(mmap_arg.addr_ptr, mmap_arg.size, 0, 1));
1204
		bo_gem->mem_virtual = (void *)(uintptr_t) mmap_arg.addr_ptr;
1205
	}
1206
	DBG("bo_map: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1207
	    bo_gem->mem_virtual);
1208
	bo->virtual = bo_gem->mem_virtual;
1209
 
1210
	VG_CLEAR(set_domain);
1211
	set_domain.handle = bo_gem->gem_handle;
1212
	set_domain.read_domains = I915_GEM_DOMAIN_CPU;
1213
	if (write_enable)
1214
		set_domain.write_domain = I915_GEM_DOMAIN_CPU;
1215
	else
1216
		set_domain.write_domain = 0;
1217
	ret = drmIoctl(bufmgr_gem->fd,
1218
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1219
		       &set_domain);
1220
	if (ret != 0) {
1221
		DBG("%s:%d: Error setting to CPU domain %d: %s\n",
1222
		    __FILE__, __LINE__, bo_gem->gem_handle,
1223
		    strerror(errno));
1224
	}
1225
 
1226
	if (write_enable)
1227
		bo_gem->mapped_cpu_write = true;
1228
 
1229
	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1230
	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->mem_virtual, bo->size));
1231
//	pthread_mutex_unlock(&bufmgr_gem->lock);
1232
 
1233
	return 0;
1234
}
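
/* Illustrative sketch (not part of the original file): filling a buffer
 * through the CPU mapping path above via the public drm_intel_bo_map() and
 * drm_intel_bo_unmap() wrappers.  "bo" is assumed to be an unshared buffer
 * from this buffer manager; the helper name is invented for this sketch.
 */
#if 0
static int example_fill_via_cpu_map(drm_intel_bo *bo)
{
	int ret;

	ret = drm_intel_bo_map(bo, 1 /* write_enable */);
	if (ret != 0)
		return ret;

	/* bo->virtual now points at the CPU mmap of the object, which has
	 * been moved to the CPU write domain as done above.
	 */
	memset(bo->virtual, 0, bo->size);

	return drm_intel_bo_unmap(bo);
}
#endif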
1235
 
1236
static int
1237
map_gtt(drm_intel_bo *bo)
1238
{
1239
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1240
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1241
	int ret;
1242
 
1243
	if (bo_gem->map_count++ == 0)
1244
		drm_intel_gem_bo_open_vma(bufmgr_gem, bo_gem);
1245
 
1246
	/* Get a mapping of the buffer if we haven't before. */
1247
	if (bo_gem->gtt_virtual == NULL) {
1248
		struct drm_i915_gem_mmap_gtt mmap_arg;
1249
 
1250
		DBG("bo_map_gtt: mmap %d (%s), map_count=%d\n",
1251
		    bo_gem->gem_handle, bo_gem->name, bo_gem->map_count);
1252
 
1253
		VG_CLEAR(mmap_arg);
1254
		mmap_arg.handle = bo_gem->gem_handle;
1255
		mmap_arg.offset = 0;
1256
 
1257
		/* Get the fake offset back... */
1258
		ret = drmIoctl(bufmgr_gem->fd,
1259
			       DRM_IOCTL_I915_GEM_MMAP_GTT,
1260
			       &mmap_arg);
1261
		if (ret != 0) {
1262
			ret = -errno;
1263
			DBG("%s:%d: Error preparing buffer map %d (%s): %s .\n",
1264
			    __FILE__, __LINE__,
1265
			    bo_gem->gem_handle, bo_gem->name,
1266
			    strerror(errno));
1267
			if (--bo_gem->map_count == 0)
1268
				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1269
			return ret;
1270
		}
1271
 
1272
		/* and mmap it */
1273
		bo_gem->gtt_virtual = (void *)(uintptr_t)mmap_arg.offset;
1274
		if (bo_gem->gtt_virtual == 0) {
1275
			bo_gem->gtt_virtual = NULL;
1276
			ret = -errno;
1277
			DBG("%s:%d: Error mapping buffer %d (%s): %s .\n",
1278
			    __FILE__, __LINE__,
1279
			    bo_gem->gem_handle, bo_gem->name,
1280
			    strerror(errno));
1281
			if (--bo_gem->map_count == 0)
1282
				drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1283
			return ret;
1284
		}
1285
	}
1286
 
1287
	bo->virtual = bo_gem->gtt_virtual;
1288
 
1289
	DBG("bo_map_gtt: %d (%s) -> %p\n", bo_gem->gem_handle, bo_gem->name,
1290
	    bo_gem->gtt_virtual);
1291
 
1292
	return 0;
1293
}
1294
 
1295
int drm_intel_gem_bo_map_gtt(drm_intel_bo *bo)
1296
{
1297
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1298
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1299
	struct drm_i915_gem_set_domain set_domain;
1300
	int ret;
1301
 
1302
//	pthread_mutex_lock(&bufmgr_gem->lock);
1303
 
1304
	ret = map_gtt(bo);
1305
	if (ret) {
1306
//		pthread_mutex_unlock(&bufmgr_gem->lock);
1307
		return ret;
1308
	}
1309
 
1310
	/* Now move it to the GTT domain so that the GPU and CPU
1311
	 * caches are flushed and the GPU isn't actively using the
1312
	 * buffer.
1313
	 *
1314
	 * The pagefault handler does this domain change for us when
1315
	 * it has unbound the BO from the GTT, but it's up to us to
1316
	 * tell it when we're about to use things if we had done
1317
	 * rendering and it still happens to be bound to the GTT.
1318
	 */
1319
	VG_CLEAR(set_domain);
1320
	set_domain.handle = bo_gem->gem_handle;
1321
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1322
	set_domain.write_domain = I915_GEM_DOMAIN_GTT;
1323
	ret = drmIoctl(bufmgr_gem->fd,
1324
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1325
		       &set_domain);
1326
	if (ret != 0) {
1327
		DBG("%s:%d: Error setting domain %d: %s\n",
1328
		    __FILE__, __LINE__, bo_gem->gem_handle,
1329
		    strerror(errno));
1330
	}
1331
 
1332
	drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1333
	VG(VALGRIND_MAKE_MEM_DEFINED(bo_gem->gtt_virtual, bo->size));
1334
//	pthread_mutex_unlock(&bufmgr_gem->lock);
1335
 
1336
	return 0;
1337
}
1338
 
1339
/**
1340
 * Performs a mapping of the buffer object like the normal GTT
1341
 * mapping, but avoids waiting for the GPU to be done reading from or
1342
 * rendering to the buffer.
1343
 *
1344
 * This is used in the implementation of GL_ARB_map_buffer_range: The
1345
 * user asks to create a buffer, then does a mapping, fills some
1346
 * space, runs a drawing command, then asks to map it again without
1347
 * synchronizing because it guarantees that it won't write over the
1348
 * data that the GPU is busy using (or, more specifically, that if it
1349
 * does write over the data, it acknowledges that rendering is
1350
 * undefined).
1351
 */
1352
 
1353
int drm_intel_gem_bo_map_unsynchronized(drm_intel_bo *bo)
1354
{
1355
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
5068 serge 1356
#ifdef HAVE_VALGRIND
1357
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1358
#endif
4363 Serge 1359
	int ret;
1360
 
1361
	/* If the CPU cache isn't coherent with the GTT, then use a
1362
	 * regular synchronized mapping.  The problem is that we don't
1363
	 * track where the buffer was last used on the CPU side in
1364
	 * terms of drm_intel_bo_map vs drm_intel_gem_bo_map_gtt, so
1365
	 * we would potentially corrupt the buffer even when the user
1366
	 * does reasonable things.
1367
	 */
1368
	if (!bufmgr_gem->has_llc)
1369
		return drm_intel_gem_bo_map_gtt(bo);
1370
 
1371
//	pthread_mutex_lock(&bufmgr_gem->lock);
1372
	ret = map_gtt(bo);
1373
//	pthread_mutex_unlock(&bufmgr_gem->lock);
1374
 
1375
	return ret;
1376
}
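
/* Illustrative sketch (not part of the original file): the
 * GL_ARB_map_buffer_range-style pattern described in the comment above.
 * The caller promises to write only past the data the GPU may still be
 * reading, so the map can skip the synchronizing set-domain on LLC parts.
 * The offset handling and helper name are invented for this sketch.
 */
#if 0
static int example_append_without_stall(drm_intel_bo *bo,
					 unsigned long offset,
					 const void *data, unsigned long len)
{
	int ret;

	ret = drm_intel_gem_bo_map_unsynchronized(bo);
	if (ret != 0)
		return ret;

	/* Only touch the region beyond what the batch in flight uses. */
	memcpy((char *)bo->virtual + offset, data, len);

	return drm_intel_gem_bo_unmap_gtt(bo);
}
#endif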
1377
 
1378
static int drm_intel_gem_bo_unmap(drm_intel_bo *bo)
1379
{
1380
	drm_intel_bufmgr_gem *bufmgr_gem;
1381
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1382
	int ret = 0;
1383
 
1384
	if (bo == NULL)
1385
		return 0;

	bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1386
 
1387
//	pthread_mutex_lock(&bufmgr_gem->lock);
1388
 
1389
	if (bo_gem->map_count <= 0) {
1390
		DBG("attempted to unmap an unmapped bo\n");
1391
//		pthread_mutex_unlock(&bufmgr_gem->lock);
1392
		/* Preserve the old behaviour of just treating this as a
1393
		 * no-op rather than reporting the error.
1394
		 */
1395
		return 0;
1396
	}
1397
 
1398
	if (bo_gem->mapped_cpu_write) {
1399
		struct drm_i915_gem_sw_finish sw_finish;
1400
 
1401
		/* Cause a flush to happen if the buffer's pinned for
1402
		 * scanout, so the results show up in a timely manner.
1403
		 * Unlike GTT set domains, this only does work if the
1404
		 * buffer should be scanout-related.
1405
		 */
1406
 
1407
		bo_gem->mapped_cpu_write = false;
1408
	}
1409
 
1410
	/* We need to unmap after every invocation, as we cannot keep
1411
	 * an open vma for every bo, since that would exhaust the system
1412
	 * limits and cause later failures.
1413
	 */
1414
	if (--bo_gem->map_count == 0) {
1415
		drm_intel_gem_bo_close_vma(bufmgr_gem, bo_gem);
1416
		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1417
		bo->virtual = NULL;
1418
	}
1419
//	pthread_mutex_unlock(&bufmgr_gem->lock);
1420
 
1421
	return ret;
1422
}
1423
 
1424
int drm_intel_gem_bo_unmap_gtt(drm_intel_bo *bo)
1425
{
1426
	return drm_intel_gem_bo_unmap(bo);
1427
}
1428
 
1429
static int
1430
drm_intel_gem_bo_subdata(drm_intel_bo *bo, unsigned long offset,
1431
			 unsigned long size, const void *data)
1432
{
1433
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1434
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1435
	struct drm_i915_gem_pwrite pwrite;
1436
	int ret;
1437
 
1438
 	VG_CLEAR(pwrite);
1439
	pwrite.handle = bo_gem->gem_handle;
1440
	pwrite.offset = offset;
1441
	pwrite.size = size;
1442
	pwrite.data_ptr = (uint64_t) (uintptr_t) data;
1443
	ret = drmIoctl(bufmgr_gem->fd,
1444
		       DRM_IOCTL_I915_GEM_PWRITE,
1445
		       &pwrite);
1446
	if (ret != 0) {
1447
		ret = -errno;
1448
		DBG("%s:%d: Error writing data to buffer %d: (%d %d) %s .\n",
1449
		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1450
		    (int)size, strerror(errno));
1451
	}
1452
 
1453
	return ret;
1454
}
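
/* Illustrative sketch (not part of the original file): uploading a small
 * block of data with the pwrite path above via the public
 * drm_intel_bo_subdata() wrapper, which avoids mapping the object at all.
 * The payload and helper name are invented for this sketch.
 */
#if 0
static int example_upload_constants(drm_intel_bo *bo)
{
	static const float constants[4] = { 0.0f, 0.25f, 0.5f, 1.0f };

	/* Writes sizeof(constants) bytes at offset 0 of the object. */
	return drm_intel_bo_subdata(bo, 0, sizeof(constants), constants);
}
#endif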
1455
 
1456
#if 0
1457
static int
1458
drm_intel_gem_get_pipe_from_crtc_id(drm_intel_bufmgr *bufmgr, int crtc_id)
1459
{
1460
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1461
	struct drm_i915_get_pipe_from_crtc_id get_pipe_from_crtc_id;
1462
	int ret;
1463
 
1464
	VG_CLEAR(get_pipe_from_crtc_id);
1465
	get_pipe_from_crtc_id.crtc_id = crtc_id;
1466
	ret = drmIoctl(bufmgr_gem->fd,
1467
		       DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID,
1468
		       &get_pipe_from_crtc_id);
1469
	if (ret != 0) {
1470
		/* We return -1 here to signal that we don't
1471
		 * know which pipe is associated with this crtc.
1472
		 * This lets the caller know that this information
1473
		 * isn't available; using the wrong pipe for
1474
		 * vblank waiting can cause the chipset to lock up
1475
		 */
1476
		return -1;
1477
	}
1478
 
1479
	return get_pipe_from_crtc_id.pipe;
1480
}
1481
 
1482
static int
1483
drm_intel_gem_bo_get_subdata(drm_intel_bo *bo, unsigned long offset,
1484
			     unsigned long size, void *data)
1485
{
1486
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1487
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1488
	struct drm_i915_gem_pread pread;
1489
	int ret;
1490
 
1491
	VG_CLEAR(pread);
1492
	pread.handle = bo_gem->gem_handle;
1493
	pread.offset = offset;
1494
	pread.size = size;
1495
	pread.data_ptr = (uint64_t) (uintptr_t) data;
1496
	ret = drmIoctl(bufmgr_gem->fd,
1497
		       DRM_IOCTL_I915_GEM_PREAD,
1498
		       &pread);
1499
	if (ret != 0) {
1500
		ret = -errno;
1501
		DBG("%s:%d: Error reading data from buffer %d: (%d %d) %s .\n",
1502
		    __FILE__, __LINE__, bo_gem->gem_handle, (int)offset,
1503
		    (int)size, strerror(errno));
1504
	}
1505
 
1506
	return ret;
1507
}
1508
 
1509
#endif
1510
 
1511
/** Waits for all GPU rendering with the object to have completed. */
1512
static void
1513
drm_intel_gem_bo_wait_rendering(drm_intel_bo *bo)
1514
{
1515
	drm_intel_gem_bo_start_gtt_access(bo, 1);
1516
}
1517
 
1518
/**
1519
 * Waits on a BO for the given amount of time.
1520
 *
1521
 * @bo: buffer object to wait for
1522
 * @timeout_ns: amount of time to wait in nanoseconds.
1523
 *   If value is less than 0, an infinite wait will occur.
1524
 *
1525
 * Returns 0 if the wait was successful, i.e. the last batch referencing the
1526
 * object has completed within the allotted time. Otherwise some negative return
1527
 * value describes the error. Of particular interest is -ETIME when the wait has
1528
 * failed to yield the desired result.
1529
 *
1530
 * Similar to drm_intel_gem_bo_wait_rendering except a timeout parameter allows
1531
 * the operation to give up after a certain amount of time. Another subtle
1532
 * difference is the internal locking semantics are different (this variant does
1533
 * not hold the lock for the duration of the wait). This makes the wait subject
1534
 * to a larger userspace race window.
1535
 *
1536
 * The implementation shall wait until the object is no longer actively
1537
 * referenced within a batch buffer at the time of the call. The wait will
1538
 * not guard against the buffer being re-issued via another thread or a flinked
1539
 * handle. Userspace must make sure this race does not occur if such precision
1540
 * is important.
1541
 */
1542
int drm_intel_gem_bo_wait(drm_intel_bo *bo, int64_t timeout_ns)
1543
{
1544
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1545
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1546
	struct drm_i915_gem_wait wait;
1547
	int ret;
1548
 
1549
	if (!bufmgr_gem->has_wait_timeout) {
1550
		DBG("%s:%d: Timed wait is not supported. Falling back to "
1551
		    "infinite wait\n", __FILE__, __LINE__);
1552
		if (timeout_ns) {
1553
			drm_intel_gem_bo_wait_rendering(bo);
1554
			return 0;
1555
		} else {
1556
			return drm_intel_gem_bo_busy(bo) ? -1 : 0;
1557
		}
1558
	}
1559
 
1560
	wait.bo_handle = bo_gem->gem_handle;
1561
	wait.timeout_ns = timeout_ns;
1562
	wait.flags = 0;
1563
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_WAIT, &wait);
1564
	if (ret == -1)
1565
		return -errno;
1566
 
1567
	return ret;
1568
}
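
/* Illustrative sketch (not part of the original file): bounding a stall on a
 * busy buffer with the timed wait above.  The one-second timeout is an
 * arbitrary choice for the sketch; a negative timeout waits forever.
 */
#if 0
static int example_wait_one_second(drm_intel_bo *bo)
{
	int ret = drm_intel_gem_bo_wait(bo, 1000000000ll /* 1 s in ns */);

	if (ret == -ETIME) {
		/* Still busy after the allotted time: the caller decides
		 * whether to retry, fall back, or give up.
		 */
	}
	return ret;
}
#endif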
1569
 
1570
/**
1571
 * Sets the object to the GTT read and possibly write domain, used by the X
1572
 * 2D driver in the absence of kernel support to do drm_intel_gem_bo_map_gtt().
1573
 *
1574
 * In combination with drm_intel_gem_bo_pin() and manual fence management, we
1575
 * can do tiled pixmaps this way.
1576
 */
1577
void
1578
drm_intel_gem_bo_start_gtt_access(drm_intel_bo *bo, int write_enable)
1579
{
1580
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1581
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1582
	struct drm_i915_gem_set_domain set_domain;
1583
	int ret;
1584
 
1585
	VG_CLEAR(set_domain);
1586
	set_domain.handle = bo_gem->gem_handle;
1587
	set_domain.read_domains = I915_GEM_DOMAIN_GTT;
1588
	set_domain.write_domain = write_enable ? I915_GEM_DOMAIN_GTT : 0;
1589
	ret = drmIoctl(bufmgr_gem->fd,
1590
		       DRM_IOCTL_I915_GEM_SET_DOMAIN,
1591
		       &set_domain);
1592
	if (ret != 0) {
1593
		DBG("%s:%d: Error setting memory domains %d (%08x %08x): %s .\n",
1594
		    __FILE__, __LINE__, bo_gem->gem_handle,
1595
		    set_domain.read_domains, set_domain.write_domain,
1596
		    strerror(errno));
1597
	}
1598
}
1599
 
1600
static void
1601
drm_intel_bufmgr_gem_destroy(drm_intel_bufmgr *bufmgr)
1602
{
1603
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
1604
	int i;
1605
 
1606
	free(bufmgr_gem->exec2_objects);
1607
	free(bufmgr_gem->exec_objects);
1608
	free(bufmgr_gem->exec_bos);
1609
	free(bufmgr_gem->aub_filename);
1610
 
1611
//	pthread_mutex_destroy(&bufmgr_gem->lock);
1612
 
1613
	/* Free any cached buffer objects we were going to reuse */
1614
	for (i = 0; i < bufmgr_gem->num_buckets; i++) {
1615
		struct drm_intel_gem_bo_bucket *bucket =
1616
		    &bufmgr_gem->cache_bucket[i];
1617
		drm_intel_bo_gem *bo_gem;
1618
 
1619
		while (!DRMLISTEMPTY(&bucket->head)) {
1620
			bo_gem = DRMLISTENTRY(drm_intel_bo_gem,
1621
					      bucket->head.next, head);
1622
			DRMLISTDEL(&bo_gem->head);
1623
 
1624
			drm_intel_gem_bo_free(&bo_gem->bo);
1625
		}
1626
	}
1627
 
1628
	free(bufmgr);
1629
}
1630
 
1631
/**
1632
 * Adds the target buffer to the validation list and adds the relocation
1633
 * to the reloc_buffer's relocation list.
1634
 *
1635
 * The relocation entry at the given offset must already contain the
1636
 * precomputed relocation value, because the kernel will optimize out
1637
 * the relocation entry write when the buffer hasn't moved from the
1638
 * last known offset in target_bo.
1639
 */
1640
static int
1641
do_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1642
		 drm_intel_bo *target_bo, uint32_t target_offset,
1643
		 uint32_t read_domains, uint32_t write_domain,
1644
		 bool need_fence)
1645
{
1646
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1647
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1648
	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
1649
	bool fenced_command;
1650
 
1651
	if (bo_gem->has_error)
1652
		return -ENOMEM;
1653
 
1654
	if (target_bo_gem->has_error) {
1655
		bo_gem->has_error = true;
1656
		return -ENOMEM;
1657
	}
1658
 
1659
	/* We never use HW fences for rendering on 965+ */
1660
	if (bufmgr_gem->gen >= 4)
1661
		need_fence = false;
1662
 
1663
	fenced_command = need_fence;
1664
	if (target_bo_gem->tiling_mode == I915_TILING_NONE)
1665
		need_fence = false;
1666
 
1667
	/* Create a new relocation list if needed */
1668
	if (bo_gem->relocs == NULL && drm_intel_setup_reloc_list(bo))
1669
		return -ENOMEM;
1670
 
1671
	/* Check overflow */
1672
	assert(bo_gem->reloc_count < bufmgr_gem->max_relocs);
1673
 
1674
	/* Check args */
1675
	assert(offset <= bo->size - 4);
1676
	assert((write_domain & (write_domain - 1)) == 0);
1677
 
1678
	/* Make sure that we're not adding a reloc to something whose size has
1679
	 * already been accounted for.
1680
	 */
1681
	assert(!bo_gem->used_as_reloc_target);
1682
	if (target_bo_gem != bo_gem) {
1683
		target_bo_gem->used_as_reloc_target = true;
1684
		bo_gem->reloc_tree_size += target_bo_gem->reloc_tree_size;
1685
	}
1686
	/* An object needing a fence is a tiled buffer, so it won't have
1687
	 * relocs to other buffers.
1688
	 */
1689
	if (need_fence)
1690
		target_bo_gem->reloc_tree_fences = 1;
1691
	bo_gem->reloc_tree_fences += target_bo_gem->reloc_tree_fences;
1692
 
1693
	bo_gem->relocs[bo_gem->reloc_count].offset = offset;
1694
	bo_gem->relocs[bo_gem->reloc_count].delta = target_offset;
1695
	bo_gem->relocs[bo_gem->reloc_count].target_handle =
1696
	    target_bo_gem->gem_handle;
1697
	bo_gem->relocs[bo_gem->reloc_count].read_domains = read_domains;
1698
	bo_gem->relocs[bo_gem->reloc_count].write_domain = write_domain;
5068 serge 1699
	bo_gem->relocs[bo_gem->reloc_count].presumed_offset = target_bo->offset64;
4363 Serge 1700
 
1701
	bo_gem->reloc_target_info[bo_gem->reloc_count].bo = target_bo;
1702
	if (target_bo != bo)
1703
		drm_intel_gem_bo_reference(target_bo);
1704
	if (fenced_command)
1705
		bo_gem->reloc_target_info[bo_gem->reloc_count].flags =
1706
			DRM_INTEL_RELOC_FENCE;
1707
	else
1708
		bo_gem->reloc_target_info[bo_gem->reloc_count].flags = 0;
1709
 
1710
	bo_gem->reloc_count++;
1711
 
1712
	return 0;
1713
}
1714
 
1715
static int
1716
drm_intel_gem_bo_emit_reloc(drm_intel_bo *bo, uint32_t offset,
1717
			    drm_intel_bo *target_bo, uint32_t target_offset,
1718
			    uint32_t read_domains, uint32_t write_domain)
1719
{
1720
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
1721
 
1722
	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1723
				read_domains, write_domain,
1724
				!bufmgr_gem->fenced_relocs);
1725
}
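
/* Illustrative sketch (not part of the original file): the contract that
 * do_bo_emit_reloc() documents above.  The caller first writes the presumed
 * offset of the target into the batch, then records the relocation so the
 * kernel only patches it if the target actually moves.  The "used" byte
 * offset and helper name are invented for this sketch; the batch is assumed
 * to be mapped.
 */
#if 0
static int example_emit_reloc(drm_intel_bo *batch_bo, uint32_t used,
			      drm_intel_bo *target_bo)
{
	uint32_t *batch = batch_bo->virtual;

	/* Precompute the value the kernel would write if nothing moved. */
	batch[used / 4] = (uint32_t)target_bo->offset64;

	/* Record the relocation at that byte offset within the batch. */
	return drm_intel_bo_emit_reloc(batch_bo, used,
				       target_bo, 0,
				       I915_GEM_DOMAIN_RENDER,
				       I915_GEM_DOMAIN_RENDER);
}
#endif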
1726
 
1727
static int
1728
drm_intel_gem_bo_emit_reloc_fence(drm_intel_bo *bo, uint32_t offset,
1729
				  drm_intel_bo *target_bo,
1730
				  uint32_t target_offset,
1731
				  uint32_t read_domains, uint32_t write_domain)
1732
{
1733
	return do_bo_emit_reloc(bo, offset, target_bo, target_offset,
1734
				read_domains, write_domain, true);
1735
}
1736
 
1737
int
1738
drm_intel_gem_bo_get_reloc_count(drm_intel_bo *bo)
1739
{
1740
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1741
 
1742
	return bo_gem->reloc_count;
1743
}
1744
 
1745
/**
1746
 * Removes existing relocation entries in the BO after "start".
1747
 *
1748
 * This allows a user to avoid a two-step process for state setup with
1749
 * counting up all the buffer objects and doing a
1750
 * drm_intel_bufmgr_check_aperture_space() before emitting any of the
1751
 * relocations for the state setup.  Instead, save the state of the
1752
 * batchbuffer including drm_intel_gem_bo_get_reloc_count(), emit all the
1753
 * state, and then check if it still fits in the aperture.
1754
 *
1755
 * Any further drm_intel_bufmgr_check_aperture_space() queries
1756
 * involving this buffer in the tree are undefined after this call.
1757
 */
1758
void
1759
drm_intel_gem_bo_clear_relocs(drm_intel_bo *bo, int start)
1760
{
1761
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1762
	int i;
1763
//	struct timespec time;
1764
 
1765
//	clock_gettime(CLOCK_MONOTONIC, &time);
1766
 
1767
	assert(bo_gem->reloc_count >= start);
1768
	/* Unreference the cleared target buffers */
1769
	for (i = start; i < bo_gem->reloc_count; i++) {
1770
		drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) bo_gem->reloc_target_info[i].bo;
1771
		if (&target_bo_gem->bo != bo) {
1772
			bo_gem->reloc_tree_fences -= target_bo_gem->reloc_tree_fences;
1773
			drm_intel_gem_bo_unreference_locked_timed(&target_bo_gem->bo,
1774
								  0);
1775
		}
1776
	}
1777
	bo_gem->reloc_count = start;
1778
}
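/* A minimal sketch of the save/emit/check/roll-back pattern described above.
 * emit_state_setup() and flush_batch() are hypothetical driver hooks; only
 * the libdrm calls are real.
 */
#if 0
static void
example_emit_state_with_rollback(drm_intel_bo *batch)
{
	int saved = drm_intel_gem_bo_get_reloc_count(batch);

	emit_state_setup(batch);		/* adds relocs to the batch */

	if (drm_intel_bufmgr_check_aperture_space(&batch, 1) != 0) {
		/* Didn't fit: drop the relocs we just added, flush the
		 * old batch and emit the state again into a fresh one.
		 */
		drm_intel_gem_bo_clear_relocs(batch, saved);
		flush_batch();
		emit_state_setup(batch);
	}
}
#endif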
1779
 
1780
/**
1781
 * Walk the tree of relocations rooted at BO and accumulate the list of
1782
 * validations to be performed and update the relocation buffers with
1783
 * index values into the validation list.
1784
 */
1785
static void
1786
drm_intel_gem_bo_process_reloc(drm_intel_bo *bo)
1787
{
1788
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1789
	int i;
1790
 
1791
	if (bo_gem->relocs == NULL)
1792
		return;
1793
 
1794
	for (i = 0; i < bo_gem->reloc_count; i++) {
1795
		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1796
 
1797
		if (target_bo == bo)
1798
			continue;
1799
 
1800
		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1801
 
1802
		/* Continue walking the tree depth-first. */
1803
		drm_intel_gem_bo_process_reloc(target_bo);
1804
 
1805
		/* Add the target to the validate list */
1806
		drm_intel_add_validate_buffer(target_bo);
1807
	}
1808
}
1809
 
1810
static void
1811
drm_intel_gem_bo_process_reloc2(drm_intel_bo *bo)
1812
{
1813
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1814
	int i;
1815
 
1816
	if (bo_gem->relocs == NULL)
1817
		return;
1818
 
1819
	for (i = 0; i < bo_gem->reloc_count; i++) {
1820
		drm_intel_bo *target_bo = bo_gem->reloc_target_info[i].bo;
1821
		int need_fence;
1822
 
1823
		if (target_bo == bo)
1824
			continue;
1825
 
1826
		drm_intel_gem_bo_mark_mmaps_incoherent(bo);
1827
 
1828
		/* Continue walking the tree depth-first. */
1829
		drm_intel_gem_bo_process_reloc2(target_bo);
1830
 
1831
		need_fence = (bo_gem->reloc_target_info[i].flags &
1832
			      DRM_INTEL_RELOC_FENCE);
1833
 
1834
		/* Add the target to the validate list */
1835
		drm_intel_add_validate_buffer2(target_bo, need_fence);
1836
	}
1837
}
1838
 
1839
 
1840
static void
1841
drm_intel_update_buffer_offsets(drm_intel_bufmgr_gem *bufmgr_gem)
1842
{
1843
	int i;
1844
 
1845
	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1846
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1847
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1848
 
1849
		/* Update the buffer offset */
5068 serge 1850
		if (bufmgr_gem->exec_objects[i].offset != bo->offset64) {
4363 Serge 1851
			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
5068 serge 1852
			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
4363 Serge 1853
			    (unsigned long long)bufmgr_gem->exec_objects[i].
1854
			    offset);
5068 serge 1855
			bo->offset64 = bufmgr_gem->exec_objects[i].offset;
4363 Serge 1856
			bo->offset = bufmgr_gem->exec_objects[i].offset;
1857
		}
1858
	}
1859
}
1860
 
1861
static void
1862
drm_intel_update_buffer_offsets2 (drm_intel_bufmgr_gem *bufmgr_gem)
1863
{
1864
	int i;
1865
 
1866
	for (i = 0; i < bufmgr_gem->exec_count; i++) {
1867
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
1868
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
1869
 
1870
		/* Update the buffer offset */
5068 serge 1871
		if (bufmgr_gem->exec2_objects[i].offset != bo->offset64) {
4363 Serge 1872
			DBG("BO %d (%s) migrated: 0x%08lx -> 0x%08llx\n",
5068 serge 1873
			    bo_gem->gem_handle, bo_gem->name, bo->offset64,
4363 Serge 1874
			    (unsigned long long)bufmgr_gem->exec2_objects[i].offset);
5068 serge 1875
			bo->offset64 = bufmgr_gem->exec2_objects[i].offset;
4363 Serge 1876
			bo->offset = bufmgr_gem->exec2_objects[i].offset;
1877
		}
1878
	}
1879
}
1880
 
1881
static void
1882
aub_out(drm_intel_bufmgr_gem *bufmgr_gem, uint32_t data)
1883
{
1884
	fwrite(&data, 1, 4, bufmgr_gem->aub_file);
1885
}
1886
 
1887
static void
1888
aub_out_data(drm_intel_bufmgr_gem *bufmgr_gem, void *data, size_t size)
1889
{
1890
	fwrite(data, 1, size, bufmgr_gem->aub_file);
1891
}
1892
 
1893
static void
1894
aub_write_bo_data(drm_intel_bo *bo, uint32_t offset, uint32_t size)
1895
{
1896
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1897
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1898
	uint32_t *data;
1899
	unsigned int i;
1900
 
1901
	data = malloc(bo->size);
1902
	drm_intel_bo_get_subdata(bo, offset, size, data);
1903
 
1904
	/* Easy mode: write out bo with no relocations */
1905
	if (!bo_gem->reloc_count) {
1906
		aub_out_data(bufmgr_gem, data, size);
1907
		free(data);
1908
		return;
1909
	}
1910
 
1911
	/* Otherwise, handle the relocations while writing. */
1912
	for (i = 0; i < size / 4; i++) {
1913
		int r;
1914
		for (r = 0; r < bo_gem->reloc_count; r++) {
1915
			struct drm_i915_gem_relocation_entry *reloc;
1916
			drm_intel_reloc_target *info;
1917
 
1918
			reloc = &bo_gem->relocs[r];
1919
			info = &bo_gem->reloc_target_info[r];
1920
 
1921
			if (reloc->offset == offset + i * 4) {
1922
				drm_intel_bo_gem *target_gem;
1923
				uint32_t val;
1924
 
1925
				target_gem = (drm_intel_bo_gem *)info->bo;
1926
 
1927
				val = reloc->delta;
1928
				val += target_gem->aub_offset;
1929
 
1930
				aub_out(bufmgr_gem, val);
1931
				data[i] = val;
1932
				break;
1933
			}
1934
		}
1935
		if (r == bo_gem->reloc_count) {
1936
			/* no relocation, just the data */
1937
			aub_out(bufmgr_gem, data[i]);
1938
		}
1939
	}
1940
 
1941
	free(data);
1942
}
1943
 
1944
static void
1945
aub_bo_get_address(drm_intel_bo *bo)
1946
{
1947
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1948
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1949
 
1950
	/* Give the object a graphics address in the AUB file.  We
1951
	 * don't just use the GEM object address because we do AUB
1952
	 * dumping before execution -- we want to successfully log
1953
	 * when the hardware might hang, and we might even want to aub
1954
	 * capture for a driver trying to execute on a different
1955
	 * generation of hardware by disabling the actual kernel exec
1956
	 * call.
1957
	 */
1958
	bo_gem->aub_offset = bufmgr_gem->aub_offset;
1959
	bufmgr_gem->aub_offset += bo->size;
1960
	/* XXX: Handle aperture overflow. */
1961
	assert(bufmgr_gem->aub_offset < 256 * 1024 * 1024);
1962
}
1963
 
1964
static void
1965
aub_write_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
1966
		      uint32_t offset, uint32_t size)
1967
{
1968
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
1969
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
1970
 
1971
	aub_out(bufmgr_gem,
1972
		CMD_AUB_TRACE_HEADER_BLOCK |
1973
		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
1974
	aub_out(bufmgr_gem,
1975
		AUB_TRACE_MEMTYPE_GTT | type | AUB_TRACE_OP_DATA_WRITE);
1976
	aub_out(bufmgr_gem, subtype);
1977
	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
1978
	aub_out(bufmgr_gem, size);
1979
	if (bufmgr_gem->gen >= 8)
1980
		aub_out(bufmgr_gem, 0);
1981
	aub_write_bo_data(bo, offset, size);
1982
}
1983
 
1984
/**
1985
 * Break up large objects into multiple writes.  Otherwise a 128kb VBO
1986
 * would overflow the 16-bit size field in the packet header and
1987
 * everything goes badly after that.
1988
 */
1989
static void
1990
aub_write_large_trace_block(drm_intel_bo *bo, uint32_t type, uint32_t subtype,
1991
			    uint32_t offset, uint32_t size)
1992
{
1993
	uint32_t block_size;
1994
	uint32_t sub_offset;
1995
 
1996
	for (sub_offset = 0; sub_offset < size; sub_offset += block_size) {
1997
		block_size = size - sub_offset;
1998
 
1999
		if (block_size > 8 * 4096)
2000
			block_size = 8 * 4096;
2001
 
2002
		aub_write_trace_block(bo, type, subtype, offset + sub_offset,
2003
				      block_size);
2004
	}
2005
}
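/* For example, with the 8 * 4096 byte cap above, a 128kb VBO is emitted as
 * four 32kb trace blocks instead of one oversized packet.
 */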
2006
 
2007
static void
2008
aub_write_bo(drm_intel_bo *bo)
2009
{
2010
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2011
	uint32_t offset = 0;
2012
	unsigned i;
2013
 
2014
	aub_bo_get_address(bo);
2015
 
2016
	/* Write out each annotated section separately. */
2017
	for (i = 0; i < bo_gem->aub_annotation_count; ++i) {
2018
		drm_intel_aub_annotation *annotation =
2019
			&bo_gem->aub_annotations[i];
2020
		uint32_t ending_offset = annotation->ending_offset;
2021
		if (ending_offset > bo->size)
2022
			ending_offset = bo->size;
2023
		if (ending_offset > offset) {
2024
			aub_write_large_trace_block(bo, annotation->type,
2025
						    annotation->subtype,
2026
						    offset,
2027
						    ending_offset - offset);
2028
			offset = ending_offset;
2029
		}
2030
	}
2031
 
2032
	/* Write out any remaining unannotated data */
2033
	if (offset < bo->size) {
2034
		aub_write_large_trace_block(bo, AUB_TRACE_TYPE_NOTYPE, 0,
2035
					    offset, bo->size - offset);
2036
	}
2037
}
2038
 
2039
/*
2040
 * Make a ring buffer on the fly and dump it
2041
 */
2042
static void
2043
aub_build_dump_ringbuffer(drm_intel_bufmgr_gem *bufmgr_gem,
2044
			  uint32_t batch_buffer, int ring_flag)
2045
{
2046
	uint32_t ringbuffer[4096];
2047
	int ring = AUB_TRACE_TYPE_RING_PRB0; /* The default ring */
2048
	int ring_count = 0;
2049
 
2050
	if (ring_flag == I915_EXEC_BSD)
2051
		ring = AUB_TRACE_TYPE_RING_PRB1;
2052
	else if (ring_flag == I915_EXEC_BLT)
2053
		ring = AUB_TRACE_TYPE_RING_PRB2;
2054
 
2055
	/* Make a ring buffer to execute our batchbuffer. */
2056
	memset(ringbuffer, 0, sizeof(ringbuffer));
2057
	if (bufmgr_gem->gen >= 8) {
2058
		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START | (3 - 2);
2059
		ringbuffer[ring_count++] = batch_buffer;
2060
		ringbuffer[ring_count++] = 0;
2061
	} else {
2062
		ringbuffer[ring_count++] = AUB_MI_BATCH_BUFFER_START;
2063
		ringbuffer[ring_count++] = batch_buffer;
2064
	}
2065
 
2066
	/* Write out the ring.  This appears to trigger execution of
2067
	 * the ring in the simulator.
2068
	 */
2069
	aub_out(bufmgr_gem,
2070
		CMD_AUB_TRACE_HEADER_BLOCK |
2071
		((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
2072
	aub_out(bufmgr_gem,
2073
		AUB_TRACE_MEMTYPE_GTT | ring | AUB_TRACE_OP_COMMAND_WRITE);
2074
	aub_out(bufmgr_gem, 0); /* general/surface subtype */
2075
	aub_out(bufmgr_gem, bufmgr_gem->aub_offset);
2076
	aub_out(bufmgr_gem, ring_count * 4);
2077
	if (bufmgr_gem->gen >= 8)
2078
		aub_out(bufmgr_gem, 0);
2079
 
2080
	/* FIXME: Need some flush operations here? */
2081
	aub_out_data(bufmgr_gem, ringbuffer, ring_count * 4);
2082
 
2083
	/* Update offset pointer */
2084
	bufmgr_gem->aub_offset += 4096;
2085
}
2086
 
2087
void
2088
drm_intel_gem_bo_aub_dump_bmp(drm_intel_bo *bo,
2089
			      int x1, int y1, int width, int height,
2090
			      enum aub_dump_bmp_format format,
2091
			      int pitch, int offset)
2092
{
2093
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2094
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2095
	uint32_t cpp;
2096
 
2097
	switch (format) {
2098
	case AUB_DUMP_BMP_FORMAT_8BIT:
2099
		cpp = 1;
2100
		break;
2101
	case AUB_DUMP_BMP_FORMAT_ARGB_4444:
2102
		cpp = 2;
2103
		break;
2104
	case AUB_DUMP_BMP_FORMAT_ARGB_0888:
2105
	case AUB_DUMP_BMP_FORMAT_ARGB_8888:
2106
		cpp = 4;
2107
		break;
2108
	default:
2109
		printf("Unknown AUB dump format %d\n", format);
2110
		return;
2111
	}
2112
 
2113
	if (!bufmgr_gem->aub_file)
2114
		return;
2115
 
2116
	aub_out(bufmgr_gem, CMD_AUB_DUMP_BMP | 4);
2117
	aub_out(bufmgr_gem, (y1 << 16) | x1);
2118
	aub_out(bufmgr_gem,
2119
		(format << 24) |
2120
		(cpp << 19) |
2121
		pitch / 4);
2122
	aub_out(bufmgr_gem, (height << 16) | width);
2123
	aub_out(bufmgr_gem, bo_gem->aub_offset + offset);
2124
	aub_out(bufmgr_gem,
2125
		((bo_gem->tiling_mode != I915_TILING_NONE) ? (1 << 2) : 0) |
2126
		((bo_gem->tiling_mode == I915_TILING_Y) ? (1 << 3) : 0));
2127
}
2128
 
2129
static void
2130
aub_exec(drm_intel_bo *bo, int ring_flag, int used)
2131
{
2132
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2133
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2134
	int i;
2135
	bool batch_buffer_needs_annotations;
2136
 
2137
	if (!bufmgr_gem->aub_file)
2138
		return;
2139
 
2140
	/* If batch buffer is not annotated, annotate it the best we
2141
	 * can.
2142
	 */
2143
	batch_buffer_needs_annotations = bo_gem->aub_annotation_count == 0;
2144
	if (batch_buffer_needs_annotations) {
2145
		drm_intel_aub_annotation annotations[2] = {
2146
			{ AUB_TRACE_TYPE_BATCH, 0, used },
2147
			{ AUB_TRACE_TYPE_NOTYPE, 0, bo->size }
2148
		};
2149
		drm_intel_bufmgr_gem_set_aub_annotations(bo, annotations, 2);
2150
	}
2151
 
2152
	/* Write out all buffers to AUB memory */
2153
	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2154
		aub_write_bo(bufmgr_gem->exec_bos[i]);
2155
	}
2156
 
2157
	/* Remove any annotations we added */
2158
	if (batch_buffer_needs_annotations)
2159
		drm_intel_bufmgr_gem_set_aub_annotations(bo, NULL, 0);
2160
 
2161
	/* Dump ring buffer */
2162
	aub_build_dump_ringbuffer(bufmgr_gem, bo_gem->aub_offset, ring_flag);
2163
 
2164
	fflush(bufmgr_gem->aub_file);
2165
 
2166
	/*
2167
	 * One frame has been dumped. So reset the aub_offset for the next frame.
2168
	 *
2169
	 * FIXME: Can we do this?
2170
	 */
2171
	bufmgr_gem->aub_offset = 0x10000;
2172
}
2173
 
2174
 
2175
static int
2176
do_exec2(drm_intel_bo *bo, int used, drm_intel_context *ctx,
2177
	 drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2178
	 unsigned int flags)
2179
{
2180
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bo->bufmgr;
2181
	struct drm_i915_gem_execbuffer2 execbuf;
2182
	int ret = 0;
2183
	int i;
2184
 
2185
	switch (flags & 0x7) {
2186
	default:
2187
		return -EINVAL;
2188
	case I915_EXEC_BLT:
2189
		if (!bufmgr_gem->has_blt)
2190
			return -EINVAL;
2191
		break;
2192
	case I915_EXEC_BSD:
2193
		if (!bufmgr_gem->has_bsd)
2194
			return -EINVAL;
2195
		break;
2196
	case I915_EXEC_VEBOX:
2197
		if (!bufmgr_gem->has_vebox)
2198
			return -EINVAL;
2199
		break;
2200
	case I915_EXEC_RENDER:
2201
	case I915_EXEC_DEFAULT:
2202
		break;
2203
	}
2204
 
2205
//	pthread_mutex_lock(&bufmgr_gem->lock);
2206
	/* Update indices and set up the validate list. */
2207
	drm_intel_gem_bo_process_reloc2(bo);
2208
 
2209
	/* Add the batch buffer to the validation list.  There are no relocations
2210
	 * pointing to it.
2211
	 */
2212
	drm_intel_add_validate_buffer2(bo, 0);
2213
 
2214
	VG_CLEAR(execbuf);
2215
	execbuf.buffers_ptr = (uintptr_t)bufmgr_gem->exec2_objects;
2216
	execbuf.buffer_count = bufmgr_gem->exec_count;
2217
	execbuf.batch_start_offset = 0;
2218
	execbuf.batch_len = used;
2219
	execbuf.cliprects_ptr = (uintptr_t)cliprects;
2220
	execbuf.num_cliprects = num_cliprects;
2221
	execbuf.DR1 = 0;
2222
	execbuf.DR4 = DR4;
2223
	execbuf.flags = flags;
2224
	if (ctx == NULL)
2225
		i915_execbuffer2_set_context_id(execbuf, 0);
2226
	else
2227
		i915_execbuffer2_set_context_id(execbuf, ctx->ctx_id);
2228
	execbuf.rsvd2 = 0;
2229
 
2230
	aub_exec(bo, flags, used);
2231
 
2232
	if (bufmgr_gem->no_exec)
2233
		goto skip_execution;
2234
 
2235
	ret = drmIoctl(bufmgr_gem->fd,
2236
		       DRM_IOCTL_I915_GEM_EXECBUFFER2,
2237
		       &execbuf);
2238
	if (ret != 0) {
2239
		ret = -errno;
2240
		if (ret == -ENOSPC) {
2241
			DBG("Execbuffer fails to pin. "
2242
			    "Estimate: %u. Actual: %u. Available: %u\n",
2243
			    drm_intel_gem_estimate_batch_space(bufmgr_gem->exec_bos,
2244
							       bufmgr_gem->exec_count),
2245
			    drm_intel_gem_compute_batch_space(bufmgr_gem->exec_bos,
2246
							      bufmgr_gem->exec_count),
2247
			    (unsigned int) bufmgr_gem->gtt_size);
2248
		}
2249
	}
2250
	drm_intel_update_buffer_offsets2(bufmgr_gem);
2251
 
2252
skip_execution:
2253
	if (bufmgr_gem->bufmgr.debug)
2254
		drm_intel_gem_dump_validation_list(bufmgr_gem);
2255
 
2256
	for (i = 0; i < bufmgr_gem->exec_count; i++) {
2257
		drm_intel_bo *bo = bufmgr_gem->exec_bos[i];
2258
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *)bo;
2259
 
5068 serge 2260
		bo_gem->idle = false;
2261
 
4363 Serge 2262
		/* Disconnect the buffer from the validate list */
2263
		bo_gem->validate_index = -1;
2264
		bufmgr_gem->exec_bos[i] = NULL;
2265
	}
2266
	bufmgr_gem->exec_count = 0;
2267
//	pthread_mutex_unlock(&bufmgr_gem->lock);
2268
 
2269
	return ret;
2270
}
2271
 
2272
static int
2273
drm_intel_gem_bo_exec2(drm_intel_bo *bo, int used,
2274
		       drm_clip_rect_t *cliprects, int num_cliprects,
2275
		       int DR4)
2276
{
2277
	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2278
			I915_EXEC_RENDER);
2279
}
2280
 
2281
static int
2282
drm_intel_gem_bo_mrb_exec2(drm_intel_bo *bo, int used,
2283
			drm_clip_rect_t *cliprects, int num_cliprects, int DR4,
2284
			unsigned int flags)
2285
{
2286
	return do_exec2(bo, used, NULL, cliprects, num_cliprects, DR4,
2287
			flags);
2288
}
2289
 
2290
int
2291
drm_intel_gem_bo_context_exec(drm_intel_bo *bo, drm_intel_context *ctx,
2292
			      int used, unsigned int flags)
2293
{
2294
	return do_exec2(bo, used, ctx, NULL, 0, 0, flags);
2295
}
2296
 
2297
static int
2298
drm_intel_gem_bo_pin(drm_intel_bo *bo, uint32_t alignment)
2299
{
2300
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2301
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2302
	struct drm_i915_gem_pin pin;
2303
	int ret;
2304
 
2305
	VG_CLEAR(pin);
2306
	pin.handle = bo_gem->gem_handle;
2307
	pin.alignment = alignment;
2308
 
2309
	ret = drmIoctl(bufmgr_gem->fd,
2310
		       DRM_IOCTL_I915_GEM_PIN,
2311
		       &pin);
2312
	if (ret != 0)
2313
		return -errno;
2314
 
5068 serge 2315
	bo->offset64 = pin.offset;
4363 Serge 2316
	bo->offset = pin.offset;
2317
	return 0;
2318
}
2319
 
2320
static int
2321
drm_intel_gem_bo_unpin(drm_intel_bo *bo)
2322
{
2323
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2324
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2325
	struct drm_i915_gem_unpin unpin;
2326
	int ret;
2327
 
2328
	VG_CLEAR(unpin);
2329
	unpin.handle = bo_gem->gem_handle;
2330
 
2331
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_UNPIN, &unpin);
2332
	if (ret != 0)
2333
		return -errno;
2334
 
2335
	return 0;
2336
}
2337
 
2338
static int
2339
drm_intel_gem_bo_set_tiling_internal(drm_intel_bo *bo,
2340
				     uint32_t tiling_mode,
2341
				     uint32_t stride)
2342
{
2343
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2344
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2345
	struct drm_i915_gem_set_tiling set_tiling;
2346
	int ret;
2347
 
2348
	if (bo_gem->global_name == 0 &&
2349
	    tiling_mode == bo_gem->tiling_mode &&
2350
	    stride == bo_gem->stride)
2351
		return 0;
2352
 
2353
	memset(&set_tiling, 0, sizeof(set_tiling));
2354
//	do {
2355
		/* set_tiling is slightly broken and overwrites the
2356
		 * input on the error path, so we have to open code
2357
		 * drmIoctl.
2358
		 */
2359
		set_tiling.handle = bo_gem->gem_handle;
2360
		set_tiling.tiling_mode = tiling_mode;
2361
		set_tiling.stride = stride;
2362
 
2363
		ret = drmIoctl(bufmgr_gem->fd,
2364
			    DRM_IOCTL_I915_GEM_SET_TILING,
2365
			    &set_tiling);
2366
//	} while (ret == -1 && (errno == EINTR || errno == EAGAIN));
2367
	if (ret == -1)
2368
		return -errno;
2369
 
2370
	bo_gem->tiling_mode = set_tiling.tiling_mode;
2371
	bo_gem->swizzle_mode = set_tiling.swizzle_mode;
2372
	bo_gem->stride = set_tiling.stride;
2373
	return 0;
2374
}
2375
 
2376
static int
2377
drm_intel_gem_bo_set_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2378
			    uint32_t stride)
2379
{
2380
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2381
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2382
	int ret;
2383
 
2384
	/* Linear buffers have no stride. By ensuring that we only ever use
2385
	 * stride 0 with linear buffers, we simplify our code.
2386
	 */
2387
	if (*tiling_mode == I915_TILING_NONE)
2388
		stride = 0;
2389
 
2390
	ret = drm_intel_gem_bo_set_tiling_internal(bo, *tiling_mode, stride);
2391
	if (ret == 0)
2392
		drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
2393
 
2394
	*tiling_mode = bo_gem->tiling_mode;
2395
	return ret;
2396
}
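/* A minimal sketch of requesting X tiling on an existing buffer.  The tiling
 * argument is in/out: on return it holds what the kernel actually set, so a
 * caller has to re-check it rather than assume the request was honoured.
 */
#if 0
static int
example_make_x_tiled(drm_intel_bo *bo, uint32_t stride)
{
	uint32_t tiling = I915_TILING_X;
	int ret = drm_intel_bo_set_tiling(bo, &tiling, stride);

	if (ret != 0 || tiling != I915_TILING_X)
		return -1;	/* keep using the buffer as linear */
	return 0;
}
#endif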
2397
 
2398
static int
2399
drm_intel_gem_bo_get_tiling(drm_intel_bo *bo, uint32_t * tiling_mode,
2400
			    uint32_t * swizzle_mode)
2401
{
2402
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2403
 
2404
	*tiling_mode = bo_gem->tiling_mode;
2405
	*swizzle_mode = bo_gem->swizzle_mode;
2406
	return 0;
2407
}
2408
 
2409
#if 0
2410
drm_intel_bo *
2411
drm_intel_bo_gem_create_from_prime(drm_intel_bufmgr *bufmgr, int prime_fd, int size)
2412
{
2413
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2414
	int ret;
2415
	uint32_t handle;
2416
	drm_intel_bo_gem *bo_gem;
2417
	struct drm_i915_gem_get_tiling get_tiling;
2418
	drmMMListHead *list;
2419
 
2420
	ret = drmPrimeFDToHandle(bufmgr_gem->fd, prime_fd, &handle);
2421
 
2422
	/*
2423
	 * See if the kernel has already returned this buffer to us. Just as
2424
	 * for named buffers, we must not create two bo's pointing at the same
2425
	 * kernel object
2426
	 */
2427
	for (list = bufmgr_gem->named.next;
2428
	     list != &bufmgr_gem->named;
2429
	     list = list->next) {
2430
		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
2431
		if (bo_gem->gem_handle == handle) {
2432
			drm_intel_gem_bo_reference(&bo_gem->bo);
2433
			return &bo_gem->bo;
2434
		}
2435
	}
2436
 
2437
	if (ret) {
2438
		fprintf(stderr, "ret is %d %d\n", ret, errno);
2439
		return NULL;
2440
	}
2441
 
2442
	bo_gem = calloc(1, sizeof(*bo_gem));
2443
	if (!bo_gem)
2444
		return NULL;
2445
 
2446
	/* Determine size of bo.  The fd-to-handle ioctl really should
2447
	 * return the size, but it doesn't.  If we have kernel 3.12 or
2448
	 * later, we can lseek on the prime fd to get the size.  Older
2449
	 * kernels will just fail, in which case we fall back to the
2450
	 * provided (estimated or guessed) size. */
2451
	ret = lseek(prime_fd, 0, SEEK_END);
2452
	if (ret != -1)
2453
		bo_gem->bo.size = ret;
2454
	else
2455
		bo_gem->bo.size = size;
2456
 
2457
	bo_gem->bo.handle = handle;
2458
	bo_gem->bo.bufmgr = bufmgr;
2459
 
2460
	bo_gem->gem_handle = handle;
2461
 
2462
	atomic_set(&bo_gem->refcount, 1);
2463
 
2464
	bo_gem->name = "prime";
2465
	bo_gem->validate_index = -1;
2466
	bo_gem->reloc_tree_fences = 0;
2467
	bo_gem->used_as_reloc_target = false;
2468
	bo_gem->has_error = false;
2469
	bo_gem->reusable = false;
2470
 
2471
	DRMINITLISTHEAD(&bo_gem->vma_list);
2472
	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2473
 
2474
	VG_CLEAR(get_tiling);
2475
	get_tiling.handle = bo_gem->gem_handle;
2476
	ret = drmIoctl(bufmgr_gem->fd,
2477
		       DRM_IOCTL_I915_GEM_GET_TILING,
2478
		       &get_tiling);
2479
	if (ret != 0) {
2480
		drm_intel_gem_bo_unreference(&bo_gem->bo);
2481
		return NULL;
2482
	}
2483
	bo_gem->tiling_mode = get_tiling.tiling_mode;
2484
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
2485
	/* XXX stride is unknown */
2486
	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
2487
 
2488
	return &bo_gem->bo;
2489
}
2490
 
2491
int
2492
drm_intel_bo_gem_export_to_prime(drm_intel_bo *bo, int *prime_fd)
2493
{
2494
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2495
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2496
 
2497
        if (DRMLISTEMPTY(&bo_gem->name_list))
2498
                DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
2499
 
2500
	if (drmPrimeHandleToFD(bufmgr_gem->fd, bo_gem->gem_handle,
2501
			       DRM_CLOEXEC, prime_fd) != 0)
2502
		return -errno;
2503
 
2504
	bo_gem->reusable = false;
2505
 
2506
	return 0;
2507
}
2508
#endif
2509
 
2510
static int
2511
drm_intel_gem_bo_flink(drm_intel_bo *bo, uint32_t * name)
2512
{
2513
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bo->bufmgr;
2514
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2515
	int ret;
2516
 
2517
	if (!bo_gem->global_name) {
2518
		struct drm_gem_flink flink;
2519
 
2520
		VG_CLEAR(flink);
2521
		flink.handle = bo_gem->gem_handle;
2522
 
2523
		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_GEM_FLINK, &flink);
2524
		if (ret != 0)
2525
			return -errno;
2526
 
2527
		bo_gem->global_name = flink.name;
2528
		bo_gem->reusable = false;
2529
 
5068 serge 2530
		if (DRMLISTEMPTY(&bo_gem->name_list))
2531
			DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
4363 Serge 2532
	}
2533
 
2534
	*name = bo_gem->global_name;
2535
	return 0;
2536
}
2537
 
2538
/**
2539
 * Enables unlimited caching of buffer objects for reuse.
2540
 *
2541
 * This is potentially very memory expensive, as the cache at each bucket
2542
 * size is only bounded by how many buffers of that size we've managed to have
2543
 * in flight at once.
2544
 */
2545
void
2546
drm_intel_bufmgr_gem_enable_reuse(drm_intel_bufmgr *bufmgr)
2547
{
2548
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
2549
 
2550
	bufmgr_gem->bo_reuse = true;
2551
}
2552
 
2553
/**
2554
 * Enable use of fenced reloc type.
2555
 *
2556
 * New code should enable this to avoid unnecessary fence register
2557
 * allocation.  If this option is not enabled, all relocs will have a fence
2558
 * register allocated.
2559
 */
2560
void
2561
drm_intel_bufmgr_gem_enable_fenced_relocs(drm_intel_bufmgr *bufmgr)
2562
{
2563
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2564
 
2565
	if (bufmgr_gem->bufmgr.bo_exec == drm_intel_gem_bo_exec2)
2566
		bufmgr_gem->fenced_relocs = true;
2567
}
2568
 
2569
/**
2570
 * Return the additional aperture space required by the tree of buffer objects
2571
 * rooted at bo.
2572
 */
2573
static int
2574
drm_intel_gem_bo_get_aperture_space(drm_intel_bo *bo)
2575
{
2576
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2577
	int i;
2578
	int total = 0;
2579
 
2580
	if (bo == NULL || bo_gem->included_in_check_aperture)
2581
		return 0;
2582
 
2583
	total += bo->size;
2584
	bo_gem->included_in_check_aperture = true;
2585
 
2586
	for (i = 0; i < bo_gem->reloc_count; i++)
2587
		total +=
2588
		    drm_intel_gem_bo_get_aperture_space(bo_gem->
2589
							reloc_target_info[i].bo);
2590
 
2591
	return total;
2592
}
2593
 
2594
/**
2595
 * Count the number of buffers in this list that need a fence reg
2596
 *
2597
 * If the count is greater than the number of available regs, we'll have
2598
 * to ask the caller to resubmit a batch with fewer tiled buffers.
2599
 *
2600
 * This function over-counts if the same buffer is used multiple times.
2601
 */
2602
static unsigned int
2603
drm_intel_gem_total_fences(drm_intel_bo ** bo_array, int count)
2604
{
2605
	int i;
2606
	unsigned int total = 0;
2607
 
2608
	for (i = 0; i < count; i++) {
2609
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2610
 
2611
		if (bo_gem == NULL)
2612
			continue;
2613
 
2614
		total += bo_gem->reloc_tree_fences;
2615
	}
2616
	return total;
2617
}
2618
 
2619
/**
2620
 * Clear the flag set by drm_intel_gem_bo_get_aperture_space() so we're ready
2621
 * for the next drm_intel_bufmgr_check_aperture_space() call.
2622
 */
2623
static void
2624
drm_intel_gem_bo_clear_aperture_space_flag(drm_intel_bo *bo)
2625
{
2626
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2627
	int i;
2628
 
2629
	if (bo == NULL || !bo_gem->included_in_check_aperture)
2630
		return;
2631
 
2632
	bo_gem->included_in_check_aperture = false;
2633
 
2634
	for (i = 0; i < bo_gem->reloc_count; i++)
2635
		drm_intel_gem_bo_clear_aperture_space_flag(bo_gem->
2636
							   reloc_target_info[i].bo);
2637
}
2638
 
2639
/**
2640
 * Return a conservative estimate for the amount of aperture required
2641
 * for a collection of buffers. This may double-count some buffers.
2642
 */
2643
static unsigned int
2644
drm_intel_gem_estimate_batch_space(drm_intel_bo **bo_array, int count)
2645
{
2646
	int i;
2647
	unsigned int total = 0;
2648
 
2649
	for (i = 0; i < count; i++) {
2650
		drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo_array[i];
2651
		if (bo_gem != NULL)
2652
			total += bo_gem->reloc_tree_size;
2653
	}
2654
	return total;
2655
}
2656
 
2657
/**
2658
 * Return the amount of aperture needed for a collection of buffers.
2659
 * This avoids double counting any buffers, at the cost of looking
2660
 * at every buffer in the set.
2661
 */
2662
static unsigned int
2663
drm_intel_gem_compute_batch_space(drm_intel_bo **bo_array, int count)
2664
{
2665
	int i;
2666
	unsigned int total = 0;
2667
 
2668
	for (i = 0; i < count; i++) {
2669
		total += drm_intel_gem_bo_get_aperture_space(bo_array[i]);
2670
		/* For the first buffer object in the array, we get an
2671
		 * accurate count back for its reloc_tree size (since nothing
2672
		 * had been flagged as being counted yet).  We can save that
2673
		 * value out as a more conservative reloc_tree_size that
2674
		 * avoids double-counting target buffers.  Since the first
2675
		 * buffer happens to usually be the batch buffer in our
2676
		 * callers, this can pull us back from doing the tree
2677
		 * walk on every new batch emit.
2678
		 */
2679
		if (i == 0) {
2680
			drm_intel_bo_gem *bo_gem =
2681
			    (drm_intel_bo_gem *) bo_array[i];
2682
			bo_gem->reloc_tree_size = total;
2683
		}
2684
	}
2685
 
2686
	for (i = 0; i < count; i++)
2687
		drm_intel_gem_bo_clear_aperture_space_flag(bo_array[i]);
2688
	return total;
2689
}
2690
 
2691
/**
2692
 * Return -1 if the batchbuffer should be flushed before attempting to
2693
 * emit rendering referencing the buffers pointed to by bo_array.
2694
 *
2695
 * This is required because if we try to emit a batchbuffer with relocations
2696
 * to a tree of buffers that won't simultaneously fit in the aperture,
2697
 * the rendering will return an error at a point where the software is not
2698
 * prepared to recover from it.
2699
 *
2700
 * However, we also want to emit the batchbuffer significantly before we reach
2701
 * the limit, as a series of batchbuffers each of which references buffers
2702
 * covering almost all of the aperture means that at each emit we end up
2703
 * waiting to evict a buffer from the last rendering, and we get synchronous
2704
 * performance.  By emitting smaller batchbuffers, we eat some CPU overhead to
2705
 * get better parallelism.
2706
 */
2707
static int
2708
drm_intel_gem_check_aperture_space(drm_intel_bo **bo_array, int count)
2709
{
2710
	drm_intel_bufmgr_gem *bufmgr_gem =
2711
	    (drm_intel_bufmgr_gem *) bo_array[0]->bufmgr;
2712
	unsigned int total = 0;
2713
	unsigned int threshold = bufmgr_gem->gtt_size * 3 / 4;
2714
	int total_fences;
2715
 
2716
	/* Check for fence reg constraints if necessary */
2717
	if (bufmgr_gem->available_fences) {
2718
		total_fences = drm_intel_gem_total_fences(bo_array, count);
2719
		if (total_fences > bufmgr_gem->available_fences)
2720
			return -ENOSPC;
2721
	}
2722
 
2723
	total = drm_intel_gem_estimate_batch_space(bo_array, count);
2724
 
2725
	if (total > threshold)
2726
		total = drm_intel_gem_compute_batch_space(bo_array, count);
2727
 
2728
	if (total > threshold) {
2729
		DBG("check_space: overflowed available aperture, "
2730
		    "%dkb vs %dkb\n",
2731
		    total / 1024, (int)bufmgr_gem->gtt_size / 1024);
2732
		return -ENOSPC;
2733
	} else {
2734
		DBG("drm_check_space: total %dkb vs bufgr %dkb\n", total / 1024,
2735
		    (int)bufmgr_gem->gtt_size / 1024);
2736
		return 0;
2737
	}
2738
}
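/* A minimal sketch of the calling pattern described above: collect the
 * buffers a draw call will reference (batch first) and flush early when the
 * check fails.  flush_batch() is a hypothetical driver hook.
 */
#if 0
static void
example_require_space(drm_intel_bo *batch, drm_intel_bo *texture,
		      drm_intel_bo *vertices)
{
	drm_intel_bo *bos[3] = { batch, texture, vertices };

	if (drm_intel_bufmgr_check_aperture_space(bos, 3) != 0)
		flush_batch();
}
#endif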
2739
 
2740
/*
2741
 * Disable buffer reuse for objects which are shared with the kernel
2742
 * as scanout buffers
2743
 */
2744
static int
2745
drm_intel_gem_bo_disable_reuse(drm_intel_bo *bo)
2746
{
2747
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2748
 
2749
	bo_gem->reusable = false;
2750
	return 0;
2751
}
2752
 
2753
static int
2754
drm_intel_gem_bo_is_reusable(drm_intel_bo *bo)
2755
{
2756
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2757
 
2758
	return bo_gem->reusable;
2759
}
2760
 
2761
static int
2762
_drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2763
{
2764
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
2765
	int i;
2766
 
2767
	for (i = 0; i < bo_gem->reloc_count; i++) {
2768
		if (bo_gem->reloc_target_info[i].bo == target_bo)
2769
			return 1;
2770
		if (bo == bo_gem->reloc_target_info[i].bo)
2771
			continue;
2772
		if (_drm_intel_gem_bo_references(bo_gem->reloc_target_info[i].bo,
2773
						target_bo))
2774
			return 1;
2775
	}
2776
 
2777
	return 0;
2778
}
2779
 
2780
/** Return true if target_bo is referenced by bo's relocation tree. */
2781
static int
2782
drm_intel_gem_bo_references(drm_intel_bo *bo, drm_intel_bo *target_bo)
2783
{
2784
	drm_intel_bo_gem *target_bo_gem = (drm_intel_bo_gem *) target_bo;
2785
 
2786
	if (bo == NULL || target_bo == NULL)
2787
		return 0;
2788
	if (target_bo_gem->used_as_reloc_target)
2789
		return _drm_intel_gem_bo_references(bo, target_bo);
2790
	return 0;
2791
}
2792
 
2793
static void
2794
add_bucket(drm_intel_bufmgr_gem *bufmgr_gem, int size)
2795
{
2796
	unsigned int i = bufmgr_gem->num_buckets;
2797
 
2798
	assert(i < ARRAY_SIZE(bufmgr_gem->cache_bucket));
2799
 
2800
	DRMINITLISTHEAD(&bufmgr_gem->cache_bucket[i].head);
2801
	bufmgr_gem->cache_bucket[i].size = size;
2802
	bufmgr_gem->num_buckets++;
2803
}
2804
 
2805
static void
2806
init_cache_buckets(drm_intel_bufmgr_gem *bufmgr_gem)
2807
{
2808
	unsigned long size, cache_max_size = 64 * 1024 * 1024;
2809
 
2810
	/* OK, so power of two buckets was too wasteful of memory.
2811
	 * Give 3 other sizes between each power of two, to hopefully
2812
	 * cover things accurately enough.  (The alternative is
2813
	 * probably to just go for exact matching of sizes, and assume
2814
	 * that for things like composited window resize the tiled
2815
	 * width/height alignment and rounding of sizes to pages will
2816
	 * get us useful cache hit rates anyway)
2817
	 */
2818
	add_bucket(bufmgr_gem, 4096);
2819
	add_bucket(bufmgr_gem, 4096 * 2);
2820
	add_bucket(bufmgr_gem, 4096 * 3);
2821
 
2822
	/* Initialize the linked lists for BO reuse cache. */
2823
	for (size = 4 * 4096; size <= cache_max_size; size *= 2) {
2824
		add_bucket(bufmgr_gem, size);
2825
 
2826
		add_bucket(bufmgr_gem, size + size * 1 / 4);
2827
		add_bucket(bufmgr_gem, size + size * 2 / 4);
2828
		add_bucket(bufmgr_gem, size + size * 3 / 4);
2829
	}
2830
}
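/* The resulting bucket sizes are 4kb, 8kb, 12kb, then 16kb, 20kb, 24kb,
 * 28kb, 32kb, 40kb, 48kb, 56kb, 64kb and so on up to 64MB: each power of
 * two plus three evenly spaced intermediate sizes, 55 buckets in total,
 * which fits in the 14 * 4 entry cache_bucket[] array.
 */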
2831
 
2832
void
2833
drm_intel_bufmgr_gem_set_vma_cache_size(drm_intel_bufmgr *bufmgr, int limit)
2834
{
2835
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2836
 
2837
	bufmgr_gem->vma_max = limit;
2838
 
2839
	drm_intel_gem_bo_purge_vma_cache(bufmgr_gem);
2840
}
2841
 
2842
/**
2843
 * Get the PCI ID for the device.  This can be overridden by setting the
2844
 * INTEL_DEVID_OVERRIDE environment variable to the desired ID.
2845
 */
2846
static int
2847
get_pci_device_id(drm_intel_bufmgr_gem *bufmgr_gem)
2848
{
2849
	char *devid_override;
2850
	int devid;
2851
	int ret;
2852
	drm_i915_getparam_t gp;
2853
 
2854
	VG_CLEAR(devid);
2855
	VG_CLEAR(gp);
2856
	gp.param = I915_PARAM_CHIPSET_ID;
2857
	gp.value = &devid;
2858
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
2859
	if (ret) {
2860
		fprintf(stderr, "get chip id failed: %d [%d]\n", ret, errno);
2861
		fprintf(stderr, "param: %d, val: %d\n", gp.param, *gp.value);
2862
	}
2863
	return devid;
2864
}
2865
 
2866
int
2867
drm_intel_bufmgr_gem_get_devid(drm_intel_bufmgr *bufmgr)
2868
{
2869
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2870
 
2871
	return bufmgr_gem->pci_device;
2872
}
2873
 
2874
/**
2875
 * Sets up AUB dumping.
2876
 *
2877
 * This is a trace file format that can be used with the simulator.
2878
 * Packets are emitted in a format somewhat like GPU command packets.
2879
 * You can set up a GTT and upload your objects into the referenced
2880
 * space, then send off batchbuffers and get BMPs out the other end.
2881
 */
2882
void
2883
drm_intel_bufmgr_gem_set_aub_dump(drm_intel_bufmgr *bufmgr, int enable)
2884
{
2885
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2886
	int entry = 0x200003;
2887
	int i;
2888
	int gtt_size = 0x10000;
2889
	const char *filename;
2890
 
2891
	if (!enable) {
2892
		if (bufmgr_gem->aub_file) {
2893
			fclose(bufmgr_gem->aub_file);
2894
			bufmgr_gem->aub_file = NULL;
2895
		}
2896
		return;
2897
	}
2898
 
2899
	bufmgr_gem->aub_file = fopen("intel.aub", "w+");
2900
	if (!bufmgr_gem->aub_file)
2901
		return;
2902
 
2903
	/* Start allocating objects from just after the GTT. */
2904
	bufmgr_gem->aub_offset = gtt_size;
2905
 
2906
	/* Start with a (required) version packet. */
2907
	aub_out(bufmgr_gem, CMD_AUB_HEADER | (13 - 2));
2908
	aub_out(bufmgr_gem,
2909
		(4 << AUB_HEADER_MAJOR_SHIFT) |
2910
		(0 << AUB_HEADER_MINOR_SHIFT));
2911
	for (i = 0; i < 8; i++) {
2912
		aub_out(bufmgr_gem, 0); /* app name */
2913
	}
2914
	aub_out(bufmgr_gem, 0); /* timestamp */
2915
	aub_out(bufmgr_gem, 0); /* timestamp */
2916
	aub_out(bufmgr_gem, 0); /* comment len */
2917
 
2918
	/* Set up the GTT. The max we can handle is 256M */
5068 serge 2919
	aub_out(bufmgr_gem, CMD_AUB_TRACE_HEADER_BLOCK | ((bufmgr_gem->gen >= 8 ? 6 : 5) - 2));
4363 Serge 2920
	aub_out(bufmgr_gem, AUB_TRACE_MEMTYPE_NONLOCAL | 0 | AUB_TRACE_OP_DATA_WRITE);
2921
	aub_out(bufmgr_gem, 0); /* subtype */
2922
	aub_out(bufmgr_gem, 0); /* offset */
2923
	aub_out(bufmgr_gem, gtt_size); /* size */
2924
	for (i = 0x000; i < gtt_size; i += 4, entry += 0x1000) {
2925
		aub_out(bufmgr_gem, entry);
2926
	}
2927
}
2928
 
2929
drm_intel_context *
2930
drm_intel_gem_context_create(drm_intel_bufmgr *bufmgr)
2931
{
2932
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2933
	struct drm_i915_gem_context_create create;
2934
	drm_intel_context *context = NULL;
2935
	int ret;
2936
 
5068 serge 2937
	context = calloc(1, sizeof(*context));
2938
	if (!context)
2939
		return NULL;
2940
 
4363 Serge 2941
	VG_CLEAR(create);
2942
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_CREATE, &create);
2943
	if (ret != 0) {
2944
		DBG("DRM_IOCTL_I915_GEM_CONTEXT_CREATE failed: %s\n",
2945
		    strerror(errno));
5068 serge 2946
		free(context);
4363 Serge 2947
		return NULL;
2948
	}
2949
 
2950
	context->ctx_id = create.ctx_id;
2951
	context->bufmgr = bufmgr;
2952
 
2953
	return context;
2954
}
2955
 
2956
void
2957
drm_intel_gem_context_destroy(drm_intel_context *ctx)
2958
{
2959
	drm_intel_bufmgr_gem *bufmgr_gem;
2960
	struct drm_i915_gem_context_destroy destroy;
2961
	int ret;
2962
 
2963
	if (ctx == NULL)
2964
		return;
2965
 
2966
	VG_CLEAR(destroy);
2967
 
2968
	bufmgr_gem = (drm_intel_bufmgr_gem *)ctx->bufmgr;
2969
	destroy.ctx_id = ctx->ctx_id;
2970
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GEM_CONTEXT_DESTROY,
2971
		       &destroy);
2972
	if (ret != 0)
2973
		fprintf(stderr, "DRM_IOCTL_I915_GEM_CONTEXT_DESTROY failed: %s\n",
2974
			strerror(errno));
2975
 
2976
	free(ctx);
2977
}
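/* A minimal sketch of per-context execution: create a hardware context,
 * submit a batch against it, then destroy it.  batch_used is the number of
 * bytes of valid commands in the batch bo.
 */
#if 0
static int
example_exec_in_context(drm_intel_bufmgr *bufmgr, drm_intel_bo *batch,
			int batch_used)
{
	drm_intel_context *ctx = drm_intel_gem_context_create(bufmgr);
	int ret;

	if (ctx == NULL)
		return -ENOMEM;

	ret = drm_intel_gem_bo_context_exec(batch, ctx, batch_used,
					    I915_EXEC_RENDER);
	drm_intel_gem_context_destroy(ctx);
	return ret;
}
#endif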
2978
 
2979
int
2980
drm_intel_reg_read(drm_intel_bufmgr *bufmgr,
2981
		   uint32_t offset,
2982
		   uint64_t *result)
2983
{
2984
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *)bufmgr;
2985
	struct drm_i915_reg_read reg_read;
2986
	int ret;
2987
 
2988
	VG_CLEAR(reg_read);
2989
	reg_read.offset = offset;
2990
 
2991
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_REG_READ, &reg_read);
2992
 
2993
	*result = reg_read.val;
2994
	return ret;
2995
}
2996
 
2997
 
2998
/**
2999
 * Annotate the given bo for use in aub dumping.
3000
 *
3001
 * \param annotations is an array of drm_intel_aub_annotation objects
3002
 * describing the type of data in various sections of the bo.  Each
3003
 * element of the array specifies the type and subtype of a section of
3004
 * the bo, and the past-the-end offset of that section.  The elements
3005
 * of \c annotations must be sorted so that ending_offset is
3006
 * increasing.
3007
 *
3008
 * \param count is the number of elements in the \c annotations array.
3009
 * If \c count is zero, then \c annotations will not be dereferenced.
3010
 *
3011
 * Annotations are copied into a private data structure, so caller may
3012
 * re-use the memory pointed to by \c annotations after the call
3013
 * returns.
3014
 *
3015
 * Annotations are stored for the lifetime of the bo; to reset to the
3016
 * default state (no annotations), call this function with a \c count
3017
 * of zero.
3018
 */
3019
void
3020
drm_intel_bufmgr_gem_set_aub_annotations(drm_intel_bo *bo,
3021
					 drm_intel_aub_annotation *annotations,
3022
					 unsigned count)
3023
{
3024
	drm_intel_bo_gem *bo_gem = (drm_intel_bo_gem *) bo;
3025
	unsigned size = sizeof(*annotations) * count;
3026
	drm_intel_aub_annotation *new_annotations =
3027
		count > 0 ? realloc(bo_gem->aub_annotations, size) : NULL;
3028
	if (new_annotations == NULL) {
3029
		free(bo_gem->aub_annotations);
3030
		bo_gem->aub_annotations = NULL;
3031
		bo_gem->aub_annotation_count = 0;
3032
		return;
3033
	}
3034
	memcpy(new_annotations, annotations, size);
3035
	bo_gem->aub_annotations = new_annotations;
3036
	bo_gem->aub_annotation_count = count;
3037
}
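/* A minimal sketch of AUB dumping with annotations, mirroring what aub_exec()
 * falls back to for unannotated batches: enable dumping on the bufmgr, then
 * describe which part of the batch bo holds commands before executing it.
 * batch_used is the number of bytes of valid commands.
 */
#if 0
static void
example_annotate_batch(drm_intel_bufmgr *bufmgr, drm_intel_bo *batch,
		       uint32_t batch_used)
{
	drm_intel_aub_annotation notes[2] = {
		{ AUB_TRACE_TYPE_BATCH,  0, batch_used },
		{ AUB_TRACE_TYPE_NOTYPE, 0, batch->size },
	};

	drm_intel_bufmgr_gem_set_aub_dump(bufmgr, 1);
	drm_intel_bufmgr_gem_set_aub_annotations(batch, notes, 2);
}
#endif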
3038
 
3039
/**
3040
 * Initializes the GEM buffer manager, which uses the kernel to allocate, map,
3041
 * and manage buffer objects.
3042
 *
3043
 * \param fd File descriptor of the opened DRM device.
3044
 */
3045
drm_intel_bufmgr *
3046
drm_intel_bufmgr_gem_init(int fd, int batch_size)
3047
{
3048
	drm_intel_bufmgr_gem *bufmgr_gem;
3049
	struct drm_i915_gem_get_aperture aperture;
3050
	drm_i915_getparam_t gp;
3051
	int ret, tmp;
3052
	bool exec2 = false;
3053
 
3054
	bufmgr_gem = calloc(1, sizeof(*bufmgr_gem));
3055
	if (bufmgr_gem == NULL)
3056
		return NULL;
3057
 
3058
	bufmgr_gem->fd = fd;
3059
 
3060
//	if (pthread_mutex_init(&bufmgr_gem->lock, NULL) != 0) {
3061
//		free(bufmgr_gem);
3062
//		return NULL;
3063
//	}
3064
 
3065
	ret = drmIoctl(bufmgr_gem->fd,
3066
		       DRM_IOCTL_I915_GEM_GET_APERTURE,
3067
		       &aperture);
3068
 
3069
	if (ret == 0)
3070
		bufmgr_gem->gtt_size = aperture.aper_available_size;
3071
	else {
3072
		printf("DRM_IOCTL_I915_GEM_GET_APERTURE failed: %s\n",
3073
			strerror(errno));
3074
		bufmgr_gem->gtt_size = 128 * 1024 * 1024;
3075
		printf("Assuming %dkB available aperture size.\n"
3076
			"May lead to reduced performance or incorrect "
3077
			"rendering.\n",
3078
			(int)bufmgr_gem->gtt_size / 1024);
3079
	}
3080
 
3081
	bufmgr_gem->pci_device = get_pci_device_id(bufmgr_gem);
3082
 
3083
	if (IS_GEN2(bufmgr_gem->pci_device))
3084
		bufmgr_gem->gen = 2;
3085
	else if (IS_GEN3(bufmgr_gem->pci_device))
3086
		bufmgr_gem->gen = 3;
3087
	else if (IS_GEN4(bufmgr_gem->pci_device))
3088
		bufmgr_gem->gen = 4;
3089
	else if (IS_GEN5(bufmgr_gem->pci_device))
3090
		bufmgr_gem->gen = 5;
3091
	else if (IS_GEN6(bufmgr_gem->pci_device))
3092
		bufmgr_gem->gen = 6;
3093
	else if (IS_GEN7(bufmgr_gem->pci_device))
3094
		bufmgr_gem->gen = 7;
3095
	else {
3096
		free(bufmgr_gem);
3097
		return NULL;
3098
	}
3099
 
3100
//    printf("gen %d\n", bufmgr_gem->gen);
3101
 
3102
	if (IS_GEN3(bufmgr_gem->pci_device) &&
3103
	    bufmgr_gem->gtt_size > 256*1024*1024) {
3104
		/* The unmappable part of gtt on gen 3 (i.e. above 256MB) can't
3105
		 * be used for tiled blits. To simplify the accounting, just
3106
		 * subtract the unmappable part (fixed to 256MB on all known
3107
		 * gen3 devices) if the kernel advertises it. */
3108
		bufmgr_gem->gtt_size -= 256*1024*1024;
3109
	}
3110
 
3111
	VG_CLEAR(gp);
3112
	gp.value = &tmp;
3113
 
3114
	gp.param = I915_PARAM_HAS_EXECBUF2;
3115
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3116
	if (!ret)
3117
		exec2 = true;
3118
 
3119
	gp.param = I915_PARAM_HAS_BSD;
3120
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3121
	bufmgr_gem->has_bsd = ret == 0;
3122
 
3123
	gp.param = I915_PARAM_HAS_BLT;
3124
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3125
	bufmgr_gem->has_blt = ret == 0;
3126
 
3127
	gp.param = I915_PARAM_HAS_RELAXED_FENCING;
3128
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3129
	bufmgr_gem->has_relaxed_fencing = ret == 0;
3130
 
3131
	gp.param = I915_PARAM_HAS_WAIT_TIMEOUT;
3132
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3133
	bufmgr_gem->has_wait_timeout = ret == 0;
3134
 
3135
	gp.param = I915_PARAM_HAS_LLC;
3136
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3137
	if (ret != 0) {
3138
		/* Kernel does not support the HAS_LLC query, so fall back to GPU
3139
		 * generation detection and assume that we have LLC on GEN6/7
3140
		 */
3141
		bufmgr_gem->has_llc = (IS_GEN6(bufmgr_gem->pci_device) |
3142
				IS_GEN7(bufmgr_gem->pci_device));
3143
	} else
3144
		bufmgr_gem->has_llc = *gp.value;
3145
 
3146
	gp.param = I915_PARAM_HAS_VEBOX;
3147
	ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3148
	bufmgr_gem->has_vebox = (ret == 0) & (*gp.value > 0);
3149
 
3150
	if (bufmgr_gem->gen < 4) {
3151
		gp.param = I915_PARAM_NUM_FENCES_AVAIL;
3152
		gp.value = &bufmgr_gem->available_fences;
3153
		ret = drmIoctl(bufmgr_gem->fd, DRM_IOCTL_I915_GETPARAM, &gp);
3154
		if (ret) {
3155
			fprintf(stderr, "get fences failed: %d [%d]\n", ret,
3156
				errno);
3157
			fprintf(stderr, "param: %d, val: %d\n", gp.param,
3158
				*gp.value);
3159
			bufmgr_gem->available_fences = 0;
3160
		} else {
3161
			/* XXX The kernel reports the total number of fences,
3162
			 * including any that may be pinned.
3163
			 *
3164
			 * We presume that there will be at least one pinned
3165
			 * fence for the scanout buffer, but there may be more
3166
			 * than one scanout and the user may be manually
3167
			 * pinning buffers. Let's move to execbuffer2 and
3168
			 * thereby forget the insanity of using fences...
3169
			 */
3170
			bufmgr_gem->available_fences -= 2;
3171
			if (bufmgr_gem->available_fences < 0)
3172
				bufmgr_gem->available_fences = 0;
3173
		}
3174
	}
3175
 
3176
	/* Let's go with one relocation per every 2 dwords (but round down a bit
3177
	 * since a power of two will mean an extra page allocation for the reloc
3178
	 * buffer).
3179
	 *
3180
	 * Every 4 was too few for the blender benchmark.
3181
	 */
3182
	bufmgr_gem->max_relocs = batch_size / sizeof(uint32_t) / 2 - 2;
3183
 
3184
	bufmgr_gem->bufmgr.bo_alloc = drm_intel_gem_bo_alloc;
5068 serge 3185
	bufmgr_gem->bufmgr.bo_alloc_for_render =
3186
	    drm_intel_gem_bo_alloc_for_render;
4363 Serge 3187
	bufmgr_gem->bufmgr.bo_alloc_tiled = drm_intel_gem_bo_alloc_tiled;
3188
	bufmgr_gem->bufmgr.bo_reference = drm_intel_gem_bo_reference;
3189
	bufmgr_gem->bufmgr.bo_unreference = drm_intel_gem_bo_unreference;
3190
	bufmgr_gem->bufmgr.bo_map = drm_intel_gem_bo_map;
3191
	bufmgr_gem->bufmgr.bo_unmap = drm_intel_gem_bo_unmap;
3192
	bufmgr_gem->bufmgr.bo_subdata = drm_intel_gem_bo_subdata;
3193
//	bufmgr_gem->bufmgr.bo_get_subdata = drm_intel_gem_bo_get_subdata;
3194
	bufmgr_gem->bufmgr.bo_wait_rendering = drm_intel_gem_bo_wait_rendering;
3195
	bufmgr_gem->bufmgr.bo_emit_reloc = drm_intel_gem_bo_emit_reloc;
3196
	bufmgr_gem->bufmgr.bo_emit_reloc_fence = drm_intel_gem_bo_emit_reloc_fence;
3197
	bufmgr_gem->bufmgr.bo_pin = drm_intel_gem_bo_pin;
3198
	bufmgr_gem->bufmgr.bo_unpin = drm_intel_gem_bo_unpin;
3199
	bufmgr_gem->bufmgr.bo_get_tiling = drm_intel_gem_bo_get_tiling;
3200
	bufmgr_gem->bufmgr.bo_set_tiling = drm_intel_gem_bo_set_tiling;
3201
	bufmgr_gem->bufmgr.bo_flink = drm_intel_gem_bo_flink;
3202
	/* Use the new one if available */
3203
//	if (exec2) {
3204
		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec2;
3205
		bufmgr_gem->bufmgr.bo_mrb_exec = drm_intel_gem_bo_mrb_exec2;
3206
//	} else
3207
//		bufmgr_gem->bufmgr.bo_exec = drm_intel_gem_bo_exec;
3208
  	bufmgr_gem->bufmgr.bo_busy = drm_intel_gem_bo_busy;
3209
	bufmgr_gem->bufmgr.bo_madvise = drm_intel_gem_bo_madvise;
3210
	bufmgr_gem->bufmgr.destroy = drm_intel_bufmgr_gem_destroy;
3211
	bufmgr_gem->bufmgr.debug = 0;
3212
	bufmgr_gem->bufmgr.check_aperture_space =
3213
	    drm_intel_gem_check_aperture_space;
3214
	bufmgr_gem->bufmgr.bo_disable_reuse = drm_intel_gem_bo_disable_reuse;
3215
	bufmgr_gem->bufmgr.bo_is_reusable = drm_intel_gem_bo_is_reusable;
3216
//	bufmgr_gem->bufmgr.get_pipe_from_crtc_id =
3217
//	    drm_intel_gem_get_pipe_from_crtc_id;
3218
	bufmgr_gem->bufmgr.bo_references = drm_intel_gem_bo_references;
3219
 
3220
	DRMINITLISTHEAD(&bufmgr_gem->named);
3221
	init_cache_buckets(bufmgr_gem);
3222
 
3223
	DRMINITLISTHEAD(&bufmgr_gem->vma_cache);
3224
	bufmgr_gem->vma_max = -1; /* unlimited by default */
3225
 
3226
	return &bufmgr_gem->bufmgr;
3227
}
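/* A minimal sketch of bringing the buffer manager up on an already opened
 * DRM file descriptor.  16kb is assumed here as a typical batch size; reuse
 * and fenced relocs are what callers normally enable right after init.
 */
#if 0
static drm_intel_bufmgr *
example_init_bufmgr(int fd)
{
	drm_intel_bufmgr *bufmgr = drm_intel_bufmgr_gem_init(fd, 16 * 1024);

	if (bufmgr == NULL)
		return NULL;

	drm_intel_bufmgr_gem_enable_reuse(bufmgr);
	drm_intel_bufmgr_gem_enable_fenced_relocs(bufmgr);
	return bufmgr;
}
#endif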
3228
 
3229
 
3230
drm_intel_bo *
3231
bo_create_from_gem_handle(drm_intel_bufmgr *bufmgr,
3232
                          unsigned int size, unsigned int handle)
3233
{
3234
	drm_intel_bufmgr_gem *bufmgr_gem = (drm_intel_bufmgr_gem *) bufmgr;
3235
	drm_intel_bo_gem *bo_gem;
3236
	int ret;
3237
	struct drm_i915_gem_get_tiling get_tiling;
3238
	drmMMListHead *list;
3239
 
3240
	/* At the moment most applications only have a few named bos.
3241
	 * For instance, in a DRI client only the render buffers passed
3242
	 * between X and the client are named. And since X returns the
3243
	 * alternating names for the front/back buffer a linear search
3244
	 * provides a sufficiently fast match.
3245
	 */
3246
	for (list = bufmgr_gem->named.next;
3247
	     list != &bufmgr_gem->named;
3248
	     list = list->next) {
3249
		bo_gem = DRMLISTENTRY(drm_intel_bo_gem, list, name_list);
3250
		if (bo_gem->gem_handle == handle) {
3251
			return &bo_gem->bo;
3252
		}
3253
	}
3254
 
3255
	bo_gem = calloc(1, sizeof(*bo_gem));
3256
	if (!bo_gem)
3257
		return NULL;
3258
 
3259
	bo_gem->bo.size = size;
3260
	bo_gem->bo.offset = 0;
3261
	bo_gem->bo.virtual = NULL;
3262
	bo_gem->bo.bufmgr = bufmgr;
3263
	bo_gem->name = NULL;
3264
	atomic_set(&bo_gem->refcount, 1);
3265
	bo_gem->validate_index = -1;
3266
	bo_gem->gem_handle = handle;
3267
	bo_gem->bo.handle = handle;
3268
	bo_gem->global_name = 0;
3269
	bo_gem->reusable = false;
3270
 
3271
	VG_CLEAR(get_tiling);
3272
	get_tiling.handle = bo_gem->gem_handle;
3273
	ret = drmIoctl(bufmgr_gem->fd,
3274
		       DRM_IOCTL_I915_GEM_GET_TILING,
3275
		       &get_tiling);
3276
	if (ret != 0) {
3277
		drm_intel_gem_bo_unreference(&bo_gem->bo);
3278
		return NULL;
3279
	}
3280
	bo_gem->tiling_mode = get_tiling.tiling_mode;
3281
	bo_gem->swizzle_mode = get_tiling.swizzle_mode;
3282
	/* XXX stride is unknown */
3283
	drm_intel_bo_gem_set_in_aperture_size(bufmgr_gem, bo_gem);
3284
 
3285
	DRMINITLISTHEAD(&bo_gem->vma_list);
3286
	DRMLISTADDTAIL(&bo_gem->name_list, &bufmgr_gem->named);
3287
	printf("bo_create_from_gem_handle: %d\n", handle);
3288
 
3289
	return &bo_gem->bo;
3290
}