/*
 * Copyright 2010 Jerome Glisse <glisse@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Jerome Glisse
 */
#include "r600_hw_context_priv.h"
#include "radeonsi_pm4.h"
#include "radeonsi_pipe.h"
#include "sid.h"
#include "util/u_memory.h"
#include <errno.h>

#define GROUP_FORCE_NEW_BLOCK	0
 
/* Get backends mask */
void si_get_backend_mask(struct r600_context *ctx)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	struct si_resource *buffer;
	uint32_t *results;
	unsigned num_backends = ctx->screen->info.r600_num_backends;
	unsigned i, mask = 0;

	/* if backend_map query is supported by the kernel */
	if (ctx->screen->info.r600_backend_map_valid) {
		unsigned num_tile_pipes = ctx->screen->info.r600_num_tile_pipes;
		unsigned backend_map = ctx->screen->info.r600_backend_map;
		unsigned item_width = 4, item_mask = 0x7;

		while (num_tile_pipes--) {
			i = backend_map & item_mask;
			mask |= (1 << i);
			backend_map >>= item_width;
		}
		if (mask != 0) {
			ctx->backend_mask = mask;
			return;
		}
	}

	/* otherwise backup path for older kernels */

	/* create buffer for event data */
	buffer = si_resource_create_custom(&ctx->screen->screen,
					   PIPE_USAGE_STAGING,
					   ctx->max_db*16);
	if (!buffer)
		goto err;

	/* initialize buffer with zeroes */
	results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
	if (results) {
		uint64_t va = 0;

		memset(results, 0, ctx->max_db * 4 * 4);
		ctx->ws->buffer_unmap(buffer->cs_buf);

		/* emit EVENT_WRITE for ZPASS_DONE */
		va = r600_resource_va(&ctx->screen->screen, (void *)buffer);
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = va >> 32;

		cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
		cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, buffer, RADEON_USAGE_WRITE);

		/* analyze results */
		results = ctx->ws->buffer_map(buffer->cs_buf, ctx->cs, PIPE_TRANSFER_READ);
		if (results) {
			for (i = 0; i < ctx->max_db; i++) {
				/* at least highest bit will be set if backend is used */
				if (results[i*4 + 1])
					mask |= (1 << i);
			}
			ctx->ws->buffer_unmap(buffer->cs_buf);
		}
	}

	si_resource_reference(&buffer, NULL);

	if (mask != 0) {
		ctx->backend_mask = mask;
		return;
	}

err:
	/* fallback to old method - set num_backends lower bits to 1 */
	ctx->backend_mask = (~((uint32_t)0))>>(32-num_backends);
	return;
}
 
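/* Worked example of the backend_map decode above (a note, not driver code):
 * with num_tile_pipes = 4 and backend_map = 0x3210, the successive 4-bit
 * fields, each masked with 0x7, yield the backend indices 0, 1, 2 and 3,
 * so mask ends up as 0xF.
 */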
/* Make sure there is enough space in the CS for num_dw more dwords,
 * flushing the current command stream if necessary. */
void si_need_cs_space(struct r600_context *ctx, unsigned num_dw,
			boolean count_draw_in)
{
	/* The number of dwords we already used in the CS so far. */
	num_dw += ctx->cs->cdw;

	if (count_draw_in) {
		/* The number of dwords all the dirty states would take. */
		num_dw += ctx->pm4_dirty_cdwords;

		/* The upper-bound of how much a draw command would take. */
		num_dw += SI_MAX_DRAW_CS_DWORDS;
	}

	/* Count in queries_suspend. */
	num_dw += ctx->num_cs_dw_queries_suspend;

	/* Count in streamout_end at the end of CS. */
	num_dw += ctx->num_cs_dw_streamout_end;

	/* Count in render_condition(NULL) at the end of CS. */
	if (ctx->predicate_drawing) {
		num_dw += 3;
	}

	/* Count in framebuffer cache flushes at the end of CS. */
	num_dw += 7; /* one SURFACE_SYNC and CACHE_FLUSH_AND_INV (r6xx-only) */

	/* Save 16 dwords for the fence mechanism. */
	num_dw += 16;

#if R600_TRACE_CS
	if (ctx->screen->trace_bo) {
		num_dw += R600_TRACE_CS_DWORDS;
	}
#endif

	/* Flush if there's not enough space. */
	if (num_dw > RADEON_MAX_CMDBUF_DWORDS) {
		radeonsi_flush(&ctx->context, NULL, RADEON_FLUSH_ASYNC);
	}
}
 
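/* Illustrative call pattern (a sketch under assumed state, not part of the
 * driver): reserve space for every dword you are about to write, then emit
 * them. The NOP packet below is only an example payload; 'ctx' and 'cs' are
 * assumed to be a live context and its command stream.
 */
#if 0
	si_need_cs_space(ctx, 2, FALSE);
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = 0;
#endif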
static void r600_flush_framebuffer(struct r600_context *ctx)
{
	struct si_pm4_state *pm4;

	if (!(ctx->flags & R600_CONTEXT_DST_CACHES_DIRTY))
		return;

	pm4 = si_pm4_alloc_state(ctx);

	if (pm4 == NULL)
		return;

	si_cmd_surface_sync(pm4, S_0085F0_CB0_DEST_BASE_ENA(1) |
				S_0085F0_CB1_DEST_BASE_ENA(1) |
				S_0085F0_CB2_DEST_BASE_ENA(1) |
				S_0085F0_CB3_DEST_BASE_ENA(1) |
				S_0085F0_CB4_DEST_BASE_ENA(1) |
				S_0085F0_CB5_DEST_BASE_ENA(1) |
				S_0085F0_CB6_DEST_BASE_ENA(1) |
				S_0085F0_CB7_DEST_BASE_ENA(1) |
				S_0085F0_DB_ACTION_ENA(1) |
				S_0085F0_DB_DEST_BASE_ENA(1));
	si_pm4_emit(ctx, pm4);
	si_pm4_free_state(ctx, pm4, ~0);

	ctx->flags &= ~R600_CONTEXT_DST_CACHES_DIRTY;
}
 
void si_context_flush(struct r600_context *ctx, unsigned flags)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	bool queries_suspended = false;

#if 0
	bool streamout_suspended = false;
#endif

	if (!cs->cdw)
		return;

	/* suspend queries */
	if (ctx->num_cs_dw_queries_suspend) {
		r600_context_queries_suspend(ctx);
		queries_suspended = true;
	}

#if 0
	if (ctx->num_cs_dw_streamout_end) {
		r600_context_streamout_end(ctx);
		streamout_suspended = true;
	}
#endif

	r600_flush_framebuffer(ctx);

	/* partial flush is needed to avoid lockups on some chips with user fences */
	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);

	/* force to keep tiling flags */
	flags |= RADEON_FLUSH_KEEP_TILING_FLAGS;

#if R600_TRACE_CS
	if (ctx->screen->trace_bo) {
		struct r600_screen *rscreen = ctx->screen;
		unsigned i;

		for (i = 0; i < cs->cdw; i++) {
			fprintf(stderr, "[%4d] [%5d] 0x%08x\n", rscreen->cs_count, i, cs->buf[i]);
		}
		rscreen->cs_count++;
	}
#endif

	/* Flush the CS. */
	ctx->ws->cs_flush(ctx->cs, flags, 0);

#if R600_TRACE_CS
	if (ctx->screen->trace_bo) {
		struct r600_screen *rscreen = ctx->screen;
		unsigned i;

		for (i = 0; i < 10; i++) {
			usleep(5);
			if (!ctx->ws->buffer_is_busy(rscreen->trace_bo->buf, RADEON_USAGE_READWRITE)) {
				break;
			}
		}
		if (i == 10) {
			fprintf(stderr, "timeout on cs lockup, likely happened at cs %d dw %d\n",
				rscreen->trace_ptr[1], rscreen->trace_ptr[0]);
		} else {
			fprintf(stderr, "cs %d executed in %dms\n", rscreen->trace_ptr[1], i * 5);
		}
	}
#endif

	ctx->pm4_dirty_cdwords = 0;
	ctx->flags = 0;

#if 0
	if (streamout_suspended) {
		ctx->streamout_start = TRUE;
		ctx->streamout_append_bitmask = ~0;
	}
#endif

	/* resume queries */
	if (queries_suspended) {
		r600_context_queries_resume(ctx);
	}

	/* Mark all valid groups as dirty so they get re-emitted on the
	 * next draw command.
	 */
	si_pm4_reset_emitted(ctx);
}
 
void si_context_emit_fence(struct r600_context *ctx, struct si_resource *fence_bo, unsigned offset, unsigned value)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va;

	si_need_cs_space(ctx, 10, FALSE);

	va = r600_resource_va(&ctx->screen->screen, (void*)fence_bo);
	va = va + (offset << 2);

	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 0, 0);
	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_PS_PARTIAL_FLUSH) | EVENT_INDEX(4);
	cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
	cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
	cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;       /* ADDRESS_LO */
	/* DATA_SEL | INT_EN | ADDRESS_HI */
	cs->buf[cs->cdw++] = (1 << 29) | (0 << 24) | ((va >> 32UL) & 0xFF);
	cs->buf[cs->cdw++] = value;                   /* DATA_LO */
	cs->buf[cs->cdw++] = 0;                       /* DATA_HI */
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, fence_bo, RADEON_USAGE_WRITE);
}
 
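/* Sketch of how such a fence is typically consumed on the CPU side (assumed
 * usage, not code from this file): map fence_bo and poll the dword written
 * by the EVENT_WRITE_EOP packet until it reaches 'value'. A real
 * implementation would back off or use a winsys wait instead of spinning.
 */
#if 0
	uint32_t *fence = ctx->ws->buffer_map(fence_bo->cs_buf, NULL, PIPE_TRANSFER_READ);
	while (fence[offset] != value)
		; /* spin until the GPU's end-of-pipe write lands */
	ctx->ws->buffer_unmap(fence_bo->cs_buf);
#endif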
static unsigned r600_query_read_result(char *map, unsigned start_index, unsigned end_index,
				       bool test_status_bit)
{
	uint32_t *current_result = (uint32_t*)map;
	uint64_t start, end;

	start = (uint64_t)current_result[start_index] |
		(uint64_t)current_result[start_index+1] << 32;
	end = (uint64_t)current_result[end_index] |
	      (uint64_t)current_result[end_index+1] << 32;

	if (!test_status_bit ||
	    ((start & 0x8000000000000000UL) && (end & 0x8000000000000000UL))) {
		return end - start;
	}
	return 0;
}
 
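/* Example (a note, not driver code): with test_status_bit set,
 * start = 0x8000000000000005 and end = 0x800000000000000C both have bit 63
 * set, meaning both snapshots are valid, so the function returns
 * end - start = 7. If either bit 63 were clear, it would return 0.
 */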
static boolean r600_query_result(struct r600_context *ctx, struct r600_query *query, boolean wait)
{
	unsigned results_base = query->results_start;
	char *map;

	map = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs,
				  PIPE_TRANSFER_READ |
				  (wait ? 0 : PIPE_TRANSFER_DONTBLOCK));
	if (!map)
		return FALSE;

	/* count all results across all data blocks */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
		while (results_base != query->results_end) {
			query->result.u64 +=
				r600_query_read_result(map + results_base, 0, 2, true);
			results_base = (results_base + 16) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		while (results_base != query->results_end) {
			query->result.b = query->result.b ||
				r600_query_read_result(map + results_base, 0, 2, true) != 0;
			results_base = (results_base + 16) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		while (results_base != query->results_end) {
			query->result.u64 +=
				r600_query_read_result(map + results_base, 0, 2, false);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
		/* SAMPLE_STREAMOUTSTATS stores this structure:
		 * {
		 *    u64 NumPrimitivesWritten;
		 *    u64 PrimitiveStorageNeeded;
		 * }
		 * We only need NumPrimitivesWritten here. */
		while (results_base != query->results_end) {
			query->result.u64 +=
				r600_query_read_result(map + results_base, 2, 6, true);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		/* Here we read PrimitiveStorageNeeded. */
		while (results_base != query->results_end) {
			query->result.u64 +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_SO_STATISTICS:
		while (results_base != query->results_end) {
			query->result.so.num_primitives_written +=
				r600_query_read_result(map + results_base, 2, 6, true);
			query->result.so.primitives_storage_needed +=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
		}
		break;
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		while (results_base != query->results_end) {
			query->result.b = query->result.b ||
				r600_query_read_result(map + results_base, 2, 6, true) !=
				r600_query_read_result(map + results_base, 0, 4, true);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;
		}
		break;
	default:
		assert(0);
	}

	query->results_start = query->results_end;
	ctx->ws->buffer_unmap(query->buffer->cs_buf);
	return TRUE;
}
 
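/* Note on the wrapping math above: the query buffer is treated as a ring,
 * with results_start/results_end advancing in result_size steps modulo
 * width0. For example, with width0 = 4096 and result_size = 16, an offset
 * of 4080 advances to (4080 + 16) % 4096 = 0.
 */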
void r600_query_begin(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	unsigned new_results_end, i;
	uint32_t *results;
	uint64_t va;

	si_need_cs_space(ctx, query->num_cs_dw * 2, TRUE);

	new_results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;

	/* collect current results if query buffer is full */
	if (new_results_end == query->results_start) {
		r600_query_result(ctx, query, TRUE);
	}

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		results = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
		if (results) {
			results = (uint32_t*)((char*)results + query->results_end);
			memset(results, 0, query->result_size);

			/* Set top bits for unused backends */
			for (i = 0; i < ctx->max_db; i++) {
				if (!(ctx->backend_mask & (1 << i))) {
					results[(i * 4)+1] = 0x80000000;
					results[(i * 4)+3] = 0x80000000;
				}
			}
			ctx->ws->buffer_unmap(query->buffer->cs_buf);
		}
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		results = ctx->ws->buffer_map(query->buffer->cs_buf, ctx->cs, PIPE_TRANSFER_WRITE);
		results = (uint32_t*)((char*)results + query->results_end);
		memset(results, 0, query->result_size);
		ctx->ws->buffer_unmap(query->buffer->cs_buf);
		break;
	default:
		assert(0);
	}

	/* emit begin query */
	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
	va += query->results_end;

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = query->results_end;
		cs->buf[cs->cdw++] = 0;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);

	ctx->num_cs_dw_queries_suspend += query->num_cs_dw;
}
 
void r600_query_end(struct r600_context *ctx, struct r600_query *query)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va;

	va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);
	/* emit end query */
	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		va += query->results_end + 8;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_ZPASS_DONE) | EVENT_INDEX(1);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (va >> 32UL) & 0xFF;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE, 2, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_SAMPLE_STREAMOUTSTATS) | EVENT_INDEX(3);
		cs->buf[cs->cdw++] = query->results_end + query->result_size/2;
		cs->buf[cs->cdw++] = 0;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		va += query->results_end + query->result_size/2;
		cs->buf[cs->cdw++] = PKT3(PKT3_EVENT_WRITE_EOP, 4, 0);
		cs->buf[cs->cdw++] = EVENT_TYPE(EVENT_TYPE_CACHE_FLUSH_AND_INV_TS_EVENT) | EVENT_INDEX(5);
		cs->buf[cs->cdw++] = va;
		cs->buf[cs->cdw++] = (3 << 29) | ((va >> 32UL) & 0xFF);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = 0;
		break;
	default:
		assert(0);
	}
	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer, RADEON_USAGE_WRITE);

	query->results_end = (query->results_end + query->result_size) % query->buffer->b.b.width0;
	ctx->num_cs_dw_queries_suspend -= query->num_cs_dw;
}
 
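/* Layout note: each data block stores a begin snapshot in its first half and
 * an end snapshot in its second half. That is why r600_query_end writes at
 * results_end + result_size/2 (or at +8 within each per-DB 16-byte slot for
 * the occlusion counters) and only then advances results_end to the next
 * block.
 */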
void r600_query_predication(struct r600_context *ctx, struct r600_query *query, int operation,
			    int flag_wait)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	uint64_t va;

	if (operation == PREDICATION_OP_CLEAR) {
		si_need_cs_space(ctx, 3, FALSE);

		cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
		cs->buf[cs->cdw++] = 0;
		cs->buf[cs->cdw++] = PRED_OP(PREDICATION_OP_CLEAR);
	} else {
		unsigned results_base = query->results_start;
		unsigned count;
		uint32_t op;

		/* find count of the query data blocks */
		count = (query->buffer->b.b.width0 + query->results_end - query->results_start) % query->buffer->b.b.width0;
		count /= query->result_size;

		si_need_cs_space(ctx, 5 * count, TRUE);

		op = PRED_OP(operation) | PREDICATION_DRAW_VISIBLE |
				(flag_wait ? PREDICATION_HINT_WAIT : PREDICATION_HINT_NOWAIT_DRAW);
		va = r600_resource_va(&ctx->screen->screen, (void*)query->buffer);

		/* emit predicate packets for all data blocks */
		while (results_base != query->results_end) {
			cs->buf[cs->cdw++] = PKT3(PKT3_SET_PREDICATION, 1, 0);
			cs->buf[cs->cdw++] = (va + results_base) & 0xFFFFFFFFUL;
			cs->buf[cs->cdw++] = op | (((va + results_base) >> 32UL) & 0xFF);
			cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
			cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, query->buffer,
								   RADEON_USAGE_READ);
			results_base = (results_base + query->result_size) % query->buffer->b.b.width0;

			/* set CONTINUE bit for all packets except the first */
			op |= PREDICATION_CONTINUE;
		}
	}
}
 
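/* The count computation above handles ring wraparound. For example, with
 * width0 = 4096, results_start = 3840 and results_end = 256, the pending
 * data spans (4096 + 256 - 3840) % 4096 = 512 bytes, i.e. 512 / result_size
 * blocks, and one SET_PREDICATION packet is emitted per block.
 */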
struct r600_query *r600_context_query_create(struct r600_context *ctx, unsigned query_type)
{
	struct r600_query *query;
	unsigned buffer_size = 4096;

	query = CALLOC_STRUCT(r600_query);
	if (query == NULL)
		return NULL;

	query->type = query_type;

	switch (query_type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_OCCLUSION_PREDICATE:
		query->result_size = 16 * ctx->max_db;
		query->num_cs_dw = 6;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		query->result_size = 16;
		query->num_cs_dw = 8;
		break;
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
	case PIPE_QUERY_SO_STATISTICS:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		/* NumPrimitivesWritten, PrimitiveStorageNeeded. */
		query->result_size = 32;
		query->num_cs_dw = 6;
		break;
	default:
		assert(0);
		FREE(query);
		return NULL;
	}

	/* Round the buffer size down to a multiple of result_size to simplify
	 * the offset wrapping math. */
	buffer_size -= buffer_size % query->result_size;

	/* Queries are normally read by the CPU after
	 * being written by the GPU, hence staging is probably a good
	 * usage pattern.
	 */
	query->buffer = si_resource_create_custom(&ctx->screen->screen,
						  PIPE_USAGE_STAGING,
						  buffer_size);
	if (!query->buffer) {
		FREE(query);
		return NULL;
	}
	return query;
}
 
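/* Example of the rounding above (a note, not driver code): with max_db = 12,
 * an occlusion query needs result_size = 16 * 12 = 192 bytes, and
 * 4096 % 192 = 64, so buffer_size is trimmed to 4032, a whole number of
 * result blocks, which keeps the modulo arithmetic in the ring exact.
 */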
void r600_context_query_destroy(struct r600_context *ctx, struct r600_query *query)
{
	si_resource_reference(&query->buffer, NULL);
	free(query);
}
 
boolean r600_context_query_result(struct r600_context *ctx,
				struct r600_query *query,
				boolean wait, void *vresult)
{
	boolean *result_b = (boolean*)vresult;
	uint64_t *result_u64 = (uint64_t*)vresult;
	struct pipe_query_data_so_statistics *result_so =
		(struct pipe_query_data_so_statistics*)vresult;

	if (!r600_query_result(ctx, query, wait))
		return FALSE;

	switch (query->type) {
	case PIPE_QUERY_OCCLUSION_COUNTER:
	case PIPE_QUERY_PRIMITIVES_EMITTED:
	case PIPE_QUERY_PRIMITIVES_GENERATED:
		*result_u64 = query->result.u64;
		break;
	case PIPE_QUERY_OCCLUSION_PREDICATE:
	case PIPE_QUERY_SO_OVERFLOW_PREDICATE:
		*result_b = query->result.b;
		break;
	case PIPE_QUERY_TIME_ELAPSED:
		/* Convert timestamp ticks to nanoseconds (the crystal clock
		 * frequency is reported in kHz). */
		*result_u64 = (1000000 * query->result.u64) / ctx->screen->info.r600_clock_crystal_freq;
		break;
	case PIPE_QUERY_SO_STATISTICS:
		*result_so = query->result.so;
		break;
	default:
		assert(0);
	}
	return TRUE;
}
 
void r600_context_queries_suspend(struct r600_context *ctx)
{
	struct r600_query *query;

	LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) {
		r600_query_end(ctx, query);
	}
	assert(ctx->num_cs_dw_queries_suspend == 0);
}

void r600_context_queries_resume(struct r600_context *ctx)
{
	struct r600_query *query;

	assert(ctx->num_cs_dw_queries_suspend == 0);

	LIST_FOR_EACH_ENTRY(query, &ctx->active_query_list, list) {
		r600_query_begin(ctx, query);
	}
}
 
void r600_context_draw_opaque_count(struct r600_context *ctx, struct r600_so_target *t)
{
	struct radeon_winsys_cs *cs = ctx->cs;
	si_need_cs_space(ctx, 14 + 21, TRUE);

	cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
	cs->buf[cs->cdw++] = (R_028B28_VGT_STRMOUT_DRAW_OPAQUE_OFFSET - SI_CONTEXT_REG_OFFSET) >> 2;
	cs->buf[cs->cdw++] = 0;

	cs->buf[cs->cdw++] = PKT3(PKT3_SET_CONTEXT_REG, 1, 0);
	cs->buf[cs->cdw++] = (R_028B30_VGT_STRMOUT_DRAW_OPAQUE_VERTEX_STRIDE - SI_CONTEXT_REG_OFFSET) >> 2;
	cs->buf[cs->cdw++] = t->stride >> 2;

#if 0
	cs->buf[cs->cdw++] = PKT3(PKT3_COPY_DW, 4, 0);
	cs->buf[cs->cdw++] = COPY_DW_SRC_IS_MEM | COPY_DW_DST_IS_REG;
	cs->buf[cs->cdw++] = 0; /* src address lo */
	cs->buf[cs->cdw++] = 0; /* src address hi */
	cs->buf[cs->cdw++] = R_028B2C_VGT_STRMOUT_DRAW_OPAQUE_BUFFER_FILLED_SIZE >> 2; /* dst register */
	cs->buf[cs->cdw++] = 0; /* unused */
#endif

	cs->buf[cs->cdw++] = PKT3(PKT3_NOP, 0, 0);
	cs->buf[cs->cdw++] = r600_context_bo_reloc(ctx, t->filled_size, RADEON_USAGE_READ);
}
 
#if R600_TRACE_CS
void r600_trace_emit(struct r600_context *rctx)
{
	struct r600_screen *rscreen = rctx->screen;
	struct radeon_winsys_cs *cs = rctx->cs;
	uint64_t va;

	va = r600_resource_va(&rscreen->screen, (void*)rscreen->trace_bo);
	r600_context_bo_reloc(rctx, rscreen->trace_bo, RADEON_USAGE_READWRITE);
	cs->buf[cs->cdw++] = PKT3(PKT3_WRITE_DATA, 4, 0);
	cs->buf[cs->cdw++] = PKT3_WRITE_DATA_DST_SEL(PKT3_WRITE_DATA_DST_SEL_MEM_SYNC) |
				PKT3_WRITE_DATA_WR_CONFIRM |
				PKT3_WRITE_DATA_ENGINE_SEL(PKT3_WRITE_DATA_ENGINE_SEL_ME);
	cs->buf[cs->cdw++] = va & 0xFFFFFFFFUL;
	cs->buf[cs->cdw++] = (va >> 32UL) & 0xFFFFFFFFUL;
	cs->buf[cs->cdw++] = cs->cdw;
	cs->buf[cs->cdw++] = rscreen->cs_count;
}
#endif