Subversion Repositories Kolibri OS

Rev

Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */
2
 
3
/*
4
 * Copyright (C) 2012 Rob Clark 
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the "Software"),
8
 * to deal in the Software without restriction, including without limitation
9
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
10
 * and/or sell copies of the Software, and to permit persons to whom the
11
 * Software is furnished to do so, subject to the following conditions:
12
 *
13
 * The above copyright notice and this permission notice (including the next
14
 * paragraph) shall be included in all copies or substantial portions of the
15
 * Software.
16
 *
17
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
18
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
19
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
20
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
21
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
22
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
23
 * SOFTWARE.
24
 *
25
 * Authors:
26
 *    Rob Clark 
27
 */
28
 
29
#include "pipe/p_state.h"
30
#include "util/u_string.h"
31
#include "util/u_memory.h"
32
#include "util/u_inlines.h"
33
#include "util/u_format.h"
34
 
35
#include "freedreno_gmem.h"
36
#include "freedreno_context.h"
37
#include "freedreno_resource.h"
38
#include "freedreno_query_hw.h"
39
#include "freedreno_util.h"
40
 
41
/*
42
 * GMEM is the small (ie. 256KiB for a200, 512KiB for a220, etc) tile buffer
43
 * inside the GPU.  All rendering happens to GMEM.  Larger render targets
44
 * are split into tiles that are small enough for the color (and depth and/or
45
 * stencil, if enabled) buffers to fit within GMEM.  Before rendering a tile,
46
 * if there was not a clear invalidating the previous tile contents, we need
47
 * to restore the previous tile's contents (system mem -> GMEM), and after all
48
 * the draw calls, before moving to the next tile, we need to save the tile
49
 * contents (GMEM -> system mem).
50
 *
51
 * The code in this file handles dealing with GMEM and tiling.
52
 *
53
 * The structure of the ringbuffer ends up being:
54
 *
55
 *     +--<---<-- IB ---<---+---<---+---<---<---<--+
56
 *     |                    |       |              |
57
 *     v                    ^       ^              ^
58
 *   ------------------------------------------------------
59
 *     | clear/draw cmds | Tile0 | Tile1 | .... | TileN |
60
 *   ------------------------------------------------------
61
 *                       ^
62
 *                       |
63
 *                       address submitted in issueibcmds
64
 *
65
 * Where the per-tile section handles scissor setup, mem2gmem restore (if
66
 * needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem
67
 * resolve.
68
 */
69
 
70
static uint32_t bin_width(struct fd_context *ctx)
71
{
72
	if (is_a4xx(ctx->screen))
73
		return 1024;
74
	if (is_a3xx(ctx->screen))
75
		return 992;
76
	return 512;
77
}
78
 
79
static uint32_t
80
total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2],
81
		   uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem)
82
{
83
	uint32_t total = 0, i;
84
 
85
	for (i = 0; i < 4; i++) {
86
		if (cbuf_cpp[i]) {
87
			gmem->cbuf_base[i] = align(total, 0x4000);
88
			total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h;
89
		}
90
	}
91
 
92
	if (zsbuf_cpp[0]) {
93
		gmem->zsbuf_base[0] = align(total, 0x4000);
94
		total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h;
95
	}
96
 
97
	if (zsbuf_cpp[1]) {
98
		gmem->zsbuf_base[1] = align(total, 0x4000);
99
		total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h;
100
	}
101
 
102
	return total;
103
}
104
 
105
/* Compute the tiling configuration for the current framebuffer and max
 * scissor: per-buffer bytes-per-pixel, bin (tile) dimensions, number of
 * bins, VSC pipe assignment, and per-tile position/size.  Results are
 * cached in ctx->gmem / ctx->pipe / ctx->tile; returns early if nothing
 * relevant has changed since the previous call.
 */
static void
calculate_tiles(struct fd_context *ctx)
{
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct pipe_scissor_state *scissor = &ctx->max_scissor;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	uint32_t gmem_size = ctx->screen->gmemsize_bytes;
	uint32_t minx, miny, width, height;
	uint32_t nbins_x = 1, nbins_y = 1;
	uint32_t bin_w, bin_h;
	uint32_t max_width = bin_width(ctx);
	uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp[2] = {0};
	uint32_t i, j, t, xoff, yoff;
	uint32_t tpp_x, tpp_y;
	bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
	int tile_n[ARRAY_SIZE(ctx->pipe)];

	/* determine bytes-per-pixel of the depth/stencil buffer (a separate
	 * stencil resource, if any, gets its own entry):
	 */
	if (has_zs) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
		zsbuf_cpp[0] = rsc->cpp;
		if (rsc->stencil)
			zsbuf_cpp[1] = rsc->stencil->cpp;
	}
	for (i = 0; i < pfb->nr_cbufs; i++) {
		if (pfb->cbufs[i])
			cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format);
		else
			cbuf_cpp[i] = 4;
	}

	/* early-out if the cached config is still valid for these inputs: */
	if (!memcmp(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)) &&
		!memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) &&
		!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) {
		/* everything is up-to-date */
		return;
	}

	if (fd_mesa_debug & FD_DBG_NOSCIS) {
		/* scissor optimization disabled: tile the whole framebuffer */
		minx = 0;
		miny = 0;
		width = pfb->width;
		height = pfb->height;
	} else {
		minx = scissor->minx & ~31; /* round down to multiple of 32 */
		miny = scissor->miny & ~31;
		width = scissor->maxx - minx;
		height = scissor->maxy - miny;
	}

	bin_w = align(width, 32);
	bin_h = align(height, 32);

	/* first, find a bin width that satisfies the maximum width
	 * restrictions:
	 */
	while (bin_w > max_width) {
		nbins_x++;
		bin_w = align(width / nbins_x, 32);
	}

	/* then find a bin width/height that satisfies the memory
	 * constraints:
	 */
	DBG("binning input: cbuf cpp: %d %d %d %d, zsbuf cpp: %d; %dx%d",
		cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp[0],
		width, height);
	while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
		/* split along the larger dimension so bins stay roughly square: */
		if (bin_w > bin_h) {
			nbins_x++;
			bin_w = align(width / nbins_x, 32);
		} else {
			nbins_y++;
			bin_h = align(height / nbins_y, 32);
		}
	}

	DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h);

	/* cache the inputs and results of the calculation: */
	gmem->scissor = *scissor;
	memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp));
	memcpy(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp));
	gmem->bin_h = bin_h;
	gmem->bin_w = bin_w;
	gmem->nbins_x = nbins_x;
	gmem->nbins_y = nbins_y;
	gmem->minx = minx;
	gmem->miny = miny;
	gmem->width = width;
	gmem->height = height;

	/*
	 * Assign tiles and pipes:
	 *
	 * At some point it might be worth playing with different
	 * strategies and seeing if that makes much impact on
	 * performance.
	 */

#define div_round_up(v, a)  (((v) + (a) - 1) / (a))
	/* figure out number of tiles per pipe: */
	tpp_x = tpp_y = 1;
	while (div_round_up(nbins_y, tpp_y) > 8)
		tpp_y += 2;
	while ((div_round_up(nbins_y, tpp_y) *
			div_round_up(nbins_x, tpp_x)) > 8)
		tpp_x += 1;

	/* configure pipes: */
	xoff = yoff = 0;
	for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];

		/* wrap to the start of the next row of pipes: */
		if (xoff >= nbins_x) {
			xoff = 0;
			yoff += tpp_y;
		}

		if (yoff >= nbins_y) {
			break;
		}

		pipe->x = xoff;
		pipe->y = yoff;
		pipe->w = MIN2(tpp_x, nbins_x - xoff);
		pipe->h = MIN2(tpp_y, nbins_y - yoff);

		xoff += tpp_x;
	}

	/* zero out any remaining (unused) pipes: */
	for (; i < ARRAY_SIZE(ctx->pipe); i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
		pipe->x = pipe->y = pipe->w = pipe->h = 0;
	}

#if 0 /* debug */
	printf("%dx%d ... tpp=%dx%d\n", nbins_x, nbins_y, tpp_x, tpp_y);
	for (i = 0; i < 8; i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
		printf("pipe[%d]: %ux%u @ %u,%u\n", i,
				pipe->w, pipe->h, pipe->x, pipe->y);
	}
#endif

	/* configure tiles: */
	t = 0;
	yoff = miny;
	memset(tile_n, 0, sizeof(tile_n));
	for (i = 0; i < nbins_y; i++) {
		uint32_t bw, bh;

		xoff = minx;

		/* clip bin height: */
		bh = MIN2(bin_h, miny + height - yoff);

		for (j = 0; j < nbins_x; j++) {
			struct fd_tile *tile = &ctx->tile[t];
			uint32_t p;

			assert(t < ARRAY_SIZE(ctx->tile));

			/* pipe number: */
			p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);

			/* clip bin width: */
			bw = MIN2(bin_w, minx + width - xoff);

			/* tile->n counts tiles within a pipe, in submission order: */
			tile->n = tile_n[p]++;
			tile->p = p;
			tile->bin_w = bw;
			tile->bin_h = bh;
			tile->xoff = xoff;
			tile->yoff = yoff;

			t++;

			xoff += bw;
		}

		yoff += bh;
	}

#if 0 /* debug */
	t = 0;
	for (i = 0; i < nbins_y; i++) {
		for (j = 0; j < nbins_x; j++) {
			struct fd_tile *tile = &ctx->tile[t++];
			printf("|p:%u n:%u|", tile->p, tile->n);
		}
		printf("\n");
	}
#endif
}
298
 
299
static void
300
render_tiles(struct fd_context *ctx)
301
{
302
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
303
	int i;
304
 
305
	ctx->emit_tile_init(ctx);
306
 
307
	if (ctx->restore)
308
		ctx->stats.batch_restore++;
309
 
310
	for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) {
311
		struct fd_tile *tile = &ctx->tile[i];
312
 
313
		DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d",
314
			tile->bin_h, tile->yoff, tile->bin_w, tile->xoff);
315
 
316
		ctx->emit_tile_prep(ctx, tile);
317
 
318
		if (ctx->restore) {
319
			fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_MEM2GMEM);
320
			ctx->emit_tile_mem2gmem(ctx, tile);
321
			fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
322
		}
323
 
324
		ctx->emit_tile_renderprep(ctx, tile);
325
 
326
		fd_hw_query_prepare_tile(ctx, i, ctx->ring);
327
 
328
		/* emit IB to drawcmds: */
329
		OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
330
		fd_reset_wfi(ctx);
331
 
332
		/* emit gmem2mem to transfer tile back to system memory: */
333
		fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_GMEM2MEM);
334
		ctx->emit_tile_gmem2mem(ctx, tile);
335
		fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);
336
	}
337
}
338
 
339
/* Emit a single-pass (bypass) render directly to system memory,
 * skipping the GMEM tiling machinery entirely.
 */
static void
render_sysmem(struct fd_context *ctx)
{
	ctx->emit_sysmem_prep(ctx);

	/* only one "tile" (index 0) in the bypass case: */
	fd_hw_query_prepare_tile(ctx, 0, ctx->ring);

	/* emit IB to drawcmds: */
	OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
	fd_reset_wfi(ctx);
}
350
 
351
/* Flush the accumulated clear/draw cmds for the current batch, either in
 * a single pass directly to system memory (bypass) when that looks
 * cheaper, or tiled through GMEM.  Afterwards, reset per-batch state
 * (scissor bounds, dirty flags) for the next batch.
 */
void
fd_gmem_render_tiles(struct fd_context *ctx)
{
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	bool sysmem = false;

	if (ctx->emit_sysmem_prep) {
		/* bypass rendering only when the batch is small (<= 5 draws) and
		 * nothing (clears, gmem_reason flags) forces the GMEM path:
		 */
		if (ctx->cleared || ctx->gmem_reason || (ctx->num_draws > 5)) {
			DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u",
				ctx->cleared, ctx->gmem_reason, ctx->num_draws);
		} else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) {
			sysmem = true;
		}
	}

	/* close out the draw cmds by making sure any active queries are
	 * paused:
	 */
	fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL);

	/* mark the end of the clear/draw cmds before emitting per-tile cmds: */
	fd_ringmarker_mark(ctx->draw_end);
	fd_ringmarker_mark(ctx->binning_end);

	fd_reset_wfi(ctx);

	ctx->stats.batch_total++;

	if (sysmem) {
		DBG("rendering sysmem (%s/%s)",
			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
		fd_hw_query_prepare(ctx, 1);
		render_sysmem(ctx);
		ctx->stats.batch_sysmem++;
	} else {
		struct fd_gmem_stateobj *gmem = &ctx->gmem;
		calculate_tiles(ctx);
		DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y,
			util_format_short_name(pipe_surface_format(pfb->cbufs[0])),
			util_format_short_name(pipe_surface_format(pfb->zsbuf)));
		fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y);
		render_tiles(ctx);
		ctx->stats.batch_gmem++;
	}

	/* GPU executes starting from tile cmds, which IB back to draw cmds: */
	fd_ringmarker_flush(ctx->draw_end);

	/* mark start for next draw/binning cmds: */
	fd_ringmarker_mark(ctx->draw_start);
	fd_ringmarker_mark(ctx->binning_start);

	fd_reset_wfi(ctx);

	/* reset maximal bounds (minx/miny to max value, maxx/maxy to zero,
	 * so the next draws grow the bounds from scratch):
	 */
	ctx->max_scissor.minx = ctx->max_scissor.miny = ~0;
	ctx->max_scissor.maxx = ctx->max_scissor.maxy = 0;

	/* force full state re-emit for the next batch: */
	ctx->dirty = ~0;
}
412
 
413
/* tile needs restore if it isn't completely contained within the
414
 * cleared scissor:
415
 */
416
static bool
417
skip_restore(struct pipe_scissor_state *scissor, struct fd_tile *tile)
418
{
419
	unsigned minx = tile->xoff;
420
	unsigned maxx = tile->xoff + tile->bin_w;
421
	unsigned miny = tile->yoff;
422
	unsigned maxy = tile->yoff + tile->bin_h;
423
	return (minx >= scissor->minx) && (maxx <= scissor->maxx) &&
424
			(miny >= scissor->miny) && (maxy <= scissor->maxy);
425
}
426
 
427
/* When deciding whether a tile needs mem2gmem, we need to take into
428
 * account the scissor rect(s) that were cleared.  To simplify we only
429
 * consider the last scissor rect for each buffer, since the common
430
 * case would be a single clear.
431
 */
432
bool
433
fd_gmem_needs_restore(struct fd_context *ctx, struct fd_tile *tile,
434
		uint32_t buffers)
435
{
436
	if (!(ctx->restore & buffers))
437
		return false;
438
 
439
	/* if buffers partially cleared, then slow-path to figure out
440
	 * if this particular tile needs restoring:
441
	 */
442
	if ((buffers & FD_BUFFER_COLOR) &&
443
			(ctx->partial_cleared & FD_BUFFER_COLOR) &&
444
			skip_restore(&ctx->cleared_scissor.color, tile))
445
		return false;
446
	if ((buffers & FD_BUFFER_DEPTH) &&
447
			(ctx->partial_cleared & FD_BUFFER_DEPTH) &&
448
			skip_restore(&ctx->cleared_scissor.depth, tile))
449
		return false;
450
	if ((buffers & FD_BUFFER_STENCIL) &&
451
			(ctx->partial_cleared & FD_BUFFER_STENCIL) &&
452
			skip_restore(&ctx->cleared_scissor.stencil, tile))
453
		return false;
454
 
455
	return true;
456
}