Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ |
2 | |||
3 | /* |
||
4 | * Copyright (C) 2012 Rob Clark |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice (including the next |
||
14 | * paragraph) shall be included in all copies or substantial portions of the |
||
15 | * Software. |
||
16 | * |
||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
23 | * SOFTWARE. |
||
24 | * |
||
25 | * Authors: |
||
26 | * Rob Clark |
||
27 | */ |
||
28 | |||
29 | #include "pipe/p_state.h" |
||
30 | #include "util/u_string.h" |
||
31 | #include "util/u_memory.h" |
||
32 | #include "util/u_inlines.h" |
||
33 | #include "util/u_format.h" |
||
34 | |||
35 | #include "freedreno_gmem.h" |
||
36 | #include "freedreno_context.h" |
||
37 | #include "freedreno_resource.h" |
||
38 | #include "freedreno_query_hw.h" |
||
39 | #include "freedreno_util.h" |
||
40 | |||
41 | /* |
||
42 | * GMEM is the small (ie. 256KiB for a200, 512KiB for a220, etc) tile buffer |
||
43 | * inside the GPU. All rendering happens to GMEM. Larger render targets |
||
44 | * are split into tiles that are small enough for the color (and depth and/or |
||
45 | * stencil, if enabled) buffers to fit within GMEM. Before rendering a tile, |
||
46 | * if there was not a clear invalidating the previous tile contents, we need |
||
47 | * to restore the previous tiles contents (system mem -> GMEM), and after all |
||
48 | * the draw calls, before moving to the next tile, we need to save the tile |
||
49 | * contents (GMEM -> system mem). |
||
50 | * |
||
51 | * The code in this file handles dealing with GMEM and tiling. |
||
52 | * |
||
53 | * The structure of the ringbuffer ends up being: |
||
54 | * |
||
55 | * +--<---<-- IB ---<---+---<---+---<---<---<--+ |
||
56 | * | | | | |
||
57 | * v ^ ^ ^ |
||
58 | * ------------------------------------------------------ |
||
59 | * | clear/draw cmds | Tile0 | Tile1 | .... | TileN | |
||
60 | * ------------------------------------------------------ |
||
61 | * ^ |
||
62 | * | |
||
63 | * address submitted in issueibcmds |
||
64 | * |
||
65 | * Where the per-tile section handles scissor setup, mem2gmem restore (if |
||
66 | * needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem |
||
67 | * resolve. |
||
68 | */ |
||
69 | |||
70 | static uint32_t bin_width(struct fd_context *ctx) |
||
71 | { |
||
72 | if (is_a4xx(ctx->screen)) |
||
73 | return 1024; |
||
74 | if (is_a3xx(ctx->screen)) |
||
75 | return 992; |
||
76 | return 512; |
||
77 | } |
||
78 | |||
79 | static uint32_t |
||
80 | total_size(uint8_t cbuf_cpp[], uint8_t zsbuf_cpp[2], |
||
81 | uint32_t bin_w, uint32_t bin_h, struct fd_gmem_stateobj *gmem) |
||
82 | { |
||
83 | uint32_t total = 0, i; |
||
84 | |||
85 | for (i = 0; i < 4; i++) { |
||
86 | if (cbuf_cpp[i]) { |
||
87 | gmem->cbuf_base[i] = align(total, 0x4000); |
||
88 | total = gmem->cbuf_base[i] + cbuf_cpp[i] * bin_w * bin_h; |
||
89 | } |
||
90 | } |
||
91 | |||
92 | if (zsbuf_cpp[0]) { |
||
93 | gmem->zsbuf_base[0] = align(total, 0x4000); |
||
94 | total = gmem->zsbuf_base[0] + zsbuf_cpp[0] * bin_w * bin_h; |
||
95 | } |
||
96 | |||
97 | if (zsbuf_cpp[1]) { |
||
98 | gmem->zsbuf_base[1] = align(total, 0x4000); |
||
99 | total = gmem->zsbuf_base[1] + zsbuf_cpp[1] * bin_w * bin_h; |
||
100 | } |
||
101 | |||
102 | return total; |
||
103 | } |
||
104 | |||
/* Recompute the GMEM tiling configuration (bin size, bin counts, VSC
 * pipe assignment, and per-tile parameters) for the current framebuffer
 * and accumulated scissor bounds.  Results are cached in ctx->gmem and
 * the function returns early if the inputs (per-buffer cpp values and
 * scissor) are unchanged since the last call.
 */
static void
calculate_tiles(struct fd_context *ctx)
{
	struct fd_gmem_stateobj *gmem = &ctx->gmem;
	struct pipe_scissor_state *scissor = &ctx->max_scissor;
	struct pipe_framebuffer_state *pfb = &ctx->framebuffer;
	uint32_t gmem_size = ctx->screen->gmemsize_bytes;
	uint32_t minx, miny, width, height;
	uint32_t nbins_x = 1, nbins_y = 1;
	uint32_t bin_w, bin_h;
	uint32_t max_width = bin_width(ctx);
	uint8_t cbuf_cpp[4] = {0}, zsbuf_cpp[2] = {0};
	uint32_t i, j, t, xoff, yoff;
	uint32_t tpp_x, tpp_y;           /* tiles-per-pipe in each dimension */
	bool has_zs = !!(ctx->resolve & (FD_BUFFER_DEPTH | FD_BUFFER_STENCIL));
	int tile_n[ARRAY_SIZE(ctx->pipe)];  /* per-pipe tile counter */

	/* gather bytes-per-pixel for depth (and separate stencil, if any): */
	if (has_zs) {
		struct fd_resource *rsc = fd_resource(pfb->zsbuf->texture);
		zsbuf_cpp[0] = rsc->cpp;
		if (rsc->stencil)
			zsbuf_cpp[1] = rsc->stencil->cpp;
	}
	/* gather bytes-per-pixel for each bound color buffer (unbound MRT
	 * slots are treated as 4 bytes/pixel):
	 */
	for (i = 0; i < pfb->nr_cbufs; i++) {
		if (pfb->cbufs[i])
			cbuf_cpp[i] = util_format_get_blocksize(pfb->cbufs[i]->format);
		else
			cbuf_cpp[i] = 4;
	}

	/* early-out if nothing that affects tiling has changed: */
	if (!memcmp(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp)) &&
			!memcmp(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp)) &&
			!memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) {
		/* everything is up-to-date */
		return;
	}

	/* FD_DBG_NOSCIS disables the scissor optimization and bins the
	 * whole framebuffer:
	 */
	if (fd_mesa_debug & FD_DBG_NOSCIS) {
		minx = 0;
		miny = 0;
		width = pfb->width;
		height = pfb->height;
	} else {
		minx = scissor->minx & ~31; /* round down to multiple of 32 */
		miny = scissor->miny & ~31;
		width = scissor->maxx - minx;
		height = scissor->maxy - miny;
	}

	/* bin dimensions are hardware-aligned to multiples of 32: */
	bin_w = align(width, 32);
	bin_h = align(height, 32);

	/* first, find a bin width that satisfies the maximum width
	 * restrictions:
	 */
	while (bin_w > max_width) {
		nbins_x++;
		bin_w = align(width / nbins_x, 32);
	}

	/* then find a bin width/height that satisfies the memory
	 * constraints:
	 */
	DBG("binning input: cbuf cpp: %d %d %d %d, zsbuf cpp: %d; %dx%d",
			cbuf_cpp[0], cbuf_cpp[1], cbuf_cpp[2], cbuf_cpp[3], zsbuf_cpp[0],
			width, height);
	/* shrink along whichever dimension is currently larger until all
	 * enabled buffers for one bin fit within GMEM:
	 */
	while (total_size(cbuf_cpp, zsbuf_cpp, bin_w, bin_h, gmem) > gmem_size) {
		if (bin_w > bin_h) {
			nbins_x++;
			bin_w = align(width / nbins_x, 32);
		} else {
			nbins_y++;
			bin_h = align(height / nbins_y, 32);
		}
	}

	DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h);

	/* cache inputs + results so the early-out above works next time: */
	gmem->scissor = *scissor;
	memcpy(gmem->cbuf_cpp, cbuf_cpp, sizeof(cbuf_cpp));
	memcpy(gmem->zsbuf_cpp, zsbuf_cpp, sizeof(zsbuf_cpp));
	gmem->bin_h = bin_h;
	gmem->bin_w = bin_w;
	gmem->nbins_x = nbins_x;
	gmem->nbins_y = nbins_y;
	gmem->minx = minx;
	gmem->miny = miny;
	gmem->width = width;
	gmem->height = height;

	/*
	 * Assign tiles and pipes:
	 *
	 * At some point it might be worth playing with different
	 * strategies and seeing if that makes much impact on
	 * performance.
	 */

#define div_round_up(v, a)  (((v) + (a) - 1) / (a))
	/* figure out number of tiles per pipe: */
	tpp_x = tpp_y = 1;
	/* grow tiles-per-pipe until the grid fits in the available VSC
	 * pipes (8):
	 */
	while (div_round_up(nbins_y, tpp_y) > 8)
		tpp_y += 2;
	while ((div_round_up(nbins_y, tpp_y) *
			div_round_up(nbins_x, tpp_x)) > 8)
		tpp_x += 1;

	/* configure pipes: */
	xoff = yoff = 0;
	for (i = 0; i < ARRAY_SIZE(ctx->pipe); i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];

		/* wrap to the next row of pipes when past the right edge: */
		if (xoff >= nbins_x) {
			xoff = 0;
			yoff += tpp_y;
		}

		if (yoff >= nbins_y) {
			break;
		}

		pipe->x = xoff;
		pipe->y = yoff;
		/* clamp so the last pipe in a row/column doesn't extend past
		 * the bin grid:
		 */
		pipe->w = MIN2(tpp_x, nbins_x - xoff);
		pipe->h = MIN2(tpp_y, nbins_y - yoff);

		xoff += tpp_x;
	}

	/* zero out any remaining unused pipes: */
	for (; i < ARRAY_SIZE(ctx->pipe); i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
		pipe->x = pipe->y = pipe->w = pipe->h = 0;
	}

#if 0 /* debug */
	printf("%dx%d ... tpp=%dx%d\n", nbins_x, nbins_y, tpp_x, tpp_y);
	for (i = 0; i < 8; i++) {
		struct fd_vsc_pipe *pipe = &ctx->pipe[i];
		printf("pipe[%d]: %ux%u @ %u,%u\n", i,
				pipe->w, pipe->h, pipe->x, pipe->y);
	}
#endif

	/* configure tiles: */
	t = 0;
	yoff = miny;
	memset(tile_n, 0, sizeof(tile_n));
	for (i = 0; i < nbins_y; i++) {
		uint32_t bw, bh;

		xoff = minx;

		/* clip bin height: */
		bh = MIN2(bin_h, miny + height - yoff);

		for (j = 0; j < nbins_x; j++) {
			struct fd_tile *tile = &ctx->tile[t];
			uint32_t p;

			assert(t < ARRAY_SIZE(ctx->tile));

			/* pipe number: */
			p = ((i / tpp_y) * div_round_up(nbins_x, tpp_x)) + (j / tpp_x);

			/* clip bin width: */
			bw = MIN2(bin_w, minx + width - xoff);

			/* n is this tile's index within its pipe: */
			tile->n = tile_n[p]++;
			tile->p = p;
			tile->bin_w = bw;
			tile->bin_h = bh;
			tile->xoff = xoff;
			tile->yoff = yoff;

			t++;

			xoff += bw;
		}

		yoff += bh;
	}

#if 0 /* debug */
	t = 0;
	for (i = 0; i < nbins_y; i++) {
		for (j = 0; j < nbins_x; j++) {
			struct fd_tile *tile = &ctx->tile[t++];
			printf("|p:%u n:%u|", tile->p, tile->n);
		}
		printf("\n");
	}
#endif
}
||
298 | |||
299 | static void |
||
300 | render_tiles(struct fd_context *ctx) |
||
301 | { |
||
302 | struct fd_gmem_stateobj *gmem = &ctx->gmem; |
||
303 | int i; |
||
304 | |||
305 | ctx->emit_tile_init(ctx); |
||
306 | |||
307 | if (ctx->restore) |
||
308 | ctx->stats.batch_restore++; |
||
309 | |||
310 | for (i = 0; i < (gmem->nbins_x * gmem->nbins_y); i++) { |
||
311 | struct fd_tile *tile = &ctx->tile[i]; |
||
312 | |||
313 | DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d", |
||
314 | tile->bin_h, tile->yoff, tile->bin_w, tile->xoff); |
||
315 | |||
316 | ctx->emit_tile_prep(ctx, tile); |
||
317 | |||
318 | if (ctx->restore) { |
||
319 | fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_MEM2GMEM); |
||
320 | ctx->emit_tile_mem2gmem(ctx, tile); |
||
321 | fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); |
||
322 | } |
||
323 | |||
324 | ctx->emit_tile_renderprep(ctx, tile); |
||
325 | |||
326 | fd_hw_query_prepare_tile(ctx, i, ctx->ring); |
||
327 | |||
328 | /* emit IB to drawcmds: */ |
||
329 | OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end); |
||
330 | fd_reset_wfi(ctx); |
||
331 | |||
332 | /* emit gmem2mem to transfer tile back to system memory: */ |
||
333 | fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_GMEM2MEM); |
||
334 | ctx->emit_tile_gmem2mem(ctx, tile); |
||
335 | fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); |
||
336 | } |
||
337 | } |
||
338 | |||
/* Emit a single-pass render directly to system memory, bypassing GMEM
 * tiling: sysmem setup, then one IB into the draw commands.  No
 * mem2gmem/gmem2mem transfers are needed in this path.
 */
static void
render_sysmem(struct fd_context *ctx)
{
	ctx->emit_sysmem_prep(ctx);

	/* only one "tile" in this path, so hw queries use tile index 0: */
	fd_hw_query_prepare_tile(ctx, 0, ctx->ring);

	/* emit IB to drawcmds: */
	OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end);
	fd_reset_wfi(ctx);
}
||
350 | |||
351 | void |
||
352 | fd_gmem_render_tiles(struct fd_context *ctx) |
||
353 | { |
||
354 | struct pipe_framebuffer_state *pfb = &ctx->framebuffer; |
||
355 | bool sysmem = false; |
||
356 | |||
357 | if (ctx->emit_sysmem_prep) { |
||
358 | if (ctx->cleared || ctx->gmem_reason || (ctx->num_draws > 5)) { |
||
359 | DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u", |
||
360 | ctx->cleared, ctx->gmem_reason, ctx->num_draws); |
||
361 | } else if (!(fd_mesa_debug & FD_DBG_NOBYPASS)) { |
||
362 | sysmem = true; |
||
363 | } |
||
364 | } |
||
365 | |||
366 | /* close out the draw cmds by making sure any active queries are |
||
367 | * paused: |
||
368 | */ |
||
369 | fd_hw_query_set_stage(ctx, ctx->ring, FD_STAGE_NULL); |
||
370 | |||
371 | /* mark the end of the clear/draw cmds before emitting per-tile cmds: */ |
||
372 | fd_ringmarker_mark(ctx->draw_end); |
||
373 | fd_ringmarker_mark(ctx->binning_end); |
||
374 | |||
375 | fd_reset_wfi(ctx); |
||
376 | |||
377 | ctx->stats.batch_total++; |
||
378 | |||
379 | if (sysmem) { |
||
380 | DBG("rendering sysmem (%s/%s)", |
||
381 | util_format_short_name(pipe_surface_format(pfb->cbufs[0])), |
||
382 | util_format_short_name(pipe_surface_format(pfb->zsbuf))); |
||
383 | fd_hw_query_prepare(ctx, 1); |
||
384 | render_sysmem(ctx); |
||
385 | ctx->stats.batch_sysmem++; |
||
386 | } else { |
||
387 | struct fd_gmem_stateobj *gmem = &ctx->gmem; |
||
388 | calculate_tiles(ctx); |
||
389 | DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, |
||
390 | util_format_short_name(pipe_surface_format(pfb->cbufs[0])), |
||
391 | util_format_short_name(pipe_surface_format(pfb->zsbuf))); |
||
392 | fd_hw_query_prepare(ctx, gmem->nbins_x * gmem->nbins_y); |
||
393 | render_tiles(ctx); |
||
394 | ctx->stats.batch_gmem++; |
||
395 | } |
||
396 | |||
397 | /* GPU executes starting from tile cmds, which IB back to draw cmds: */ |
||
398 | fd_ringmarker_flush(ctx->draw_end); |
||
399 | |||
400 | /* mark start for next draw/binning cmds: */ |
||
401 | fd_ringmarker_mark(ctx->draw_start); |
||
402 | fd_ringmarker_mark(ctx->binning_start); |
||
403 | |||
404 | fd_reset_wfi(ctx); |
||
405 | |||
406 | /* reset maximal bounds: */ |
||
407 | ctx->max_scissor.minx = ctx->max_scissor.miny = ~0; |
||
408 | ctx->max_scissor.maxx = ctx->max_scissor.maxy = 0; |
||
409 | |||
410 | ctx->dirty = ~0; |
||
411 | } |
||
412 | |||
413 | /* tile needs restore if it isn't completely contained within the |
||
414 | * cleared scissor: |
||
415 | */ |
||
416 | static bool |
||
417 | skip_restore(struct pipe_scissor_state *scissor, struct fd_tile *tile) |
||
418 | { |
||
419 | unsigned minx = tile->xoff; |
||
420 | unsigned maxx = tile->xoff + tile->bin_w; |
||
421 | unsigned miny = tile->yoff; |
||
422 | unsigned maxy = tile->yoff + tile->bin_h; |
||
423 | return (minx >= scissor->minx) && (maxx <= scissor->maxx) && |
||
424 | (miny >= scissor->miny) && (maxy <= scissor->maxy); |
||
425 | } |
||
426 | |||
427 | /* When deciding whether a tile needs mem2gmem, we need to take into |
||
428 | * account the scissor rect(s) that were cleared. To simplify we only |
||
429 | * consider the last scissor rect for each buffer, since the common |
||
430 | * case would be a single clear. |
||
431 | */ |
||
432 | bool |
||
433 | fd_gmem_needs_restore(struct fd_context *ctx, struct fd_tile *tile, |
||
434 | uint32_t buffers) |
||
435 | { |
||
436 | if (!(ctx->restore & buffers)) |
||
437 | return false; |
||
438 | |||
439 | /* if buffers partially cleared, then slow-path to figure out |
||
440 | * if this particular tile needs restoring: |
||
441 | */ |
||
442 | if ((buffers & FD_BUFFER_COLOR) && |
||
443 | (ctx->partial_cleared & FD_BUFFER_COLOR) && |
||
444 | skip_restore(&ctx->cleared_scissor.color, tile)) |
||
445 | return false; |
||
446 | if ((buffers & FD_BUFFER_DEPTH) && |
||
447 | (ctx->partial_cleared & FD_BUFFER_DEPTH) && |
||
448 | skip_restore(&ctx->cleared_scissor.depth, tile)) |
||
449 | return false; |
||
450 | if ((buffers & FD_BUFFER_STENCIL) && |
||
451 | (ctx->partial_cleared & FD_BUFFER_STENCIL) && |
||
452 | skip_restore(&ctx->cleared_scissor.stencil, tile)) |
||
453 | return false; |
||
454 | |||
455 | return true; |
||
456 | }=>=>>>>>>>>>>>>--+ |