Rev 4358 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */ |
2 | |||
3 | /* |
||
4 | * Copyright (C) 2012 Rob Clark |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the "Software"), |
||
8 | * to deal in the Software without restriction, including without limitation |
||
9 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
10 | * and/or sell copies of the Software, and to permit persons to whom the |
||
11 | * Software is furnished to do so, subject to the following conditions: |
||
12 | * |
||
13 | * The above copyright notice and this permission notice (including the next |
||
14 | * paragraph) shall be included in all copies or substantial portions of the |
||
15 | * Software. |
||
16 | * |
||
17 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
18 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
19 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
20 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
21 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, |
||
22 | * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE |
||
23 | * SOFTWARE. |
||
24 | * |
||
25 | * Authors: |
||
26 | * Rob Clark |
||
27 | */ |
||
28 | |||
29 | #include "pipe/p_state.h" |
||
30 | #include "util/u_string.h" |
||
31 | #include "util/u_memory.h" |
||
32 | #include "util/u_inlines.h" |
||
33 | #include "util/u_format.h" |
||
34 | |||
35 | #include "freedreno_gmem.h" |
||
36 | #include "freedreno_context.h" |
||
37 | #include "freedreno_resource.h" |
||
38 | #include "freedreno_util.h" |
||
39 | |||
/*
 * GMEM is the small (ie. 256KiB for a200, 512KiB for a220, etc) tile buffer
 * inside the GPU.  All rendering happens to GMEM.  Larger render targets
 * are split into tiles that are small enough for the color (and depth and/or
 * stencil, if enabled) buffers to fit within GMEM.  Before rendering a tile,
 * if there was not a clear invalidating the previous tile contents, we need
 * to restore the previous tile's contents (system mem -> GMEM), and after all
 * the draw calls, before moving to the next tile, we need to save the tile
 * contents (GMEM -> system mem).
 *
 * The code in this file handles dealing with GMEM and tiling.
 *
 * The structure of the ringbuffer ends up being:
 *
 *     +--<---<-- IB ---<---+---<---+---<---<---<--+
 *     |                    |       |              |
 *     v                    ^       ^              ^
 *   ------------------------------------------------------
 *     | clear/draw cmds | Tile0 | Tile1 | .... | TileN |
 *   ------------------------------------------------------
 *                       ^
 *                       |
 *                       address submitted in issueibcmds
 *
 * Where the per-tile section handles scissor setup, mem2gmem restore (if
 * needed), IB to draw cmds earlier in the ringbuffer, and then gmem2mem
 * resolve.
 */
||
68 | |||
69 | static void |
||
70 | calculate_tiles(struct fd_context *ctx) |
||
71 | { |
||
72 | struct fd_gmem_stateobj *gmem = &ctx->gmem; |
||
73 | struct pipe_scissor_state *scissor = &ctx->max_scissor; |
||
4401 | Serge | 74 | struct pipe_framebuffer_state *pfb = &ctx->framebuffer; |
4358 | Serge | 75 | uint32_t gmem_size = ctx->screen->gmemsize_bytes; |
76 | uint32_t minx, miny, width, height; |
||
77 | uint32_t nbins_x = 1, nbins_y = 1; |
||
78 | uint32_t bin_w, bin_h; |
||
79 | uint32_t max_width = 992; |
||
4401 | Serge | 80 | uint32_t cpp = 4; |
4358 | Serge | 81 | |
4401 | Serge | 82 | if (pfb->cbufs[0]) |
83 | cpp = util_format_get_blocksize(pfb->cbufs[0]->format); |
||
84 | |||
4358 | Serge | 85 | if ((gmem->cpp == cpp) && |
86 | !memcmp(&gmem->scissor, scissor, sizeof(gmem->scissor))) { |
||
87 | /* everything is up-to-date */ |
||
88 | return; |
||
89 | } |
||
90 | |||
4401 | Serge | 91 | if (fd_mesa_debug & FD_DBG_DSCIS) { |
92 | minx = 0; |
||
93 | miny = 0; |
||
94 | width = pfb->width; |
||
95 | height = pfb->height; |
||
96 | } else { |
||
97 | minx = scissor->minx & ~31; /* round down to multiple of 32 */ |
||
98 | miny = scissor->miny & ~31; |
||
99 | width = scissor->maxx - minx; |
||
100 | height = scissor->maxy - miny; |
||
101 | } |
||
4358 | Serge | 102 | |
103 | // TODO we probably could optimize this a bit if we know that |
||
104 | // Z or stencil is not enabled for any of the draw calls.. |
||
105 | // if (fd_stencil_enabled(ctx->zsa) || fd_depth_enabled(ctx->zsa)) { |
||
106 | gmem_size /= 2; |
||
107 | max_width = 256; |
||
108 | // } |
||
109 | |||
110 | bin_w = align(width, 32); |
||
111 | bin_h = align(height, 32); |
||
112 | |||
113 | /* first, find a bin width that satisfies the maximum width |
||
114 | * restrictions: |
||
115 | */ |
||
116 | while (bin_w > max_width) { |
||
117 | nbins_x++; |
||
118 | bin_w = align(width / nbins_x, 32); |
||
119 | } |
||
120 | |||
121 | /* then find a bin height that satisfies the memory constraints: |
||
122 | */ |
||
123 | while ((bin_w * bin_h * cpp) > gmem_size) { |
||
124 | nbins_y++; |
||
125 | bin_h = align(height / nbins_y, 32); |
||
126 | } |
||
127 | |||
128 | DBG("using %d bins of size %dx%d", nbins_x*nbins_y, bin_w, bin_h); |
||
129 | |||
130 | gmem->scissor = *scissor; |
||
131 | gmem->cpp = cpp; |
||
132 | gmem->minx = minx; |
||
133 | gmem->miny = miny; |
||
134 | gmem->bin_h = bin_h; |
||
135 | gmem->bin_w = bin_w; |
||
136 | gmem->nbins_x = nbins_x; |
||
137 | gmem->nbins_y = nbins_y; |
||
138 | gmem->width = width; |
||
139 | gmem->height = height; |
||
140 | } |
||
141 | |||
142 | static void |
||
143 | render_tiles(struct fd_context *ctx) |
||
144 | { |
||
145 | struct fd_gmem_stateobj *gmem = &ctx->gmem; |
||
4401 | Serge | 146 | uint32_t i, yoff = gmem->miny; |
4358 | Serge | 147 | |
148 | ctx->emit_tile_init(ctx); |
||
149 | |||
150 | for (i = 0; i < gmem->nbins_y; i++) { |
||
151 | uint32_t j, xoff = gmem->minx; |
||
152 | uint32_t bh = gmem->bin_h; |
||
153 | |||
154 | /* clip bin height: */ |
||
4401 | Serge | 155 | bh = MIN2(bh, gmem->miny + gmem->height - yoff); |
4358 | Serge | 156 | |
157 | for (j = 0; j < gmem->nbins_x; j++) { |
||
158 | uint32_t bw = gmem->bin_w; |
||
159 | |||
160 | /* clip bin width: */ |
||
4401 | Serge | 161 | bw = MIN2(bw, gmem->minx + gmem->width - xoff); |
4358 | Serge | 162 | |
163 | DBG("bin_h=%d, yoff=%d, bin_w=%d, xoff=%d", |
||
164 | bh, yoff, bw, xoff); |
||
165 | |||
166 | ctx->emit_tile_prep(ctx, xoff, yoff, bw, bh); |
||
167 | |||
168 | if (ctx->restore) |
||
169 | ctx->emit_tile_mem2gmem(ctx, xoff, yoff, bw, bh); |
||
170 | |||
171 | ctx->emit_tile_renderprep(ctx, xoff, yoff, bw, bh); |
||
172 | |||
173 | /* emit IB to drawcmds: */ |
||
174 | OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end); |
||
175 | |||
176 | /* emit gmem2mem to transfer tile back to system memory: */ |
||
177 | ctx->emit_tile_gmem2mem(ctx, xoff, yoff, bw, bh); |
||
178 | |||
179 | xoff += bw; |
||
180 | } |
||
181 | |||
182 | yoff += bh; |
||
183 | } |
||
184 | } |
||
185 | |||
186 | static void |
||
187 | render_sysmem(struct fd_context *ctx) |
||
188 | { |
||
189 | ctx->emit_sysmem_prep(ctx); |
||
190 | |||
191 | /* emit IB to drawcmds: */ |
||
192 | OUT_IB(ctx->ring, ctx->draw_start, ctx->draw_end); |
||
193 | } |
||
194 | |||
195 | void |
||
196 | fd_gmem_render_tiles(struct pipe_context *pctx) |
||
197 | { |
||
198 | struct fd_context *ctx = fd_context(pctx); |
||
199 | struct pipe_framebuffer_state *pfb = &ctx->framebuffer; |
||
200 | uint32_t timestamp = 0; |
||
201 | bool sysmem = false; |
||
202 | |||
203 | if (ctx->emit_sysmem_prep) { |
||
204 | if (ctx->cleared || ctx->gmem_reason || (ctx->num_draws > 5)) { |
||
205 | DBG("GMEM: cleared=%x, gmem_reason=%x, num_draws=%u", |
||
206 | ctx->cleared, ctx->gmem_reason, ctx->num_draws); |
||
207 | } else { |
||
208 | sysmem = true; |
||
209 | } |
||
210 | } |
||
211 | |||
212 | /* mark the end of the clear/draw cmds before emitting per-tile cmds: */ |
||
213 | fd_ringmarker_mark(ctx->draw_end); |
||
214 | |||
215 | if (sysmem) { |
||
216 | DBG("rendering sysmem (%s/%s)", |
||
4401 | Serge | 217 | util_format_short_name(pipe_surface_format(pfb->cbufs[0])), |
218 | util_format_short_name(pipe_surface_format(pfb->zsbuf))); |
||
4358 | Serge | 219 | render_sysmem(ctx); |
220 | } else { |
||
221 | struct fd_gmem_stateobj *gmem = &ctx->gmem; |
||
4401 | Serge | 222 | calculate_tiles(ctx); |
4358 | Serge | 223 | DBG("rendering %dx%d tiles (%s/%s)", gmem->nbins_x, gmem->nbins_y, |
4401 | Serge | 224 | util_format_short_name(pipe_surface_format(pfb->cbufs[0])), |
225 | util_format_short_name(pipe_surface_format(pfb->zsbuf))); |
||
4358 | Serge | 226 | render_tiles(ctx); |
227 | } |
||
228 | |||
229 | /* GPU executes starting from tile cmds, which IB back to draw cmds: */ |
||
230 | fd_ringmarker_flush(ctx->draw_end); |
||
231 | |||
232 | /* mark start for next draw cmds: */ |
||
233 | fd_ringmarker_mark(ctx->draw_start); |
||
234 | |||
235 | /* update timestamps on render targets: */ |
||
236 | timestamp = fd_ringbuffer_timestamp(ctx->ring); |
||
4401 | Serge | 237 | if (pfb->cbufs[0]) |
238 | fd_resource(pfb->cbufs[0]->texture)->timestamp = timestamp; |
||
4358 | Serge | 239 | if (pfb->zsbuf) |
240 | fd_resource(pfb->zsbuf->texture)->timestamp = timestamp; |
||
241 | |||
242 | /* reset maximal bounds: */ |
||
243 | ctx->max_scissor.minx = ctx->max_scissor.miny = ~0; |
||
244 | ctx->max_scissor.maxx = ctx->max_scissor.maxy = 0; |
||
245 | |||
246 | /* Note that because the per-tile setup and mem2gmem/gmem2mem are emitted |
||
247 | * after the draw/clear calls, but executed before, we need to preemptively |
||
248 | * flag some state as dirty before the first draw/clear call. |
||
249 | * |
||
250 | * TODO maybe we need to mark all state as dirty to not worry about state |
||
251 | * being clobbered by other contexts? |
||
252 | */ |
||
253 | ctx->dirty |= FD_DIRTY_ZSA | |
||
254 | FD_DIRTY_RASTERIZER | |
||
255 | FD_DIRTY_FRAMEBUFFER | |
||
256 | FD_DIRTY_SAMPLE_MASK | |
||
257 | FD_DIRTY_VIEWPORT | |
||
258 | FD_DIRTY_CONSTBUF | |
||
259 | FD_DIRTY_PROG | |
||
260 | FD_DIRTY_SCISSOR | |
||
261 | /* probably only needed if we need to mem2gmem on the next |
||
262 | * draw.. but not sure if there is a good way to know? |
||
263 | */ |
||
264 | FD_DIRTY_VERTTEX | |
||
265 | FD_DIRTY_FRAGTEX | |
||
266 | FD_DIRTY_BLEND; |
||
267 | |||
268 | if (fd_mesa_debug & FD_DBG_DGMEM) |
||
269 | ctx->dirty = 0xffffffff; |
||
270 | }>>--+ |