Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | Copyright (C) Intel Corp. 2006. All Rights Reserved. |
||
3 | Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
||
4 | develop this 3D driver. |
||
5 | |||
6 | Permission is hereby granted, free of charge, to any person obtaining |
||
7 | a copy of this software and associated documentation files (the |
||
8 | "Software"), to deal in the Software without restriction, including |
||
9 | without limitation the rights to use, copy, modify, merge, publish, |
||
10 | distribute, sublicense, and/or sell copies of the Software, and to |
||
11 | permit persons to whom the Software is furnished to do so, subject to |
||
12 | the following conditions: |
||
13 | |||
14 | The above copyright notice and this permission notice (including the |
||
15 | next paragraph) shall be included in all copies or substantial |
||
16 | portions of the Software. |
||
17 | |||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
21 | IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
22 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
24 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | |||
26 | **********************************************************************/ |
||
27 | /* |
||
28 | * Authors: |
||
29 | * Keith Whitwell |
||
30 | */ |
||
31 | |||
32 | |||
33 | |||
34 | #include "intel_batchbuffer.h" |
||
35 | #include "intel_fbo.h" |
||
36 | #include "intel_mipmap_tree.h" |
||
37 | #include "intel_regions.h" |
||
38 | |||
39 | #include "brw_context.h" |
||
40 | #include "brw_state.h" |
||
41 | #include "brw_defines.h" |
||
42 | |||
43 | #include "main/fbobject.h" |
||
44 | #include "main/glformats.h" |
||
45 | |||
/**
 * Emit 3DSTATE_DRAWING_RECTANGLE: a constant single cliprect for
 * framebuffer object or DRI2 drawing.
 *
 * The rectangle covers the whole drawbuffer, (0,0) through
 * (Width-1, Height-1) inclusive, with a zero drawing-rectangle origin
 * offset in the final dword.
 */
static void upload_drawing_rect(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;

   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2));
   OUT_BATCH(0); /* xmin, ymin */
   /* xmax in the low 16 bits, ymax in the high 16 bits (inclusive) */
   OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) |
             ((ctx->DrawBuffer->Height - 1) << 16));
   OUT_BATCH(0); /* drawing-rectangle origin offset: 0,0 */
   ADVANCE_BATCH();
}
||
59 | |||
60 | const struct brw_tracked_state brw_drawing_rect = { |
||
61 | .dirty = { |
||
62 | .mesa = _NEW_BUFFERS, |
||
63 | .brw = BRW_NEW_CONTEXT, |
||
64 | .cache = 0 |
||
65 | }, |
||
66 | .emit = upload_drawing_rect |
||
67 | }; |
||
68 | |||
/**
 * Upload the binding table pointers, which point each stage's array of surface
 * state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 *
 * Pre-Gen6 layout: one dword per fixed-function stage (VS, GS, CLIP, SF,
 * WM).  Only the VS and WM binding tables are emitted here; the middle
 * stages get 0 (no binding table).
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset);
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_BATCH(brw->wm.bind_bo_offset);
   ADVANCE_BATCH();
}
||
87 | |||
88 | const struct brw_tracked_state brw_binding_table_pointers = { |
||
89 | .dirty = { |
||
90 | .mesa = 0, |
||
91 | .brw = (BRW_NEW_BATCH | |
||
92 | BRW_NEW_STATE_BASE_ADDRESS | |
||
93 | BRW_NEW_VS_BINDING_TABLE | |
||
94 | BRW_NEW_GS_BINDING_TABLE | |
||
95 | BRW_NEW_PS_BINDING_TABLE), |
||
96 | .cache = 0, |
||
97 | }, |
||
98 | .emit = upload_binding_table_pointers, |
||
99 | }; |
||
100 | |||
/**
 * Upload the binding table pointers, which point each stage's array of surface
 * state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 *
 * Gen6 layout: modify bits in the command dword select which stages'
 * pointers are updated, followed by one dword each for VS, GS and PS.
 */
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
             GEN6_BINDING_TABLE_MODIFY_VS |
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
   OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
   OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}
||
121 | |||
122 | const struct brw_tracked_state gen6_binding_table_pointers = { |
||
123 | .dirty = { |
||
124 | .mesa = 0, |
||
125 | .brw = (BRW_NEW_BATCH | |
||
126 | BRW_NEW_STATE_BASE_ADDRESS | |
||
127 | BRW_NEW_VS_BINDING_TABLE | |
||
128 | BRW_NEW_GS_BINDING_TABLE | |
||
129 | BRW_NEW_PS_BINDING_TABLE), |
||
130 | .cache = 0, |
||
131 | }, |
||
132 | .emit = upload_gen6_binding_table_pointers, |
||
133 | }; |
||
134 | |||
/**
 * Upload pointers to the per-stage state (3DSTATE_PIPELINED_POINTERS).
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw)
{
   if (brw->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
   /* Bit 0 of the GS and CLIP pointers is the unit-enable flag: GS is
    * enabled only while a GS program is active (0 disables the unit),
    * CLIP is always enabled.
    */
   if (brw->gs.prog_active)
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1);
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   /* Record that the pipelined state pointers were re-emitted so state
    * tracked on BRW_NEW_PSP gets revalidated.
    */
   brw->state.dirty.brw |= BRW_NEW_PSP;
}
||
171 | |||
/* Emit the pipelined unit state pointers, then the URB fence and CS URB
 * state that accompany them.
 */
static void upload_psp_urb_cbs(struct brw_context *brw)
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}
||
178 | |||
179 | const struct brw_tracked_state brw_psp_urb_cbs = { |
||
180 | .dirty = { |
||
181 | .mesa = 0, |
||
182 | .brw = (BRW_NEW_URB_FENCE | |
||
183 | BRW_NEW_BATCH | |
||
184 | BRW_NEW_STATE_BASE_ADDRESS), |
||
185 | .cache = (CACHE_NEW_VS_UNIT | |
||
186 | CACHE_NEW_GS_UNIT | |
||
187 | CACHE_NEW_GS_PROG | |
||
188 | CACHE_NEW_CLIP_UNIT | |
||
189 | CACHE_NEW_SF_UNIT | |
||
190 | CACHE_NEW_WM_UNIT | |
||
191 | CACHE_NEW_CC_UNIT) |
||
192 | }, |
||
193 | .emit = upload_psp_urb_cbs, |
||
194 | }; |
||
195 | |||
/**
 * Determine the 3DSTATE_DEPTH_BUFFER surface format for the current
 * drawbuffer.
 *
 * Normally derived from the depth attachment's miptree format.  When no
 * depth renderbuffer is bound but a packed depth/stencil renderbuffer is
 * bound to the stencil attachment (with no separate stencil miptree),
 * that buffer's format is used instead, since packed depth/stencil must
 * be programmed through the depth buffer packet.  With no usable
 * attachment at all, D32_FLOAT is returned as a safe default.
 */
uint32_t
brw_depthbuffer_format(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *srb;

   /* Fall back to a packed depth/stencil buffer bound only to the stencil
    * attachment.
    */
   if (!drb &&
       (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) &&
       !srb->mt->stencil_mt &&
       (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 ||
        intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) {
      drb = srb;
   }

   if (!drb)
      return BRW_DEPTHFORMAT_D32_FLOAT;

   switch (drb->mt->format) {
   case MESA_FORMAT_Z16:
      return BRW_DEPTHFORMAT_D16_UNORM;
   case MESA_FORMAT_Z32_FLOAT:
      return BRW_DEPTHFORMAT_D32_FLOAT;
   case MESA_FORMAT_X8_Z24:
      if (brw->gen >= 6) {
         return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT;
      } else {
         /* Use D24_UNORM_S8, not D24_UNORM_X8.
          *
          * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM,
          * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits
          * 3DSTATE_DEPTH_BUFFER.Surface_Format).
          *
          * However, on Gen5, D24_UNORM_X8 may be used only if separate
          * stencil is enabled, and we never enable it. From the Ironlake PRM,
          * same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable:
          *     If this field is disabled, the Surface Format of the depth
          *     buffer cannot be D24_UNORM_X8_UINT.
          */
         return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
      }
   case MESA_FORMAT_S8_Z24:
      return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT;
   case MESA_FORMAT_Z32_FLOAT_X24S8:
      return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT;
   default:
      /* Unexpected format: complain loudly but return a harmless value. */
      _mesa_problem(ctx, "Unexpected depth format %s\n",
                    _mesa_get_format_name(intel_rb_format(drb)));
      return BRW_DEPTHFORMAT_D16_UNORM;
   }
}
||
248 | |||
249 | /** |
||
250 | * Returns the mask of how many bits of x and y must be handled through the |
||
251 | * depthbuffer's draw offset x and y fields. |
||
252 | * |
||
253 | * The draw offset x/y field of the depthbuffer packet is unfortunately shared |
||
254 | * between the depth, hiz, and stencil buffers. Because it can be hard to get |
||
255 | * all 3 to agree on this value, we want to do as much drawing offset |
||
256 | * adjustment as possible by moving the base offset of the 3 buffers, which is |
||
257 | * restricted to tile boundaries. |
||
258 | * |
||
259 | * For each buffer, the remainder must be applied through the x/y draw offset. |
||
260 | * This returns the worst-case mask of the low bits that have to go into the |
||
261 | * packet. If the 3 buffers don't agree on the drawing offset ANDed with this |
||
262 | * mask, then we're in trouble. |
||
263 | */ |
||
void
brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt,
                                uint32_t depth_level,
                                uint32_t depth_layer,
                                struct intel_mipmap_tree *stencil_mt,
                                uint32_t *out_tile_mask_x,
                                uint32_t *out_tile_mask_y)
{
   uint32_t tile_mask_x = 0, tile_mask_y = 0;

   if (depth_mt) {
      intel_region_get_tile_masks(depth_mt->region,
                                  &tile_mask_x, &tile_mask_y, false);

      /* HiZ shares the same draw offset fields, so its tile mask must be
       * folded in when this slice has a HiZ buffer.
       */
      if (intel_miptree_slice_has_hiz(depth_mt, depth_level, depth_layer)) {
         uint32_t hiz_tile_mask_x, hiz_tile_mask_y;
         intel_region_get_tile_masks(depth_mt->hiz_mt->region,
                                     &hiz_tile_mask_x, &hiz_tile_mask_y, false);

         /* Each HiZ row represents 2 rows of pixels */
         hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1;

         tile_mask_x |= hiz_tile_mask_x;
         tile_mask_y |= hiz_tile_mask_y;
      }
   }

   if (stencil_mt) {
      /* Use the separate stencil miptree when the depth/stencil miptree
       * carries one.
       */
      if (stencil_mt->stencil_mt)
         stencil_mt = stencil_mt->stencil_mt;

      if (stencil_mt->format == MESA_FORMAT_S8) {
         /* Separate stencil buffer uses 64x64 tiles. */
         tile_mask_x |= 63;
         tile_mask_y |= 63;
      } else {
         uint32_t stencil_tile_mask_x, stencil_tile_mask_y;
         intel_region_get_tile_masks(stencil_mt->region,
                                     &stencil_tile_mask_x,
                                     &stencil_tile_mask_y, false);

         tile_mask_x |= stencil_tile_mask_x;
         tile_mask_y |= stencil_tile_mask_y;
      }
   }

   *out_tile_mask_x = tile_mask_x;
   *out_tile_mask_y = tile_mask_y;
}
||
313 | |||
314 | static struct intel_mipmap_tree * |
||
315 | get_stencil_miptree(struct intel_renderbuffer *irb) |
||
316 | { |
||
317 | if (!irb) |
||
318 | return NULL; |
||
319 | if (irb->mt->stencil_mt) |
||
320 | return irb->mt->stencil_mt; |
||
321 | return irb->mt; |
||
322 | } |
||
323 | |||
/**
 * Work around hardware draw-offset alignment restrictions for the bound
 * depth/stencil buffers.
 *
 * The depth, HiZ and stencil buffers share a single intra-tile draw
 * offset in 3DSTATE_DEPTH_BUFFER, and its low 3 bits must be zero.  When
 * the current attachments' offsets violate that (or on pre-G45 parts,
 * which have no intra-tile offsets at all), the offending renderbuffers
 * are blitted to freshly-allocated temporary miptrees so the offsets
 * line up.  The resulting miptrees and offsets are cached in
 * brw->depthstencil for the packet emitters.
 *
 * \param clear_mask  BUFFER_BIT_* bits about to be cleared; buffers that
 *                    are about to be fully cleared need not have their
 *                    contents preserved across the rebase blit.
 */
void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
                                      GLbitfield clear_mask)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   bool rebase_depth = false;
   bool rebase_stencil = false;
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = NULL;
   struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
   uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
   uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;

   if (depth_irb)
      depth_mt = depth_irb->mt;

   /* Check if depth buffer is in depth/stencil format.  If so, then it's only
    * safe to invalidate it if we're also clearing stencil, and both depth_irb
    * and stencil_irb point to the same miptree.
    *
    * Note: it's not sufficient to check for the case where
    * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
    * because this fails to catch depth/stencil buffers on hardware that uses
    * separate stencil.  To catch that case, we check whether
    * depth_mt->stencil_mt is non-NULL.
    */
   if (depth_irb && invalidate_depth &&
      (_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
       depth_mt->stencil_mt)) {
      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
         && depth_irb->mt == stencil_irb->mt;
   }

   uint32_t tile_mask_x, tile_mask_y;
   brw_get_depthstencil_tile_masks(depth_mt,
                                   depth_mt ? depth_irb->mt_level : 0,
                                   depth_mt ? depth_irb->mt_layer : 0,
                                   stencil_mt,
                                   &tile_mask_x, &tile_mask_y);

   if (depth_irb) {
      tile_x = depth_irb->draw_x & tile_mask_x;
      tile_y = depth_irb->draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       */
      if (tile_x & 7 || tile_y & 7)
         rebase_depth = true;

      /* We didn't even have intra-tile offsets before g45. */
      if (brw->gen == 4 && !brw->is_g4x) {
         if (tile_x || tile_y)
            rebase_depth = true;
      }

      if (rebase_depth) {
         perf_debug("HW workaround: blitting depth level %d to a temporary "
                    "to fix alignment (depth tile offset %d,%d)\n",
                    depth_irb->mt_level, tile_x, tile_y);
         intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);
         /* In the case of stencil_irb being the same packed depth/stencil
          * texture but not the same rb, make it point at our rebased mt, too.
          */
         if (stencil_irb &&
             stencil_irb != depth_irb &&
             stencil_irb->mt == depth_mt) {
            intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
            intel_renderbuffer_set_draw_offset(stencil_irb);
         }

         stencil_mt = get_stencil_miptree(stencil_irb);

         /* Recompute the offsets against the rebased miptree. */
         tile_x = depth_irb->draw_x & tile_mask_x;
         tile_y = depth_irb->draw_y & tile_mask_y;
      }

      if (stencil_irb) {
         stencil_mt = get_stencil_miptree(stencil_irb);
         intel_miptree_get_image_offset(stencil_mt,
                                        stencil_irb->mt_level,
                                        stencil_irb->mt_layer,
                                        &stencil_draw_x, &stencil_draw_y);
         /* NOTE(review): these locals shadow the function-scope
          * stencil_tile_x/y (which stay 0 here).  Harmless today because
          * the outer variables are recomputed in the "(just) stencil"
          * block below before any use, but the shadowing is fragile --
          * worth renaming in a follow-up.
          */
         int stencil_tile_x = stencil_draw_x & tile_mask_x;
         int stencil_tile_y = stencil_draw_y & tile_mask_y;

         /* If stencil doesn't match depth, then we'll need to rebase stencil
          * as well.  (if we hadn't decided to rebase stencil before, the
          * post-stencil depth test will also rebase depth to try to match it
          * up).
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            rebase_stencil = true;
         }
      }
   }

   /* If we have (just) stencil, check it for ignored low bits as well */
   if (stencil_irb) {
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (stencil_tile_x & 7 || stencil_tile_y & 7)
         rebase_stencil = true;

      /* Pre-G45 has no intra-tile offsets at all. */
      if (brw->gen == 4 && !brw->is_g4x) {
         if (stencil_tile_x || stencil_tile_y)
            rebase_stencil = true;
      }
   }

   if (rebase_stencil) {
      perf_debug("HW workaround: blitting stencil level %d to a temporary "
                 "to fix alignment (stencil tile offset %d,%d)\n",
                 stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);

      intel_renderbuffer_move_to_temp(brw, stencil_irb, invalidate_stencil);
      stencil_mt = get_stencil_miptree(stencil_irb);

      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (depth_irb && depth_irb->mt == stencil_irb->mt) {
         /* Packed depth/stencil sharing one miptree: keep depth pointing at
          * the rebased miptree.
          */
         intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
         intel_renderbuffer_set_draw_offset(depth_irb);
      } else if (depth_irb && !rebase_depth) {
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            perf_debug("HW workaround: blitting depth level %d to a temporary "
                       "to match stencil level %d alignment (depth tile offset "
                       "%d,%d, stencil offset %d,%d)\n",
                       depth_irb->mt_level,
                       stencil_irb->mt_level,
                       tile_x, tile_y,
                       stencil_tile_x, stencil_tile_y);

            intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);

            tile_x = depth_irb->draw_x & tile_mask_x;
            tile_y = depth_irb->draw_y & tile_mask_y;

            if (stencil_irb && stencil_irb->mt == depth_mt) {
               intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
               intel_renderbuffer_set_draw_offset(stencil_irb);
            }

            WARN_ONCE(stencil_tile_x != tile_x ||
                      stencil_tile_y != tile_y,
                      "Rebased stencil tile offset (%d,%d) doesn't match depth "
                      "tile offset (%d,%d).\n",
                      stencil_tile_x, stencil_tile_y,
                      tile_x, tile_y);
         }
      }
   }

   if (!depth_irb) {
      tile_x = stencil_tile_x;
      tile_y = stencil_tile_y;
   }

   /* While we just tried to get everything aligned, we may have failed to do
    * so in the case of rendering to array or 3D textures, where nonzero faces
    * will still have an offset post-rebase.  At least give an informative
    * warning.
    */
   WARN_ONCE((tile_x & 7) || (tile_y & 7),
             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
             "Truncating offset, bad rendering may occur.\n");
   tile_x &= ~7;
   tile_y &= ~7;

   /* Now, after rebasing, save off the new depthstencil state so the hardware
    * packets can just dereference that without re-calculating tile offsets.
    */
   brw->depthstencil.tile_x = tile_x;
   brw->depthstencil.tile_y = tile_y;
   brw->depthstencil.depth_offset = 0;
   brw->depthstencil.stencil_offset = 0;
   brw->depthstencil.hiz_offset = 0;
   brw->depthstencil.depth_mt = NULL;
   brw->depthstencil.stencil_mt = NULL;
   if (depth_irb) {
      depth_mt = depth_irb->mt;
      brw->depthstencil.depth_mt = depth_mt;
      brw->depthstencil.depth_offset =
         intel_region_get_aligned_offset(depth_mt->region,
                                         depth_irb->draw_x & ~tile_mask_x,
                                         depth_irb->draw_y & ~tile_mask_y,
                                         false);
      if (intel_renderbuffer_has_hiz(depth_irb)) {
         /* HiZ rows cover two pixel rows, hence the /2 on y. */
         brw->depthstencil.hiz_offset =
            intel_region_get_aligned_offset(depth_mt->region,
                                            depth_irb->draw_x & ~tile_mask_x,
                                            (depth_irb->draw_y & ~tile_mask_y) /
                                            2,
                                            false);
      }
   }
   if (stencil_irb) {
      stencil_mt = get_stencil_miptree(stencil_irb);

      brw->depthstencil.stencil_mt = stencil_mt;
      if (stencil_mt->format == MESA_FORMAT_S8) {
         /* Note: we can't compute the stencil offset using
          * intel_region_get_aligned_offset(), because stencil_region claims
          * that the region is untiled even though it's W tiled.
          */
         brw->depthstencil.stencil_offset =
            (stencil_draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
            (stencil_draw_x & ~tile_mask_x) * 64;
      }
   }
}
||
555 | |||
/**
 * Emit the depth/stencil/HiZ buffer packets for the current drawbuffer.
 *
 * Consumes the pre-computed brw->depthstencil state (set up by
 * brw_workaround_depthstencil_alignment()) and dispatches to the
 * per-generation vtbl emitter.
 */
void
brw_emit_depthbuffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
   struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
   uint32_t tile_x = brw->depthstencil.tile_x;
   uint32_t tile_y = brw->depthstencil.tile_y;
   bool hiz = depth_irb && intel_renderbuffer_has_hiz(depth_irb);
   bool separate_stencil = false;
   /* Defaults for the no-attachment case: a null surface. */
   uint32_t depth_surface_type = BRW_SURFACE_NULL;
   uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
   uint32_t depth_offset = 0;
   uint32_t width = 1, height = 1;

   if (stencil_mt) {
      separate_stencil = stencil_mt->format == MESA_FORMAT_S8;

      /* Gen7 supports only separate stencil */
      assert(separate_stencil || brw->gen < 7);
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil) {
      depth_irb = stencil_irb;
      depth_mt = stencil_mt;
   }

   if (depth_irb && depth_mt) {
      /* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
       * 3DSTATE_DEPTH_BUFFER.Surface_Format is not permitted to be a packed
       * depthstencil format.
       *
       * Gens prior to 7 require that HiZ_Enable and Separate_Stencil_Enable be
       * set to the same value.  Gens after 7 implicitly always set
       * Separate_Stencil_Enable; software cannot disable it.
       */
      if ((brw->gen < 7 && hiz) || brw->gen >= 7) {
         assert(!_mesa_is_format_packed_depth_stencil(depth_mt->format));
      }

      /* Prior to Gen7, if using separate stencil, hiz must be enabled. */
      assert(brw->gen >= 7 || !separate_stencil || hiz);

      assert(brw->gen < 6 || depth_mt->region->tiling == I915_TILING_Y);
      assert(!hiz || depth_mt->region->tiling == I915_TILING_Y);

      depthbuffer_format = brw_depthbuffer_format(brw);
      depth_surface_type = BRW_SURFACE_2D;
      depth_offset = brw->depthstencil.depth_offset;
      width = depth_irb->Base.Base.Width;
      height = depth_irb->Base.Base.Height;
   } else if (separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * The tiled bit must be set.  From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(brw->has_separate_stencil);

      depth_surface_type = BRW_SURFACE_2D;
      width = stencil_irb->Base.Base.Width;
      height = stencil_irb->Base.Base.Height;
   }

   brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
                                    depthbuffer_format, depth_surface_type,
                                    stencil_mt, hiz, separate_stencil,
                                    width, height, tile_x, tile_y);
}
||
638 | |||
/**
 * Gen4-6 implementation of the depth/stencil/HiZ packet emission.
 *
 * Emits 3DSTATE_DEPTH_BUFFER, and when HiZ or separate stencil is in use
 * also 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER, followed by
 * 3DSTATE_CLEAR_PARAMS when required (Gen6+, or whenever HiZ is enabled).
 */
void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
                           struct intel_mipmap_tree *depth_mt,
                           uint32_t depth_offset, uint32_t depthbuffer_format,
                           uint32_t depth_surface_type,
                           struct intel_mipmap_tree *stencil_mt,
                           bool hiz, bool separate_stencil,
                           uint32_t width, uint32_t height,
                           uint32_t tile_x, uint32_t tile_y)
{
   /* Enable the hiz bit if we're doing separate stencil, because it and the
    * separate stencil bit must have the same value.  From Section 2.11.5.6.1.1
    * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
    *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
    *     Enable must also be enabled.
    *
    *     [DevGT]: This field must be set to the same value (enabled or
    *     disabled) as Hierarchical Depth Buffer Enable
    */
   bool enable_hiz_ss = hiz || separate_stencil;


   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (brw->gen == 6) {
      intel_emit_post_sync_nonzero_flush(brw);
      intel_emit_depth_stall_flushes(brw);
   }

   /* The packet length grew over the generations. */
   unsigned int len;
   if (brw->gen >= 6)
      len = 7;
   else if (brw->is_g4x || brw->gen == 5)
      len = 6;
   else
      len = 5;

   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
             (depthbuffer_format << 18) |
             ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
             ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
             (BRW_TILEWALK_YMAJOR << 26) |
             /* tiled-surface bit; claim tiled when there is no depth mt */
             ((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1)
              << 27) |
             (depth_surface_type << 29));

   if (depth_mt) {
      OUT_RELOC(depth_mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                depth_offset);
   } else {
      OUT_BATCH(0);
   }

   /* Width/height fields hold (size + tile offset - 1). */
   OUT_BATCH(((width + tile_x - 1) << 6) |
             ((height + tile_y - 1) << 19));
   OUT_BATCH(0);

   /* Intra-tile draw offsets exist only on G45 and later. */
   if (brw->is_g4x || brw->gen >= 5)
      OUT_BATCH(tile_x | (tile_y << 16));
   else
      assert(tile_x == 0 && tile_y == 0);

   if (brw->gen >= 6)
      OUT_BATCH(0);

   ADVANCE_BATCH();

   if (hiz || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set.  Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER.  Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz) {
         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(hiz_mt->region->pitch - 1);
         OUT_RELOC(hiz_mt->region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   brw->depthstencil.hiz_offset);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
         struct intel_region *region = stencil_mt->region;

         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *    The pitch must be set to 2x the value computed based on width, as
          *    the stencil buffer is stored with two rows interleaved.
          */
         OUT_BATCH(2 * region->pitch - 1);
         OUT_RELOC(region->bo,
                   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                   brw->depthstencil.stencil_offset);
         ADVANCE_BATCH();
      } else {
         BEGIN_BATCH(3);
         OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         OUT_BATCH(0);
         OUT_BATCH(0);
         ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety.  If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *    3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
    *    when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (brw->gen >= 6 || hiz) {
      if (brw->gen == 6)
         intel_emit_post_sync_nonzero_flush(brw);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
                GEN5_DEPTH_CLEAR_VALID |
                (2 - 2));
      OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
}
||
782 | |||
/* Atom that re-emits the depth/stencil/hiz buffer packets.  Runs whenever the
 * bound framebuffer changes (_NEW_BUFFERS) or a new batchbuffer begins
 * (BRW_NEW_BATCH), since hardware state is lost across batches.
 */
const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = brw_emit_depthbuffer,
};
||
791 | |||
792 | |||
793 | |||
794 | /*********************************************************************** |
||
795 | * Polygon stipple packet |
||
796 | */ |
||
797 | |||
798 | static void upload_polygon_stipple(struct brw_context *brw) |
||
799 | { |
||
800 | struct gl_context *ctx = &brw->ctx; |
||
801 | GLuint i; |
||
802 | |||
803 | /* _NEW_POLYGON */ |
||
804 | if (!ctx->Polygon.StippleFlag) |
||
805 | return; |
||
806 | |||
807 | if (brw->gen == 6) |
||
808 | intel_emit_post_sync_nonzero_flush(brw); |
||
809 | |||
810 | BEGIN_BATCH(33); |
||
811 | OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2)); |
||
812 | |||
813 | /* Polygon stipple is provided in OpenGL order, i.e. bottom |
||
814 | * row first. If we're rendering to a window (i.e. the |
||
815 | * default frame buffer object, 0), then we need to invert |
||
816 | * it to match our pixel layout. But if we're rendering |
||
817 | * to a FBO (i.e. any named frame buffer object), we *don't* |
||
818 | * need to invert - we already match the layout. |
||
819 | */ |
||
820 | if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { |
||
821 | for (i = 0; i < 32; i++) |
||
822 | OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */ |
||
823 | } |
||
824 | else { |
||
825 | for (i = 0; i < 32; i++) |
||
826 | OUT_BATCH(ctx->PolygonStipple[i]); |
||
827 | } |
||
828 | CACHED_BATCH(); |
||
829 | } |
||
830 | |||
/* Atom for the polygon stipple pattern.  Depends on _NEW_POLYGONSTIPPLE (the
 * pattern itself) and _NEW_POLYGON (the enable flag checked in the emitter).
 */
const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = (_NEW_POLYGONSTIPPLE |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple
};
||
840 | |||
841 | |||
842 | /*********************************************************************** |
||
843 | * Polygon stipple offset packet |
||
844 | */ |
||
845 | |||
846 | static void upload_polygon_stipple_offset(struct brw_context *brw) |
||
847 | { |
||
848 | struct gl_context *ctx = &brw->ctx; |
||
849 | |||
850 | /* _NEW_POLYGON */ |
||
851 | if (!ctx->Polygon.StippleFlag) |
||
852 | return; |
||
853 | |||
854 | if (brw->gen == 6) |
||
855 | intel_emit_post_sync_nonzero_flush(brw); |
||
856 | |||
857 | BEGIN_BATCH(2); |
||
858 | OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2)); |
||
859 | |||
860 | /* _NEW_BUFFERS |
||
861 | * |
||
862 | * If we're drawing to a system window we have to invert the Y axis |
||
863 | * in order to match the OpenGL pixel coordinate system, and our |
||
864 | * offset must be matched to the window position. If we're drawing |
||
865 | * to a user-created FBO then our native pixel coordinate system |
||
866 | * works just fine, and there's no window system to worry about. |
||
867 | */ |
||
868 | if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) |
||
869 | OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31); |
||
870 | else |
||
871 | OUT_BATCH(0); |
||
872 | CACHED_BATCH(); |
||
873 | } |
||
874 | |||
/* Atom for the polygon stipple origin.  Depends on _NEW_BUFFERS (window
 * height used for the Y offset) and _NEW_POLYGON (the enable flag).
 */
const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = (_NEW_BUFFERS |
               _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple_offset
};
||
884 | |||
885 | /********************************************************************** |
||
886 | * AA Line parameters |
||
887 | */ |
||
888 | static void upload_aa_line_parameters(struct brw_context *brw) |
||
889 | { |
||
890 | struct gl_context *ctx = &brw->ctx; |
||
891 | |||
892 | if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters) |
||
893 | return; |
||
894 | |||
895 | if (brw->gen == 6) |
||
896 | intel_emit_post_sync_nonzero_flush(brw); |
||
897 | |||
898 | OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2)); |
||
899 | /* use legacy aa line coverage computation */ |
||
900 | OUT_BATCH(0); |
||
901 | OUT_BATCH(0); |
||
902 | CACHED_BATCH(); |
||
903 | } |
||
904 | |||
/* Atom for antialiased-line parameters.  Depends on _NEW_LINE (the smooth
 * flag checked in the emitter).
 */
const struct brw_tracked_state brw_aa_line_parameters = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_aa_line_parameters
};
||
913 | |||
914 | /*********************************************************************** |
||
915 | * Line stipple packet |
||
916 | */ |
||
917 | |||
918 | static void upload_line_stipple(struct brw_context *brw) |
||
919 | { |
||
920 | struct gl_context *ctx = &brw->ctx; |
||
921 | GLfloat tmp; |
||
922 | GLint tmpi; |
||
923 | |||
924 | if (!ctx->Line.StippleFlag) |
||
925 | return; |
||
926 | |||
927 | if (brw->gen == 6) |
||
928 | intel_emit_post_sync_nonzero_flush(brw); |
||
929 | |||
930 | BEGIN_BATCH(3); |
||
931 | OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2)); |
||
932 | OUT_BATCH(ctx->Line.StipplePattern); |
||
933 | |||
934 | if (brw->gen >= 7) { |
||
935 | /* in U1.16 */ |
||
936 | tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; |
||
937 | tmpi = tmp * (1<<16); |
||
938 | OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor); |
||
939 | } |
||
940 | else { |
||
941 | /* in U1.13 */ |
||
942 | tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; |
||
943 | tmpi = tmp * (1<<13); |
||
944 | OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor); |
||
945 | } |
||
946 | |||
947 | CACHED_BATCH(); |
||
948 | } |
||
949 | |||
/* Atom for the line stipple pattern/factor.  Depends on _NEW_LINE (pattern,
 * factor, and the enable flag checked in the emitter).
 */
const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_line_stipple
};
||
958 | |||
959 | |||
960 | /*********************************************************************** |
||
961 | * Misc invariant state packets |
||
962 | */ |
||
963 | |||
/**
 * Emit once-per-context invariant state: pipeline select, depth-offset
 * clamp disable (pre-gen6), the system instruction pointer, and the
 * vertex-fetch statistics enable.  Packet order is deliberate; these are
 * non-pipelined commands.
 */
void
brw_upload_invariant_state(struct brw_context *brw)
{
   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
   if (brw->gen == 6)
      intel_emit_post_sync_nonzero_flush(brw);

   /* Select the 3D pipeline (as opposed to media) */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
   ADVANCE_BATCH();

   if (brw->gen < 6) {
      /* Disable depth offset clamping. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH_F(0.0);
      ADVANCE_BATCH();
   }

   /* System instruction pointer: programmed to 0 (no handler installed). */
   BEGIN_BATCH(2);
   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   /* Vertex-fetch statistics gathering, enabled only under DEBUG_STATS. */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
             (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
   ADVANCE_BATCH();
}
||
994 | |||
/* Atom for the invariant state above; emitted once per new context. */
const struct brw_tracked_state brw_invariant_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = brw_upload_invariant_state
};
||
1003 | |||
/**
 * Define the base addresses which some state is referenced from.
 *
 * This allows us to avoid having to emit relocations for the objects,
 * and is actually required for binding table pointers on gen6.
 *
 * Surface state base address covers binding table pointers and
 * surface state objects, but not the surfaces that the surface state
 * objects point to.
 */
static void upload_state_base_address( struct brw_context *brw )
{
   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (brw->gen >= 6) {
      /* STATE_BASE_ADDRESS is non-pipelined; gen6 needs the workaround
       * flush before it.
       */
      if (brw->gen == 6)
         intel_emit_post_sync_nonzero_flush(brw);

      BEGIN_BATCH(10);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
      /* General state base address: stateless DP read/write requests */
      OUT_BATCH(1);
      /* Surface state base address:
       * BINDING_TABLE_STATE
       * SURFACE_STATE
       */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
      /* Dynamic state base address:
       * SAMPLER_STATE
       * SAMPLER_BORDER_COLOR_STATE
       * CLIP, SF, WM/CC viewport state
       * COLOR_CALC_STATE
       * DEPTH_STENCIL_STATE
       * BLEND_STATE
       * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
       * Disable is clear, which we rely on)
       */
      OUT_RELOC(brw->batch.bo, (I915_GEM_DOMAIN_RENDER |
                                I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

      OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address: shader kernels (incl. SIP) */

      OUT_BATCH(1); /* General state upper bound */
      /* Dynamic state upper bound.  Although the documentation says that
       * programming it to zero will cause it to be ignored, that is a lie.
       * If this isn't programmed to a real bound, the sampler border color
       * pointer is rejected, causing border color to mysteriously fail.
       */
      OUT_BATCH(0xfffff001);
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else if (brw->gen == 5) {
      BEGIN_BATCH(8);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                1); /* Instruction base address */
      OUT_BATCH(0xfffff001); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
                1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINE_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
    * 1 part 1 says that the following packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing.  Having a separate state flag makes the sequence more
    * obvious.
    */

   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}
||
1114 | |||
1115 | const struct brw_tracked_state brw_state_base_address = { |
||
1116 | .dirty = { |
||
1117 | .mesa = 0, |
||
1118 | .brw = (BRW_NEW_BATCH | |
||
1119 | BRW_NEW_PROGRAM_CACHE), |
||
1120 | .cache = 0, |
||
1121 | }, |
||
1122 | .emit = upload_state_base_address |
||
1123 | };><>><>><>><>><>><>>><>><>13); |