Rev 4358 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | Copyright (C) Intel Corp. 2006. All Rights Reserved. |
||
3 | Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
||
4 | develop this 3D driver. |
||
5 | |||
6 | Permission is hereby granted, free of charge, to any person obtaining |
||
7 | a copy of this software and associated documentation files (the |
||
8 | "Software"), to deal in the Software without restriction, including |
||
9 | without limitation the rights to use, copy, modify, merge, publish, |
||
10 | distribute, sublicense, and/or sell copies of the Software, and to |
||
11 | permit persons to whom the Software is furnished to do so, subject to |
||
12 | the following conditions: |
||
13 | |||
14 | The above copyright notice and this permission notice (including the |
||
15 | next paragraph) shall be included in all copies or substantial |
||
16 | portions of the Software. |
||
17 | |||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
21 | IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
22 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
24 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | |||
26 | **********************************************************************/ |
||
27 | /* |
||
28 | * Authors: |
||
29 | * Keith Whitwell |
||
30 | */ |
||
31 | |||
32 | |||
33 | |||
34 | #include "intel_batchbuffer.h" |
||
35 | #include "intel_fbo.h" |
||
36 | #include "intel_mipmap_tree.h" |
||
37 | #include "intel_regions.h" |
||
38 | |||
39 | #include "brw_context.h" |
||
40 | #include "brw_state.h" |
||
41 | #include "brw_defines.h" |
||
42 | |||
43 | #include "main/fbobject.h" |
||
44 | #include "main/glformats.h" |
||
45 | |||
46 | /* Constant single cliprect for framebuffer object or DRI2 drawing */ |
||
47 | static void upload_drawing_rect(struct brw_context *brw) |
||
48 | { |
||
49 | struct gl_context *ctx = &brw->ctx; |
||
50 | |||
4401 | Serge | 51 | /* 3DSTATE_DRAWING_RECTANGLE is non-pipelined. */ |
52 | if (brw->gen == 6) |
||
53 | intel_emit_post_sync_nonzero_flush(brw); |
||
54 | |||
4358 | Serge | 55 | BEGIN_BATCH(4); |
56 | OUT_BATCH(_3DSTATE_DRAWING_RECTANGLE << 16 | (4 - 2)); |
||
57 | OUT_BATCH(0); /* xmin, ymin */ |
||
58 | OUT_BATCH(((ctx->DrawBuffer->Width - 1) & 0xffff) | |
||
59 | ((ctx->DrawBuffer->Height - 1) << 16)); |
||
60 | OUT_BATCH(0); |
||
61 | ADVANCE_BATCH(); |
||
62 | } |
||
63 | |||
/* Re-emit the drawing rectangle whenever the bound framebuffer changes
 * (_NEW_BUFFERS) or a new context/batch state baseline is established.
 */
const struct brw_tracked_state brw_drawing_rect = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_drawing_rect
};
||
72 | |||
/**
 * Upload the binding table pointers, which point each stage's array of surface
 * state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 *
 * Pre-gen6 variant: only the VS and WM stages use binding tables here; the
 * GS, clip, and SF slots are written as zero.
 */
static void upload_binding_table_pointers(struct brw_context *brw)
{
   BEGIN_BATCH(6);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 | (6 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset);
   OUT_BATCH(0); /* gs */
   OUT_BATCH(0); /* clip */
   OUT_BATCH(0); /* sf */
   OUT_BATCH(brw->wm.bind_bo_offset);
   ADVANCE_BATCH();
}
||
91 | |||
/* Binding table pointers are batch-relative, so they must be re-emitted on a
 * new batch or when the state base address or any stage's table changes.
 */
const struct brw_tracked_state brw_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_binding_table_pointers,
};
||
104 | |||
/**
 * Upload the binding table pointers, which point each stage's array of surface
 * state pointers.
 *
 * The binding table pointers are relative to the surface state base address,
 * which points at the batchbuffer containing the streamed batch state.
 *
 * Gen6 variant: the packet carries per-stage "modify" bits, and all three
 * of VS, GS, and PS tables are updated in one 4-dword packet.
 */
static void upload_gen6_binding_table_pointers(struct brw_context *brw)
{
   BEGIN_BATCH(4);
   OUT_BATCH(_3DSTATE_BINDING_TABLE_POINTERS << 16 |
             GEN6_BINDING_TABLE_MODIFY_VS |
             GEN6_BINDING_TABLE_MODIFY_GS |
             GEN6_BINDING_TABLE_MODIFY_PS |
             (4 - 2));
   OUT_BATCH(brw->vs.bind_bo_offset); /* vs */
   OUT_BATCH(brw->gs.bind_bo_offset); /* gs */
   OUT_BATCH(brw->wm.bind_bo_offset); /* wm/ps */
   ADVANCE_BATCH();
}
||
125 | |||
/* Gen6 tracked-state wrapper: same dirty conditions as the pre-gen6
 * binding table pointers, but emitting the gen6 packet format.
 */
const struct brw_tracked_state gen6_binding_table_pointers = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS |
              BRW_NEW_VS_BINDING_TABLE |
              BRW_NEW_GS_BINDING_TABLE |
              BRW_NEW_PS_BINDING_TABLE),
      .cache = 0,
   },
   .emit = upload_gen6_binding_table_pointers,
};
||
138 | |||
/**
 * Upload pointers to the per-stage state.
 *
 * The state pointers in this packet are all relative to the general state
 * base address set by CMD_STATE_BASE_ADDRESS, which is 0.
 *
 * The GS and clip pointers carry an "enable" flag in bit 0 (hence the
 * `| 1` on those offsets); the GS slot is written as 0 when no GS program
 * is active.
 */
static void upload_pipelined_state_pointers(struct brw_context *brw )
{
   if (brw->gen == 5) {
      /* Need to flush before changing clip max threads for errata. */
      BEGIN_BATCH(1);
      OUT_BATCH(MI_FLUSH);
      ADVANCE_BATCH();
   }

   BEGIN_BATCH(7);
   OUT_BATCH(_3DSTATE_PIPELINED_POINTERS << 16 | (7 - 2));
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->vs.state_offset);
   if (brw->gs.prog_active)
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
                brw->gs.state_offset | 1); /* bit 0: GS enable */
   else
      OUT_BATCH(0);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->clip.state_offset | 1); /* bit 0: clip enable */
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->sf.state_offset);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->wm.state_offset);
   OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
             brw->cc.state_offset);
   ADVANCE_BATCH();

   /* Let dependent state know the pipelined pointers were re-emitted. */
   brw->state.dirty.brw |= BRW_NEW_PSP;
}
||
175 | |||
/* Emit the pipelined state pointers, then the URB fence and the constant
 * buffer (CS) URB state, as one combined atom.
 */
static void upload_psp_urb_cbs(struct brw_context *brw )
{
   upload_pipelined_state_pointers(brw);
   brw_upload_urb_fence(brw);
   brw_upload_cs_urb_state(brw);
}
||
182 | |||
/* Re-emit pipelined pointers + URB fence + CS URB state whenever the URB
 * layout, the batch, the state base address, or any of the cached per-stage
 * unit state structures change.
 */
const struct brw_tracked_state brw_psp_urb_cbs = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_URB_FENCE |
              BRW_NEW_BATCH |
              BRW_NEW_STATE_BASE_ADDRESS),
      .cache = (CACHE_NEW_VS_UNIT |
                CACHE_NEW_GS_UNIT |
                CACHE_NEW_GS_PROG |
                CACHE_NEW_CLIP_UNIT |
                CACHE_NEW_SF_UNIT |
                CACHE_NEW_WM_UNIT |
                CACHE_NEW_CC_UNIT)
   },
   .emit = upload_psp_urb_cbs,
};
||
199 | |||
200 | uint32_t |
||
201 | brw_depthbuffer_format(struct brw_context *brw) |
||
202 | { |
||
203 | struct gl_context *ctx = &brw->ctx; |
||
204 | struct gl_framebuffer *fb = ctx->DrawBuffer; |
||
205 | struct intel_renderbuffer *drb = intel_get_renderbuffer(fb, BUFFER_DEPTH); |
||
206 | struct intel_renderbuffer *srb; |
||
207 | |||
208 | if (!drb && |
||
209 | (srb = intel_get_renderbuffer(fb, BUFFER_STENCIL)) && |
||
210 | !srb->mt->stencil_mt && |
||
211 | (intel_rb_format(srb) == MESA_FORMAT_S8_Z24 || |
||
212 | intel_rb_format(srb) == MESA_FORMAT_Z32_FLOAT_X24S8)) { |
||
213 | drb = srb; |
||
214 | } |
||
215 | |||
216 | if (!drb) |
||
217 | return BRW_DEPTHFORMAT_D32_FLOAT; |
||
218 | |||
219 | switch (drb->mt->format) { |
||
220 | case MESA_FORMAT_Z16: |
||
221 | return BRW_DEPTHFORMAT_D16_UNORM; |
||
222 | case MESA_FORMAT_Z32_FLOAT: |
||
223 | return BRW_DEPTHFORMAT_D32_FLOAT; |
||
224 | case MESA_FORMAT_X8_Z24: |
||
225 | if (brw->gen >= 6) { |
||
226 | return BRW_DEPTHFORMAT_D24_UNORM_X8_UINT; |
||
227 | } else { |
||
228 | /* Use D24_UNORM_S8, not D24_UNORM_X8. |
||
229 | * |
||
230 | * D24_UNORM_X8 was not introduced until Gen5. (See the Ironlake PRM, |
||
231 | * Volume 2, Part 1, Section 8.4.6 "Depth/Stencil Buffer State", Bits |
||
232 | * 3DSTATE_DEPTH_BUFFER.Surface_Format). |
||
233 | * |
||
234 | * However, on Gen5, D24_UNORM_X8 may be used only if separate |
||
235 | * stencil is enabled, and we never enable it. From the Ironlake PRM, |
||
236 | * same section as above, Bit 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Buffer_Enable: |
||
237 | * If this field is disabled, the Surface Format of the depth |
||
238 | * buffer cannot be D24_UNORM_X8_UINT. |
||
239 | */ |
||
240 | return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; |
||
241 | } |
||
242 | case MESA_FORMAT_S8_Z24: |
||
243 | return BRW_DEPTHFORMAT_D24_UNORM_S8_UINT; |
||
244 | case MESA_FORMAT_Z32_FLOAT_X24S8: |
||
245 | return BRW_DEPTHFORMAT_D32_FLOAT_S8X24_UINT; |
||
246 | default: |
||
247 | _mesa_problem(ctx, "Unexpected depth format %s\n", |
||
248 | _mesa_get_format_name(intel_rb_format(drb))); |
||
249 | return BRW_DEPTHFORMAT_D16_UNORM; |
||
250 | } |
||
251 | } |
||
252 | |||
253 | /** |
||
254 | * Returns the mask of how many bits of x and y must be handled through the |
||
255 | * depthbuffer's draw offset x and y fields. |
||
256 | * |
||
257 | * The draw offset x/y field of the depthbuffer packet is unfortunately shared |
||
258 | * between the depth, hiz, and stencil buffers. Because it can be hard to get |
||
259 | * all 3 to agree on this value, we want to do as much drawing offset |
||
260 | * adjustment as possible by moving the base offset of the 3 buffers, which is |
||
261 | * restricted to tile boundaries. |
||
262 | * |
||
263 | * For each buffer, the remainder must be applied through the x/y draw offset. |
||
264 | * This returns the worst-case mask of the low bits that have to go into the |
||
265 | * packet. If the 3 buffers don't agree on the drawing offset ANDed with this |
||
266 | * mask, then we're in trouble. |
||
267 | */ |
||
268 | void |
||
269 | brw_get_depthstencil_tile_masks(struct intel_mipmap_tree *depth_mt, |
||
270 | uint32_t depth_level, |
||
271 | uint32_t depth_layer, |
||
272 | struct intel_mipmap_tree *stencil_mt, |
||
273 | uint32_t *out_tile_mask_x, |
||
274 | uint32_t *out_tile_mask_y) |
||
275 | { |
||
276 | uint32_t tile_mask_x = 0, tile_mask_y = 0; |
||
277 | |||
278 | if (depth_mt) { |
||
279 | intel_region_get_tile_masks(depth_mt->region, |
||
280 | &tile_mask_x, &tile_mask_y, false); |
||
281 | |||
282 | if (intel_miptree_slice_has_hiz(depth_mt, depth_level, depth_layer)) { |
||
283 | uint32_t hiz_tile_mask_x, hiz_tile_mask_y; |
||
284 | intel_region_get_tile_masks(depth_mt->hiz_mt->region, |
||
285 | &hiz_tile_mask_x, &hiz_tile_mask_y, false); |
||
286 | |||
287 | /* Each HiZ row represents 2 rows of pixels */ |
||
288 | hiz_tile_mask_y = hiz_tile_mask_y << 1 | 1; |
||
289 | |||
290 | tile_mask_x |= hiz_tile_mask_x; |
||
291 | tile_mask_y |= hiz_tile_mask_y; |
||
292 | } |
||
293 | } |
||
294 | |||
295 | if (stencil_mt) { |
||
296 | if (stencil_mt->stencil_mt) |
||
297 | stencil_mt = stencil_mt->stencil_mt; |
||
298 | |||
299 | if (stencil_mt->format == MESA_FORMAT_S8) { |
||
300 | /* Separate stencil buffer uses 64x64 tiles. */ |
||
301 | tile_mask_x |= 63; |
||
302 | tile_mask_y |= 63; |
||
303 | } else { |
||
304 | uint32_t stencil_tile_mask_x, stencil_tile_mask_y; |
||
305 | intel_region_get_tile_masks(stencil_mt->region, |
||
306 | &stencil_tile_mask_x, |
||
307 | &stencil_tile_mask_y, false); |
||
308 | |||
309 | tile_mask_x |= stencil_tile_mask_x; |
||
310 | tile_mask_y |= stencil_tile_mask_y; |
||
311 | } |
||
312 | } |
||
313 | |||
314 | *out_tile_mask_x = tile_mask_x; |
||
315 | *out_tile_mask_y = tile_mask_y; |
||
316 | } |
||
317 | |||
318 | static struct intel_mipmap_tree * |
||
319 | get_stencil_miptree(struct intel_renderbuffer *irb) |
||
320 | { |
||
321 | if (!irb) |
||
322 | return NULL; |
||
323 | if (irb->mt->stencil_mt) |
||
324 | return irb->mt->stencil_mt; |
||
325 | return irb->mt; |
||
326 | } |
||
327 | |||
/**
 * Rebase the depth/stencil/hiz buffers (by blitting to temporaries when
 * needed) so that the shared draw-offset field of 3DSTATE_DEPTH_BUFFER can
 * satisfy the hardware's alignment rules, then record the resulting tile
 * offsets and aligned base offsets in brw->depthstencil for the packet
 * emission code to use.
 *
 * \param clear_mask  BUFFER_BIT_* bits about to be cleared; a buffer that is
 *                    about to be fully cleared need not have its contents
 *                    preserved by the rebasing blit.
 */
void
brw_workaround_depthstencil_alignment(struct brw_context *brw,
                                      GLbitfield clear_mask)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   bool rebase_depth = false;
   bool rebase_stencil = false;
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = NULL;
   struct intel_mipmap_tree *stencil_mt = get_stencil_miptree(stencil_irb);
   uint32_t tile_x = 0, tile_y = 0, stencil_tile_x = 0, stencil_tile_y = 0;
   uint32_t stencil_draw_x = 0, stencil_draw_y = 0;
   bool invalidate_depth = clear_mask & BUFFER_BIT_DEPTH;
   bool invalidate_stencil = clear_mask & BUFFER_BIT_STENCIL;

   if (depth_irb)
      depth_mt = depth_irb->mt;

   /* Check if depth buffer is in depth/stencil format. If so, then it's only
    * safe to invalidate it if we're also clearing stencil, and both depth_irb
    * and stencil_irb point to the same miptree.
    *
    * Note: it's not sufficient to check for the case where
    * _mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL,
    * because this fails to catch depth/stencil buffers on hardware that uses
    * separate stencil. To catch that case, we check whether
    * depth_mt->stencil_mt is non-NULL.
    */
   if (depth_irb && invalidate_depth &&
      (_mesa_get_format_base_format(depth_mt->format) == GL_DEPTH_STENCIL ||
       depth_mt->stencil_mt)) {
      invalidate_depth = invalidate_stencil && depth_irb && stencil_irb
         && depth_irb->mt == stencil_irb->mt;
   }

   uint32_t tile_mask_x, tile_mask_y;
   brw_get_depthstencil_tile_masks(depth_mt,
                                   depth_mt ? depth_irb->mt_level : 0,
                                   depth_mt ? depth_irb->mt_layer : 0,
                                   stencil_mt,
                                   &tile_mask_x, &tile_mask_y);

   if (depth_irb) {
      tile_x = depth_irb->draw_x & tile_mask_x;
      tile_y = depth_irb->draw_y & tile_mask_y;

      /* According to the Sandy Bridge PRM, volume 2 part 1, pp326-327
       * (3DSTATE_DEPTH_BUFFER dw5), in the documentation for "Depth
       * Coordinate Offset X/Y":
       *
       *   "The 3 LSBs of both offsets must be zero to ensure correct
       *   alignment"
       */
      if (tile_x & 7 || tile_y & 7)
         rebase_depth = true;

      /* We didn't even have intra-tile offsets before g45. */
      if (brw->gen == 4 && !brw->is_g4x) {
         if (tile_x || tile_y)
            rebase_depth = true;
      }

      if (rebase_depth) {
         perf_debug("HW workaround: blitting depth level %d to a temporary "
                    "to fix alignment (depth tile offset %d,%d)\n",
                    depth_irb->mt_level, tile_x, tile_y);
         intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);
         /* In the case of stencil_irb being the same packed depth/stencil
          * texture but not the same rb, make it point at our rebased mt, too.
          */
         if (stencil_irb &&
             stencil_irb != depth_irb &&
             stencil_irb->mt == depth_mt) {
            intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
            intel_renderbuffer_set_draw_offset(stencil_irb);
         }

         stencil_mt = get_stencil_miptree(stencil_irb);

         /* Recompute the intra-tile offsets against the rebased buffer. */
         tile_x = depth_irb->draw_x & tile_mask_x;
         tile_y = depth_irb->draw_y & tile_mask_y;
      }

      if (stencil_irb) {
         stencil_mt = get_stencil_miptree(stencil_irb);
         intel_miptree_get_image_offset(stencil_mt,
                                        stencil_irb->mt_level,
                                        stencil_irb->mt_layer,
                                        &stencil_draw_x, &stencil_draw_y);
         int stencil_tile_x = stencil_draw_x & tile_mask_x;
         int stencil_tile_y = stencil_draw_y & tile_mask_y;

         /* If stencil doesn't match depth, then we'll need to rebase stencil
          * as well.  (if we hadn't decided to rebase stencil before, the
          * post-stencil depth test will also rebase depth to try to match it
          * up).
          */
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            rebase_stencil = true;
         }
      }
   }

   /* If we have (just) stencil, check it for ignored low bits as well */
   if (stencil_irb) {
      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      /* Same 3-LSB alignment rule as for depth above. */
      if (stencil_tile_x & 7 || stencil_tile_y & 7)
         rebase_stencil = true;

      if (brw->gen == 4 && !brw->is_g4x) {
         if (stencil_tile_x || stencil_tile_y)
            rebase_stencil = true;
      }
   }

   if (rebase_stencil) {
      perf_debug("HW workaround: blitting stencil level %d to a temporary "
                 "to fix alignment (stencil tile offset %d,%d)\n",
                 stencil_irb->mt_level, stencil_tile_x, stencil_tile_y);

      intel_renderbuffer_move_to_temp(brw, stencil_irb, invalidate_stencil);
      stencil_mt = get_stencil_miptree(stencil_irb);

      intel_miptree_get_image_offset(stencil_mt,
                                     stencil_irb->mt_level,
                                     stencil_irb->mt_layer,
                                     &stencil_draw_x, &stencil_draw_y);
      stencil_tile_x = stencil_draw_x & tile_mask_x;
      stencil_tile_y = stencil_draw_y & tile_mask_y;

      if (depth_irb && depth_irb->mt == stencil_irb->mt) {
         /* Packed depth/stencil sharing one miptree: keep depth's rb in
          * sync with the rebased tree.
          */
         intel_miptree_reference(&depth_irb->mt, stencil_irb->mt);
         intel_renderbuffer_set_draw_offset(depth_irb);
      } else if (depth_irb && !rebase_depth) {
         if (tile_x != stencil_tile_x ||
             tile_y != stencil_tile_y) {
            perf_debug("HW workaround: blitting depth level %d to a temporary "
                       "to match stencil level %d alignment (depth tile offset "
                       "%d,%d, stencil offset %d,%d)\n",
                       depth_irb->mt_level,
                       stencil_irb->mt_level,
                       tile_x, tile_y,
                       stencil_tile_x, stencil_tile_y);

            intel_renderbuffer_move_to_temp(brw, depth_irb, invalidate_depth);

            tile_x = depth_irb->draw_x & tile_mask_x;
            tile_y = depth_irb->draw_y & tile_mask_y;

            if (stencil_irb && stencil_irb->mt == depth_mt) {
               intel_miptree_reference(&stencil_irb->mt, depth_irb->mt);
               intel_renderbuffer_set_draw_offset(stencil_irb);
            }

            WARN_ONCE(stencil_tile_x != tile_x ||
                      stencil_tile_y != tile_y,
                      "Rebased stencil tile offset (%d,%d) doesn't match depth "
                      "tile offset (%d,%d).\n",
                      stencil_tile_x, stencil_tile_y,
                      tile_x, tile_y);
         }
      }
   }

   if (!depth_irb) {
      tile_x = stencil_tile_x;
      tile_y = stencil_tile_y;
   }

   /* While we just tried to get everything aligned, we may have failed to do
    * so in the case of rendering to array or 3D textures, where nonzero faces
    * will still have an offset post-rebase.  At least give an informative
    * warning.
    */
   WARN_ONCE((tile_x & 7) || (tile_y & 7),
             "Depth/stencil buffer needs alignment to 8-pixel boundaries.\n"
             "Truncating offset, bad rendering may occur.\n");
   tile_x &= ~7;
   tile_y &= ~7;

   /* Now, after rebasing, save off the new dephtstencil state so the hardware
    * packets can just dereference that without re-calculating tile offsets.
    */
   brw->depthstencil.tile_x = tile_x;
   brw->depthstencil.tile_y = tile_y;
   brw->depthstencil.depth_offset = 0;
   brw->depthstencil.stencil_offset = 0;
   brw->depthstencil.hiz_offset = 0;
   brw->depthstencil.depth_mt = NULL;
   brw->depthstencil.stencil_mt = NULL;
   if (depth_irb) {
      depth_mt = depth_irb->mt;
      brw->depthstencil.depth_mt = depth_mt;
      brw->depthstencil.depth_offset =
         intel_region_get_aligned_offset(depth_mt->region,
                                         depth_irb->draw_x & ~tile_mask_x,
                                         depth_irb->draw_y & ~tile_mask_y,
                                         false);
      if (intel_renderbuffer_has_hiz(depth_irb)) {
         /* HiZ rows cover two pixel rows, hence the /2 on the Y offset. */
         brw->depthstencil.hiz_offset =
            intel_region_get_aligned_offset(depth_mt->region,
                                            depth_irb->draw_x & ~tile_mask_x,
                                            (depth_irb->draw_y & ~tile_mask_y) /
                                            2,
                                            false);
      }
   }
   if (stencil_irb) {
      stencil_mt = get_stencil_miptree(stencil_irb);

      brw->depthstencil.stencil_mt = stencil_mt;
      if (stencil_mt->format == MESA_FORMAT_S8) {
         /* Note: we can't compute the stencil offset using
          * intel_region_get_aligned_offset(), because stencil_region claims
          * that the region is untiled even though it's W tiled.
          */
         brw->depthstencil.stencil_offset =
            (stencil_draw_y & ~tile_mask_y) * stencil_mt->region->pitch +
            (stencil_draw_x & ~tile_mask_x) * 64;
      }
   }
}
||
559 | |||
/**
 * Gather the depth/stencil/hiz configuration for the current draw
 * framebuffer (using the offsets pre-computed by
 * brw_workaround_depthstencil_alignment) and hand it to the per-gen
 * emit_depth_stencil_hiz vtbl hook.
 */
void
brw_emit_depthbuffer(struct brw_context *brw)
{
   struct gl_context *ctx = &brw->ctx;
   struct gl_framebuffer *fb = ctx->DrawBuffer;
   /* _NEW_BUFFERS */
   struct intel_renderbuffer *depth_irb = intel_get_renderbuffer(fb, BUFFER_DEPTH);
   struct intel_renderbuffer *stencil_irb = intel_get_renderbuffer(fb, BUFFER_STENCIL);
   struct intel_mipmap_tree *depth_mt = brw->depthstencil.depth_mt;
   struct intel_mipmap_tree *stencil_mt = brw->depthstencil.stencil_mt;
   uint32_t tile_x = brw->depthstencil.tile_x;
   uint32_t tile_y = brw->depthstencil.tile_y;
   bool hiz = depth_irb && intel_renderbuffer_has_hiz(depth_irb);
   bool separate_stencil = false;
   /* Defaults describe a null depth surface (no attachment). */
   uint32_t depth_surface_type = BRW_SURFACE_NULL;
   uint32_t depthbuffer_format = BRW_DEPTHFORMAT_D32_FLOAT;
   uint32_t depth_offset = 0;
   uint32_t width = 1, height = 1;

   if (stencil_mt) {
      separate_stencil = stencil_mt->format == MESA_FORMAT_S8;

      /* Gen7 supports only separate stencil */
      assert(separate_stencil || brw->gen < 7);
   }

   /* If there's a packed depth/stencil bound to stencil only, we need to
    * emit the packed depth/stencil buffer packet.
    */
   if (!depth_irb && stencil_irb && !separate_stencil) {
      depth_irb = stencil_irb;
      depth_mt = stencil_mt;
   }

   if (depth_irb && depth_mt) {
      /* When 3DSTATE_DEPTH_BUFFER.Separate_Stencil_Enable is set, then
       * 3DSTATE_DEPTH_BUFFER.Surface_Format is not permitted to be a packed
       * depthstencil format.
       *
       * Gens prior to 7 require that HiZ_Enable and Separate_Stencil_Enable be
       * set to the same value. Gens after 7 implicitly always set
       * Separate_Stencil_Enable; software cannot disable it.
       */
      if ((brw->gen < 7 && hiz) || brw->gen >= 7) {
         assert(!_mesa_is_format_packed_depth_stencil(depth_mt->format));
      }

      /* Prior to Gen7, if using separate stencil, hiz must be enabled. */
      assert(brw->gen >= 7 || !separate_stencil || hiz);

      assert(brw->gen < 6 || depth_mt->region->tiling == I915_TILING_Y);
      assert(!hiz || depth_mt->region->tiling == I915_TILING_Y);

      depthbuffer_format = brw_depthbuffer_format(brw);
      depth_surface_type = BRW_SURFACE_2D;
      depth_offset = brw->depthstencil.depth_offset;
      width = depth_irb->Base.Base.Width;
      height = depth_irb->Base.Base.Height;
   } else if (separate_stencil) {
      /*
       * There exists a separate stencil buffer but no depth buffer.
       *
       * The stencil buffer inherits most of its fields from
       * 3DSTATE_DEPTH_BUFFER: namely the tile walk, surface type, width, and
       * height.
       *
       * The tiled bit must be set. From the Sandybridge PRM, Volume 2, Part 1,
       * Section 7.5.5.1.1 3DSTATE_DEPTH_BUFFER, Bit 1.27 Tiled Surface:
       *     [DevGT+]: This field must be set to TRUE.
       */
      assert(brw->has_separate_stencil);

      depth_surface_type = BRW_SURFACE_2D;
      width = stencil_irb->Base.Base.Width;
      height = stencil_irb->Base.Base.Height;
   }

   /* Per-gen hook does the actual packet emission. */
   brw->vtbl.emit_depth_stencil_hiz(brw, depth_mt, depth_offset,
                                    depthbuffer_format, depth_surface_type,
                                    stencil_mt, hiz, separate_stencil,
                                    width, height, tile_x, tile_y);
}
||
642 | |||
/**
 * Emit 3DSTATE_DEPTH_BUFFER plus, when HiZ or separate stencil is in use,
 * the matching 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER
 * packets, and (gen6+/hiz) 3DSTATE_CLEAR_PARAMS.
 *
 * \param depth_mt/stencil_mt  may be NULL when the respective buffer is
 *                             absent; null-surface dwords are emitted then.
 * \param tile_x, tile_y       intra-tile draw offsets (pre-aligned by
 *                             brw_workaround_depthstencil_alignment).
 */
void
brw_emit_depth_stencil_hiz(struct brw_context *brw,
                           struct intel_mipmap_tree *depth_mt,
                           uint32_t depth_offset, uint32_t depthbuffer_format,
                           uint32_t depth_surface_type,
                           struct intel_mipmap_tree *stencil_mt,
                           bool hiz, bool separate_stencil,
                           uint32_t width, uint32_t height,
                           uint32_t tile_x, uint32_t tile_y)
{
   /* Enable the hiz bit if we're doing separate stencil, because it and the
    * separate stencil bit must have the same value. From Section 2.11.5.6.1.1
    * 3DSTATE_DEPTH_BUFFER, Bit 1.21 "Separate Stencil Enable":
    *     [DevIL]: If this field is enabled, Hierarchical Depth Buffer
    *     Enable must also be enabled.
    *
    *     [DevGT]: This field must be set to the same value (enabled or
    *     disabled) as Hierarchical Depth Buffer Enable
    */
   bool enable_hiz_ss = hiz || separate_stencil;


   /* 3DSTATE_DEPTH_BUFFER, 3DSTATE_STENCIL_BUFFER are both
    * non-pipelined state that will need the PIPE_CONTROL workaround.
    */
   if (brw->gen == 6) {
      intel_emit_post_sync_nonzero_flush(brw);
      intel_emit_depth_stall_flushes(brw);
   }

   /* Packet length varies by generation. */
   unsigned int len;
   if (brw->gen >= 6)
      len = 7;
   else if (brw->is_g4x || brw->gen == 5)
      len = 6;
   else
      len = 5;

   BEGIN_BATCH(len);
   OUT_BATCH(_3DSTATE_DEPTH_BUFFER << 16 | (len - 2));
   OUT_BATCH((depth_mt ? depth_mt->region->pitch - 1 : 0) |
             (depthbuffer_format << 18) |
             ((enable_hiz_ss ? 1 : 0) << 21) | /* separate stencil enable */
             ((enable_hiz_ss ? 1 : 0) << 22) | /* hiz enable */
             (BRW_TILEWALK_YMAJOR << 26) |
             ((depth_mt ? depth_mt->region->tiling != I915_TILING_NONE : 1)
              << 27) |
             (depth_surface_type << 29));

   if (depth_mt) {
      OUT_RELOC(depth_mt->region->bo,
                I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
                depth_offset);
   } else {
      OUT_BATCH(0);
   }

   OUT_BATCH(((width + tile_x - 1) << 6) |
             ((height + tile_y - 1) << 19));
   OUT_BATCH(0);

   if (brw->is_g4x || brw->gen >= 5)
      OUT_BATCH(tile_x | (tile_y << 16));
   else
      assert(tile_x == 0 && tile_y == 0);

   if (brw->gen >= 6)
      OUT_BATCH(0);

   ADVANCE_BATCH();

   if (hiz || separate_stencil) {
      /*
       * In the 3DSTATE_DEPTH_BUFFER batch emitted above, the 'separate
       * stencil enable' and 'hiz enable' bits were set. Therefore we must
       * emit 3DSTATE_HIER_DEPTH_BUFFER and 3DSTATE_STENCIL_BUFFER. Even if
       * there is no stencil buffer, 3DSTATE_STENCIL_BUFFER must be emitted;
       * failure to do so causes hangs on gen5 and a stall on gen6.
       */

      /* Emit hiz buffer. */
      if (hiz) {
         struct intel_mipmap_tree *hiz_mt = depth_mt->hiz_mt;
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(hiz_mt->region->pitch - 1);
	 OUT_RELOC(hiz_mt->region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		   brw->depthstencil.hiz_offset);
	 ADVANCE_BATCH();
      } else {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_HIER_DEPTH_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(0);
	 OUT_BATCH(0);
	 ADVANCE_BATCH();
      }

      /* Emit stencil buffer. */
      if (separate_stencil) {
	 struct intel_region *region = stencil_mt->region;

	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
         /* The stencil buffer has quirky pitch requirements.  From Vol 2a,
          * 11.5.6.2.1 3DSTATE_STENCIL_BUFFER, field "Surface Pitch":
          *    The pitch must be set to 2x the value computed based on width, as
          *    the stencil buffer is stored with two rows interleaved.
          */
	 OUT_BATCH(2 * region->pitch - 1);
	 OUT_RELOC(region->bo,
		   I915_GEM_DOMAIN_RENDER, I915_GEM_DOMAIN_RENDER,
		   brw->depthstencil.stencil_offset);
	 ADVANCE_BATCH();
      } else {
	 BEGIN_BATCH(3);
	 OUT_BATCH((_3DSTATE_STENCIL_BUFFER << 16) | (3 - 2));
	 OUT_BATCH(0);
	 OUT_BATCH(0);
	 ADVANCE_BATCH();
      }
   }

   /*
    * On Gen >= 6, emit clear params for safety. If using hiz, then clear
    * params must be emitted.
    *
    * From Section 2.11.5.6.4.1 3DSTATE_CLEAR_PARAMS:
    *     3DSTATE_CLEAR_PARAMS packet must follow the DEPTH_BUFFER_STATE packet
    *     when HiZ is enabled and the DEPTH_BUFFER_STATE changes.
    */
   if (brw->gen >= 6 || hiz) {
      if (brw->gen == 6)
	 intel_emit_post_sync_nonzero_flush(brw);

      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_CLEAR_PARAMS << 16 |
		GEN5_DEPTH_CLEAR_VALID |
		(2 - 2));
      OUT_BATCH(depth_mt ? depth_mt->depth_clear_value : 0);
      ADVANCE_BATCH();
   }
}
||
786 | |||
/* Atom for the depth/stencil/hiz buffer packets: re-emitted whenever the
 * framebuffer bindings change (_NEW_BUFFERS) or a new batchbuffer starts
 * (BRW_NEW_BATCH), since all emitted state is lost on a batch flush.
 */
const struct brw_tracked_state brw_depthbuffer = {
   .dirty = {
      .mesa = _NEW_BUFFERS,
      .brw = BRW_NEW_BATCH,
      .cache = 0,
   },
   .emit = brw_emit_depthbuffer,
};
||
795 | |||
796 | |||
797 | |||
798 | /*********************************************************************** |
||
799 | * Polygon stipple packet |
||
800 | */ |
||
801 | |||
802 | static void upload_polygon_stipple(struct brw_context *brw) |
||
803 | { |
||
804 | struct gl_context *ctx = &brw->ctx; |
||
805 | GLuint i; |
||
806 | |||
807 | /* _NEW_POLYGON */ |
||
808 | if (!ctx->Polygon.StippleFlag) |
||
809 | return; |
||
810 | |||
811 | if (brw->gen == 6) |
||
812 | intel_emit_post_sync_nonzero_flush(brw); |
||
813 | |||
814 | BEGIN_BATCH(33); |
||
815 | OUT_BATCH(_3DSTATE_POLY_STIPPLE_PATTERN << 16 | (33 - 2)); |
||
816 | |||
817 | /* Polygon stipple is provided in OpenGL order, i.e. bottom |
||
818 | * row first. If we're rendering to a window (i.e. the |
||
819 | * default frame buffer object, 0), then we need to invert |
||
820 | * it to match our pixel layout. But if we're rendering |
||
821 | * to a FBO (i.e. any named frame buffer object), we *don't* |
||
822 | * need to invert - we already match the layout. |
||
823 | */ |
||
824 | if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) { |
||
825 | for (i = 0; i < 32; i++) |
||
826 | OUT_BATCH(ctx->PolygonStipple[31 - i]); /* invert */ |
||
827 | } |
||
828 | else { |
||
829 | for (i = 0; i < 32; i++) |
||
830 | OUT_BATCH(ctx->PolygonStipple[i]); |
||
831 | } |
||
832 | CACHED_BATCH(); |
||
833 | } |
||
834 | |||
/* Atom for the polygon stipple pattern.  Depends on _NEW_POLYGONSTIPPLE
 * (the pattern itself) and _NEW_POLYGON (the StippleFlag enable gate).
 */
const struct brw_tracked_state brw_polygon_stipple = {
   .dirty = {
      .mesa = (_NEW_POLYGONSTIPPLE |
	       _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple
};
||
844 | |||
845 | |||
846 | /*********************************************************************** |
||
847 | * Polygon stipple offset packet |
||
848 | */ |
||
849 | |||
850 | static void upload_polygon_stipple_offset(struct brw_context *brw) |
||
851 | { |
||
852 | struct gl_context *ctx = &brw->ctx; |
||
853 | |||
854 | /* _NEW_POLYGON */ |
||
855 | if (!ctx->Polygon.StippleFlag) |
||
856 | return; |
||
857 | |||
858 | if (brw->gen == 6) |
||
859 | intel_emit_post_sync_nonzero_flush(brw); |
||
860 | |||
861 | BEGIN_BATCH(2); |
||
862 | OUT_BATCH(_3DSTATE_POLY_STIPPLE_OFFSET << 16 | (2-2)); |
||
863 | |||
864 | /* _NEW_BUFFERS |
||
865 | * |
||
866 | * If we're drawing to a system window we have to invert the Y axis |
||
867 | * in order to match the OpenGL pixel coordinate system, and our |
||
868 | * offset must be matched to the window position. If we're drawing |
||
869 | * to a user-created FBO then our native pixel coordinate system |
||
870 | * works just fine, and there's no window system to worry about. |
||
871 | */ |
||
872 | if (_mesa_is_winsys_fbo(ctx->DrawBuffer)) |
||
873 | OUT_BATCH((32 - (ctx->DrawBuffer->Height & 31)) & 31); |
||
874 | else |
||
875 | OUT_BATCH(0); |
||
876 | CACHED_BATCH(); |
||
877 | } |
||
878 | |||
/* Atom for the polygon stipple Y offset.  Depends on _NEW_BUFFERS (window
 * height / winsys-vs-FBO) and _NEW_POLYGON (the StippleFlag enable gate).
 */
const struct brw_tracked_state brw_polygon_stipple_offset = {
   .dirty = {
      .mesa = (_NEW_BUFFERS |
	       _NEW_POLYGON),
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_polygon_stipple_offset
};
||
888 | |||
889 | /********************************************************************** |
||
890 | * AA Line parameters |
||
891 | */ |
||
892 | static void upload_aa_line_parameters(struct brw_context *brw) |
||
893 | { |
||
894 | struct gl_context *ctx = &brw->ctx; |
||
895 | |||
896 | if (!ctx->Line.SmoothFlag || !brw->has_aa_line_parameters) |
||
897 | return; |
||
898 | |||
899 | if (brw->gen == 6) |
||
900 | intel_emit_post_sync_nonzero_flush(brw); |
||
901 | |||
902 | OUT_BATCH(_3DSTATE_AA_LINE_PARAMETERS << 16 | (3 - 2)); |
||
903 | /* use legacy aa line coverage computation */ |
||
904 | OUT_BATCH(0); |
||
905 | OUT_BATCH(0); |
||
906 | CACHED_BATCH(); |
||
907 | } |
||
908 | |||
/* Atom for antialiased-line parameters; depends on _NEW_LINE (the
 * Line.SmoothFlag enable gate checked by the emit function).
 */
const struct brw_tracked_state brw_aa_line_parameters = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_aa_line_parameters
};
||
917 | |||
918 | /*********************************************************************** |
||
919 | * Line stipple packet |
||
920 | */ |
||
921 | |||
922 | static void upload_line_stipple(struct brw_context *brw) |
||
923 | { |
||
924 | struct gl_context *ctx = &brw->ctx; |
||
925 | GLfloat tmp; |
||
926 | GLint tmpi; |
||
927 | |||
928 | if (!ctx->Line.StippleFlag) |
||
929 | return; |
||
930 | |||
931 | if (brw->gen == 6) |
||
932 | intel_emit_post_sync_nonzero_flush(brw); |
||
933 | |||
934 | BEGIN_BATCH(3); |
||
935 | OUT_BATCH(_3DSTATE_LINE_STIPPLE_PATTERN << 16 | (3 - 2)); |
||
936 | OUT_BATCH(ctx->Line.StipplePattern); |
||
937 | |||
938 | if (brw->gen >= 7) { |
||
939 | /* in U1.16 */ |
||
940 | tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; |
||
941 | tmpi = tmp * (1<<16); |
||
942 | OUT_BATCH(tmpi << 15 | ctx->Line.StippleFactor); |
||
943 | } |
||
944 | else { |
||
945 | /* in U1.13 */ |
||
946 | tmp = 1.0 / (GLfloat) ctx->Line.StippleFactor; |
||
947 | tmpi = tmp * (1<<13); |
||
948 | OUT_BATCH(tmpi << 16 | ctx->Line.StippleFactor); |
||
949 | } |
||
950 | |||
951 | CACHED_BATCH(); |
||
952 | } |
||
953 | |||
/* Atom for the line stipple pattern/factor; depends on _NEW_LINE
 * (pattern, factor, and the StippleFlag enable gate).
 */
const struct brw_tracked_state brw_line_stipple = {
   .dirty = {
      .mesa = _NEW_LINE,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = upload_line_stipple
};
||
962 | |||
963 | |||
964 | /*********************************************************************** |
||
965 | * Misc invariant state packets |
||
966 | */ |
||
967 | |||
/**
 * Emit invariant state: packets programmed once per context/batch that the
 * rest of the driver never changes.
 *
 * Selects the 3D pipeline (vs. media), disables depth-offset clamping on
 * pre-gen6, sets the System Instruction Pointer to 0, and programs vertex
 * fetch statistics gathering (enabled only under INTEL_DEBUG stats).
 */
void
brw_upload_invariant_state(struct brw_context *brw)
{
   /* 3DSTATE_SIP, 3DSTATE_MULTISAMPLE, etc. are nonpipelined. */
   if (brw->gen == 6)
      intel_emit_post_sync_nonzero_flush(brw);

   /* Select the 3D pipeline (as opposed to media) */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_PIPELINE_SELECT << 16 | 0);
   ADVANCE_BATCH();

   if (brw->gen < 6) {
      /* Disable depth offset clamping. */
      BEGIN_BATCH(2);
      OUT_BATCH(_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP << 16 | (2 - 2));
      OUT_BATCH_F(0.0);
      ADVANCE_BATCH();
   }

   /* Program the System Instruction Pointer to 0. */
   BEGIN_BATCH(2);
   OUT_BATCH(CMD_STATE_SIP << 16 | (2 - 2));
   OUT_BATCH(0);
   ADVANCE_BATCH();

   /* Vertex fetch statistics: only enabled when INTEL_DEBUG requests
    * stats, since the counters are not free.
    */
   BEGIN_BATCH(1);
   OUT_BATCH(brw->CMD_VF_STATISTICS << 16 |
	     (unlikely(INTEL_DEBUG & DEBUG_STATS) ? 1 : 0));
   ADVANCE_BATCH();
}
||
998 | |||
/* Atom for the invariant state above: emitted once per new context/batch,
 * with no Mesa state dependencies.
 */
const struct brw_tracked_state brw_invariant_state = {
   .dirty = {
      .mesa = 0,
      .brw = BRW_NEW_CONTEXT,
      .cache = 0
   },
   .emit = brw_upload_invariant_state
};
||
1007 | |||
1008 | /** |
||
1009 | * Define the base addresses which some state is referenced from. |
||
1010 | * |
||
1011 | * This allows us to avoid having to emit relocations for the objects, |
||
1012 | * and is actually required for binding table pointers on gen6. |
||
1013 | * |
||
1014 | * Surface state base address covers binding table pointers and |
||
1015 | * surface state objects, but not the surfaces that the surface state |
||
1016 | * objects point to. |
||
1017 | */ |
||
/* Emit STATE_BASE_ADDRESS for the current generation.
 *
 * The packet length and layout differ per generation (10 dwords on gen6+,
 * 8 on gen5, 6 on gen4); the dword order below is hardware-mandated, so
 * the emission sequence must not be reordered.  Base addresses with the
 * low "modify enable" bit set to 1 and an address of 0 leave that base at
 * zero while still committing the field.
 */
static void upload_state_base_address( struct brw_context *brw )
{
   /* FINISHME: According to section 3.6.1 "STATE_BASE_ADDRESS" of
    * vol1a of the G45 PRM, MI_FLUSH with the ISC invalidate should be
    * programmed prior to STATE_BASE_ADDRESS.
    *
    * However, given that the instruction SBA (general state base
    * address) on this chipset is always set to 0 across X and GL,
    * maybe this isn't required for us in particular.
    */

   if (brw->gen >= 6) {
      /* Gen6 needs a workaround flush before non-pipelined state. */
      if (brw->gen == 6)
         intel_emit_post_sync_nonzero_flush(brw);

      BEGIN_BATCH(10);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (10 - 2));
      /* General state base address: stateless DP read/write requests */
      OUT_BATCH(1);
      /* Surface state base address:
       * BINDING_TABLE_STATE
       * SURFACE_STATE
       */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0, 1);
      /* Dynamic state base address:
       * SAMPLER_STATE
       * SAMPLER_BORDER_COLOR_STATE
       * CLIP, SF, WM/CC viewport state
       * COLOR_CALC_STATE
       * DEPTH_STENCIL_STATE
       * BLEND_STATE
       * Push constants (when INSTPM: CONSTANT_BUFFER Address Offset
       * Disable is clear, which we rely on)
       */
      OUT_RELOC(brw->batch.bo, (I915_GEM_DOMAIN_RENDER |
				I915_GEM_DOMAIN_INSTRUCTION), 0, 1);

      OUT_BATCH(1); /* Indirect object base address: MEDIA_OBJECT data */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
		1); /* Instruction base address: shader kernels (incl. SIP) */

      OUT_BATCH(1); /* General state upper bound */
      /* Dynamic state upper bound.  Although the documentation says that
       * programming it to zero will cause it to be ignored, that is a lie.
       * If this isn't programmed to a real bound, the sampler border color
       * pointer is rejected, causing border color to mysteriously fail.
       */
      OUT_BATCH(0xfffff001);
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else if (brw->gen == 5) {
      BEGIN_BATCH(8);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (8 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
		1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_RELOC(brw->cache.bo, I915_GEM_DOMAIN_INSTRUCTION, 0,
		1); /* Instruction base address */
      OUT_BATCH(0xfffff001); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      OUT_BATCH(1); /* Instruction access upper bound */
      ADVANCE_BATCH();
   } else {
      BEGIN_BATCH(6);
      OUT_BATCH(CMD_STATE_BASE_ADDRESS << 16 | (6 - 2));
      OUT_BATCH(1); /* General state base address */
      OUT_RELOC(brw->batch.bo, I915_GEM_DOMAIN_SAMPLER, 0,
		1); /* Surface state base address */
      OUT_BATCH(1); /* Indirect object base address */
      OUT_BATCH(1); /* General state upper bound */
      OUT_BATCH(1); /* Indirect object upper bound */
      ADVANCE_BATCH();
   }

   /* According to section 3.6.1 of VOL1 of the 965 PRM,
    * STATE_BASE_ADDRESS updates require a reissue of:
    *
    * 3DSTATE_PIPELINE_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * and this continues through Ironlake.  The Sandy Bridge PRM, vol
    * 1 part 1 says that the folowing packets must be reissued:
    *
    * 3DSTATE_CC_POINTERS
    * 3DSTATE_BINDING_TABLE_POINTERS
    * 3DSTATE_SAMPLER_STATE_POINTERS
    * 3DSTATE_VIEWPORT_STATE_POINTERS
    * MEDIA_STATE_POINTERS
    *
    * Those are always reissued following SBA updates anyway (new
    * batch time), except in the case of the program cache BO
    * changing.  Having a separate state flag makes the sequence more
    * obvious.
    */

   /* Tell dependent atoms (pointer packets) that the bases moved. */
   brw->state.dirty.brw |= BRW_NEW_STATE_BASE_ADDRESS;
}
||
1118 | |||
/* Atom for STATE_BASE_ADDRESS: re-emitted on every new batch and whenever
 * the program cache BO is replaced (its base address is one of the fields).
 */
const struct brw_tracked_state brw_state_base_address = {
   .dirty = {
      .mesa = 0,
      .brw = (BRW_NEW_BATCH |
	      BRW_NEW_PROGRAM_CACHE),
      .cache = 0,
   },
   .emit = upload_state_base_address
};