Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | Copyright (C) Intel Corp. 2006. All Rights Reserved. |
||
3 | Intel funded Tungsten Graphics (http://www.tungstengraphics.com) to |
||
4 | develop this 3D driver. |
||
5 | |||
6 | Permission is hereby granted, free of charge, to any person obtaining |
||
7 | a copy of this software and associated documentation files (the |
||
8 | "Software"), to deal in the Software without restriction, including |
||
9 | without limitation the rights to use, copy, modify, merge, publish, |
||
10 | distribute, sublicense, and/or sell copies of the Software, and to |
||
11 | permit persons to whom the Software is furnished to do so, subject to |
||
12 | the following conditions: |
||
13 | |||
14 | The above copyright notice and this permission notice (including the |
||
15 | next paragraph) shall be included in all copies or substantial |
||
16 | portions of the Software. |
||
17 | |||
18 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, |
||
19 | EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. |
||
21 | IN NO EVENT SHALL THE COPYRIGHT OWNER(S) AND/OR ITS SUPPLIERS BE |
||
22 | LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION |
||
23 | OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION |
||
24 | WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | |||
26 | **********************************************************************/ |
||
27 | /* |
||
28 | * Authors: |
||
29 | * Keith Whitwell |
||
30 | */ |
||
31 | |||
32 | |||
33 | #include "main/glheader.h" |
||
34 | #include "main/macros.h" |
||
35 | #include "main/enums.h" |
||
36 | |||
37 | #include "program/program.h" |
||
38 | #include "intel_batchbuffer.h" |
||
39 | |||
40 | #include "brw_defines.h" |
||
41 | #include "brw_context.h" |
||
42 | #include "brw_eu.h" |
||
43 | #include "brw_gs.h" |
||
44 | |||
45 | /** |
||
46 | * Allocate registers for GS. |
||
47 | * |
||
48 | * If sol_program is true, then: |
||
49 | * |
||
50 | * - The thread will be spawned with the "SVBI Payload Enable" bit set, so GRF |
||
51 | * 1 needs to be set aside to hold the streamed vertex buffer indices. |
||
52 | * |
||
53 | * - The thread will need to use the destination_indices register. |
||
54 | */ |
||
55 | static void brw_gs_alloc_regs( struct brw_gs_compile *c, |
||
56 | GLuint nr_verts, |
||
57 | bool sol_program ) |
||
58 | { |
||
59 | GLuint i = 0,j; |
||
60 | |||
61 | /* Register usage is static, precompute here: |
||
62 | */ |
||
63 | c->reg.R0 = retype(brw_vec8_grf(i, 0), BRW_REGISTER_TYPE_UD); i++; |
||
64 | |||
65 | /* Streamed vertex buffer indices */ |
||
66 | if (sol_program) |
||
67 | c->reg.SVBI = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); |
||
68 | |||
69 | /* Payload vertices plus space for more generated vertices: |
||
70 | */ |
||
71 | for (j = 0; j < nr_verts; j++) { |
||
72 | c->reg.vertex[j] = brw_vec4_grf(i, 0); |
||
73 | i += c->nr_regs; |
||
74 | } |
||
75 | |||
76 | c->reg.header = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); |
||
77 | c->reg.temp = retype(brw_vec8_grf(i++, 0), BRW_REGISTER_TYPE_UD); |
||
78 | |||
79 | if (sol_program) { |
||
80 | c->reg.destination_indices = |
||
81 | retype(brw_vec4_grf(i++, 0), BRW_REGISTER_TYPE_UD); |
||
82 | } |
||
83 | |||
84 | c->prog_data.urb_read_length = c->nr_regs; |
||
85 | c->prog_data.total_grf = i; |
||
86 | } |
||
87 | |||
88 | |||
89 | /** |
||
90 | * Set up the initial value of c->reg.header register based on c->reg.R0. |
||
91 | * |
||
92 | * The following information is passed to the GS thread in R0, and needs to be |
||
93 | * included in the first URB_WRITE or FF_SYNC message sent by the GS: |
||
94 | * |
||
95 | * - DWORD 0 [31:0] handle info (Gen4 only) |
||
96 | * - DWORD 5 [7:0] FFTID |
||
97 | * - DWORD 6 [31:0] Debug info |
||
98 | * - DWORD 7 [31:0] Debug info |
||
99 | * |
||
100 | * This function sets up the above data by copying by copying the contents of |
||
101 | * R0 to the header register. |
||
102 | */ |
||
103 | static void brw_gs_initialize_header(struct brw_gs_compile *c) |
||
104 | { |
||
105 | struct brw_compile *p = &c->func; |
||
106 | brw_MOV(p, c->reg.header, c->reg.R0); |
||
107 | } |
||
108 | |||
109 | /** |
||
110 | * Overwrite DWORD 2 of c->reg.header with the given immediate unsigned value. |
||
111 | * |
||
112 | * In URB_WRITE messages, DWORD 2 contains the fields PrimType, PrimStart, |
||
113 | * PrimEnd, Increment CL_INVOCATIONS, and SONumPrimsWritten, many of which we |
||
114 | * need to be able to update on a per-vertex basis. |
||
115 | */ |
||
116 | static void brw_gs_overwrite_header_dw2(struct brw_gs_compile *c, |
||
117 | unsigned dw2) |
||
118 | { |
||
119 | struct brw_compile *p = &c->func; |
||
120 | brw_MOV(p, get_element_ud(c->reg.header, 2), brw_imm_ud(dw2)); |
||
121 | } |
||
122 | |||
123 | /** |
||
124 | * Overwrite DWORD 2 of c->reg.header with the primitive type from c->reg.R0. |
||
125 | * |
||
126 | * When the thread is spawned, GRF 0 contains the primitive type in bits 4:0 |
||
127 | * of DWORD 2. URB_WRITE messages need the primitive type in bits 6:2 of |
||
128 | * DWORD 2. So this function extracts the primitive type field, bitshifts it |
||
129 | * appropriately, and stores it in c->reg.header. |
||
130 | */ |
||
131 | static void brw_gs_overwrite_header_dw2_from_r0(struct brw_gs_compile *c) |
||
132 | { |
||
133 | struct brw_compile *p = &c->func; |
||
134 | brw_AND(p, get_element_ud(c->reg.header, 2), get_element_ud(c->reg.R0, 2), |
||
135 | brw_imm_ud(0x1f)); |
||
136 | brw_SHL(p, get_element_ud(c->reg.header, 2), |
||
137 | get_element_ud(c->reg.header, 2), brw_imm_ud(2)); |
||
138 | } |
||
139 | |||
140 | /** |
||
141 | * Apply an additive offset to DWORD 2 of c->reg.header. |
||
142 | * |
||
143 | * This is used to set/unset the "PrimStart" and "PrimEnd" flags appropriately |
||
144 | * for each vertex. |
||
145 | */ |
||
146 | static void brw_gs_offset_header_dw2(struct brw_gs_compile *c, int offset) |
||
147 | { |
||
148 | struct brw_compile *p = &c->func; |
||
149 | brw_ADD(p, get_element_d(c->reg.header, 2), get_element_d(c->reg.header, 2), |
||
150 | brw_imm_d(offset)); |
||
151 | } |
||
152 | |||
153 | |||
154 | /** |
||
155 | * Emit a vertex using the URB_WRITE message. Use the contents of |
||
156 | * c->reg.header for the message header, and the registers starting at \c vert |
||
157 | * for the vertex data. |
||
158 | * |
||
159 | * If \c last is true, then this is the last vertex, so no further URB space |
||
160 | * should be allocated, and this message should end the thread. |
||
161 | * |
||
162 | * If \c last is false, then a new URB entry will be allocated, and its handle |
||
163 | * will be stored in DWORD 0 of c->reg.header for use in the next URB_WRITE |
||
164 | * message. |
||
165 | */ |
||
166 | static void brw_gs_emit_vue(struct brw_gs_compile *c, |
||
167 | struct brw_reg vert, |
||
168 | bool last) |
||
169 | { |
||
170 | struct brw_compile *p = &c->func; |
||
171 | bool allocate = !last; |
||
172 | |||
173 | /* Copy the vertex from vertn into m1..mN+1: |
||
174 | */ |
||
175 | brw_copy8(p, brw_message_reg(1), vert, c->nr_regs); |
||
176 | |||
177 | /* Send each vertex as a seperate write to the urb. This is |
||
178 | * different to the concept in brw_sf_emit.c, where subsequent |
||
179 | * writes are used to build up a single urb entry. Each of these |
||
180 | * writes instantiates a seperate urb entry, and a new one must be |
||
181 | * allocated each time. |
||
182 | */ |
||
183 | brw_urb_WRITE(p, |
||
184 | allocate ? c->reg.temp |
||
185 | : retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), |
||
186 | 0, |
||
187 | c->reg.header, |
||
188 | allocate, |
||
189 | 1, /* used */ |
||
190 | c->nr_regs + 1, /* msg length */ |
||
191 | allocate ? 1 : 0, /* response length */ |
||
192 | allocate ? 0 : 1, /* eot */ |
||
193 | 1, /* writes_complete */ |
||
194 | 0, /* urb offset */ |
||
195 | BRW_URB_SWIZZLE_NONE); |
||
196 | |||
197 | if (allocate) { |
||
198 | brw_MOV(p, get_element_ud(c->reg.header, 0), |
||
199 | get_element_ud(c->reg.temp, 0)); |
||
200 | } |
||
201 | } |
||
202 | |||
203 | /** |
||
204 | * Send an FF_SYNC message to ensure that all previously spawned GS threads |
||
205 | * have finished sending primitives down the pipeline, and to allocate a URB |
||
206 | * entry for the first output vertex. Only needed on Ironlake+. |
||
207 | * |
||
208 | * This function modifies c->reg.header: in DWORD 1, it stores num_prim (which |
||
209 | * is needed by the FF_SYNC message), and in DWORD 0, it stores the handle to |
||
210 | * the allocated URB entry (which will be needed by the URB_WRITE meesage that |
||
211 | * follows). |
||
212 | */ |
||
213 | static void brw_gs_ff_sync(struct brw_gs_compile *c, int num_prim) |
||
214 | { |
||
215 | struct brw_compile *p = &c->func; |
||
216 | |||
217 | brw_MOV(p, get_element_ud(c->reg.header, 1), brw_imm_ud(num_prim)); |
||
218 | brw_ff_sync(p, |
||
219 | c->reg.temp, |
||
220 | 0, |
||
221 | c->reg.header, |
||
222 | 1, /* allocate */ |
||
223 | 1, /* response length */ |
||
224 | |||
225 | brw_MOV(p, get_element_ud(c->reg.header, 0), |
||
226 | get_element_ud(c->reg.temp, 0)); |
||
227 | } |
||
228 | |||
229 | |||
230 | void brw_gs_quads( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) |
||
231 | { |
||
232 | struct brw_context *brw = c->func.brw; |
||
233 | |||
234 | brw_gs_alloc_regs(c, 4, false); |
||
235 | brw_gs_initialize_header(c); |
||
236 | /* Use polygons for correct edgeflag behaviour. Note that vertex 3 |
||
237 | * is the PV for quads, but vertex 0 for polygons: |
||
238 | */ |
||
239 | if (brw->gen == 5) |
||
240 | brw_gs_ff_sync(c, 1); |
||
241 | brw_gs_overwrite_header_dw2( |
||
242 | c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) |
||
243 | | URB_WRITE_PRIM_START)); |
||
244 | if (key->pv_first) { |
||
245 | brw_gs_emit_vue(c, c->reg.vertex[0], 0); |
||
246 | brw_gs_overwrite_header_dw2( |
||
247 | c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); |
||
248 | brw_gs_emit_vue(c, c->reg.vertex[1], 0); |
||
249 | brw_gs_emit_vue(c, c->reg.vertex[2], 0); |
||
250 | brw_gs_overwrite_header_dw2( |
||
251 | c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) |
||
252 | | URB_WRITE_PRIM_END)); |
||
253 | brw_gs_emit_vue(c, c->reg.vertex[3], 1); |
||
254 | } |
||
255 | else { |
||
256 | brw_gs_emit_vue(c, c->reg.vertex[3], 0); |
||
257 | brw_gs_overwrite_header_dw2( |
||
258 | c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); |
||
259 | brw_gs_emit_vue(c, c->reg.vertex[0], 0); |
||
260 | brw_gs_emit_vue(c, c->reg.vertex[1], 0); |
||
261 | brw_gs_overwrite_header_dw2( |
||
262 | c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) |
||
263 | | URB_WRITE_PRIM_END)); |
||
264 | brw_gs_emit_vue(c, c->reg.vertex[2], 1); |
||
265 | } |
||
266 | } |
||
267 | |||
268 | void brw_gs_quad_strip( struct brw_gs_compile *c, struct brw_gs_prog_key *key ) |
||
269 | { |
||
270 | struct brw_context *brw = c->func.brw; |
||
271 | |||
272 | brw_gs_alloc_regs(c, 4, false); |
||
273 | brw_gs_initialize_header(c); |
||
274 | |||
275 | if (brw->gen == 5) |
||
276 | brw_gs_ff_sync(c, 1); |
||
277 | brw_gs_overwrite_header_dw2( |
||
278 | c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) |
||
279 | | URB_WRITE_PRIM_START)); |
||
280 | if (key->pv_first) { |
||
281 | brw_gs_emit_vue(c, c->reg.vertex[0], 0); |
||
282 | brw_gs_overwrite_header_dw2( |
||
283 | c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); |
||
284 | brw_gs_emit_vue(c, c->reg.vertex[1], 0); |
||
285 | brw_gs_emit_vue(c, c->reg.vertex[2], 0); |
||
286 | brw_gs_overwrite_header_dw2( |
||
287 | c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) |
||
288 | | URB_WRITE_PRIM_END)); |
||
289 | brw_gs_emit_vue(c, c->reg.vertex[3], 1); |
||
290 | } |
||
291 | else { |
||
292 | brw_gs_emit_vue(c, c->reg.vertex[2], 0); |
||
293 | brw_gs_overwrite_header_dw2( |
||
294 | c, _3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT); |
||
295 | brw_gs_emit_vue(c, c->reg.vertex[3], 0); |
||
296 | brw_gs_emit_vue(c, c->reg.vertex[0], 0); |
||
297 | brw_gs_overwrite_header_dw2( |
||
298 | c, ((_3DPRIM_POLYGON << URB_WRITE_PRIM_TYPE_SHIFT) |
||
299 | | URB_WRITE_PRIM_END)); |
||
300 | brw_gs_emit_vue(c, c->reg.vertex[1], 1); |
||
301 | } |
||
302 | } |
||
303 | |||
304 | void brw_gs_lines( struct brw_gs_compile *c ) |
||
305 | { |
||
306 | struct brw_context *brw = c->func.brw; |
||
307 | |||
308 | brw_gs_alloc_regs(c, 2, false); |
||
309 | brw_gs_initialize_header(c); |
||
310 | |||
311 | if (brw->gen == 5) |
||
312 | brw_gs_ff_sync(c, 1); |
||
313 | brw_gs_overwrite_header_dw2( |
||
314 | c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) |
||
315 | | URB_WRITE_PRIM_START)); |
||
316 | brw_gs_emit_vue(c, c->reg.vertex[0], 0); |
||
317 | brw_gs_overwrite_header_dw2( |
||
318 | c, ((_3DPRIM_LINESTRIP << URB_WRITE_PRIM_TYPE_SHIFT) |
||
319 | | URB_WRITE_PRIM_END)); |
||
320 | brw_gs_emit_vue(c, c->reg.vertex[1], 1); |
||
321 | } |
||
322 | |||
323 | /** |
||
324 | * Generate the geometry shader program used on Gen6 to perform stream output |
||
325 | * (transform feedback). |
||
326 | */ |
||
327 | void |
||
328 | gen6_sol_program(struct brw_gs_compile *c, struct brw_gs_prog_key *key, |
||
329 | unsigned num_verts, bool check_edge_flags) |
||
330 | { |
||
331 | struct brw_compile *p = &c->func; |
||
332 | c->prog_data.svbi_postincrement_value = num_verts; |
||
333 | |||
334 | brw_gs_alloc_regs(c, num_verts, true); |
||
335 | brw_gs_initialize_header(c); |
||
336 | |||
337 | if (key->num_transform_feedback_bindings > 0) { |
||
338 | unsigned vertex, binding; |
||
339 | struct brw_reg destination_indices_uw = |
||
340 | vec8(retype(c->reg.destination_indices, BRW_REGISTER_TYPE_UW)); |
||
341 | |||
342 | /* Note: since we use the binding table to keep track of buffer offsets |
||
343 | * and stride, the GS doesn't need to keep track of a separate pointer |
||
344 | * into each buffer; it uses a single pointer which increments by 1 for |
||
345 | * each vertex. So we use SVBI0 for this pointer, regardless of whether |
||
346 | * transform feedback is in interleaved or separate attribs mode. |
||
347 | * |
||
348 | * Make sure that the buffers have enough room for all the vertices. |
||
349 | */ |
||
350 | brw_ADD(p, get_element_ud(c->reg.temp, 0), |
||
351 | get_element_ud(c->reg.SVBI, 0), brw_imm_ud(num_verts)); |
||
352 | brw_CMP(p, vec1(brw_null_reg()), BRW_CONDITIONAL_LE, |
||
353 | get_element_ud(c->reg.temp, 0), |
||
354 | get_element_ud(c->reg.SVBI, 4)); |
||
355 | brw_IF(p, BRW_EXECUTE_1); |
||
356 | |||
357 | /* Compute the destination indices to write to. Usually we use SVBI[0] |
||
358 | * + (0, 1, 2). However, for odd-numbered triangles in tristrips, the |
||
359 | * vertices come down the pipeline in reversed winding order, so we need |
||
360 | * to flip the order when writing to the transform feedback buffer. To |
||
361 | * ensure that flatshading accuracy is preserved, we need to write them |
||
362 | * in order SVBI[0] + (0, 2, 1) if we're using the first provoking |
||
363 | * vertex convention, and in order SVBI[0] + (1, 0, 2) if we're using |
||
364 | * the last provoking vertex convention. |
||
365 | * |
||
366 | * Note: since brw_imm_v can only be used in instructions in |
||
367 | * packed-word execution mode, and SVBI is a double-word, we need to |
||
368 | * first move the appropriate immediate constant ((0, 1, 2), (0, 2, 1), |
||
369 | * or (1, 0, 2)) to the destination_indices register, and then add SVBI |
||
370 | * using a separate instruction. Also, since the immediate constant is |
||
371 | * expressed as packed words, and we need to load double-words into |
||
372 | * destination_indices, we need to intersperse zeros to fill the upper |
||
373 | * halves of each double-word. |
||
374 | */ |
||
375 | brw_MOV(p, destination_indices_uw, |
||
376 | brw_imm_v(0x00020100)); /* (0, 1, 2) */ |
||
377 | if (num_verts == 3) { |
||
378 | /* Get primitive type into temp register. */ |
||
379 | brw_AND(p, get_element_ud(c->reg.temp, 0), |
||
380 | get_element_ud(c->reg.R0, 2), brw_imm_ud(0x1f)); |
||
381 | |||
382 | /* Test if primitive type is TRISTRIP_REVERSE. We need to do this as |
||
383 | * an 8-wide comparison so that the conditional MOV that follows |
||
384 | * moves all 8 words correctly. |
||
385 | */ |
||
386 | brw_CMP(p, vec8(brw_null_reg()), BRW_CONDITIONAL_EQ, |
||
387 | get_element_ud(c->reg.temp, 0), |
||
388 | brw_imm_ud(_3DPRIM_TRISTRIP_REVERSE)); |
||
389 | |||
390 | /* If so, then overwrite destination_indices_uw with the appropriate |
||
391 | * reordering. |
||
392 | */ |
||
393 | brw_MOV(p, destination_indices_uw, |
||
394 | brw_imm_v(key->pv_first ? 0x00010200 /* (0, 2, 1) */ |
||
395 | : 0x00020001)); /* (1, 0, 2) */ |
||
396 | brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
||
397 | } |
||
398 | brw_ADD(p, c->reg.destination_indices, |
||
399 | c->reg.destination_indices, get_element_ud(c->reg.SVBI, 0)); |
||
400 | |||
401 | /* For each vertex, generate code to output each varying using the |
||
402 | * appropriate binding table entry. |
||
403 | */ |
||
404 | for (vertex = 0; vertex < num_verts; ++vertex) { |
||
405 | /* Set up the correct destination index for this vertex */ |
||
406 | brw_MOV(p, get_element_ud(c->reg.header, 5), |
||
407 | get_element_ud(c->reg.destination_indices, vertex)); |
||
408 | |||
409 | for (binding = 0; binding < key->num_transform_feedback_bindings; |
||
410 | ++binding) { |
||
411 | unsigned char varying = |
||
412 | key->transform_feedback_bindings[binding]; |
||
413 | unsigned char slot = c->vue_map.varying_to_slot[varying]; |
||
414 | /* From the Sandybridge PRM, Volume 2, Part 1, Section 4.5.1: |
||
415 | * |
||
416 | * "Prior to End of Thread with a URB_WRITE, the kernel must |
||
417 | * ensure that all writes are complete by sending the final |
||
418 | * write as a committed write." |
||
419 | */ |
||
420 | bool final_write = |
||
421 | binding == key->num_transform_feedback_bindings - 1 && |
||
422 | vertex == num_verts - 1; |
||
423 | struct brw_reg vertex_slot = c->reg.vertex[vertex]; |
||
424 | vertex_slot.nr += slot / 2; |
||
425 | vertex_slot.subnr = (slot % 2) * 16; |
||
426 | /* gl_PointSize is stored in VARYING_SLOT_PSIZ.w. */ |
||
427 | vertex_slot.dw1.bits.swizzle = varying == VARYING_SLOT_PSIZ |
||
428 | ? BRW_SWIZZLE_WWWW : key->transform_feedback_swizzles[binding]; |
||
429 | brw_set_access_mode(p, BRW_ALIGN_16); |
||
430 | brw_MOV(p, stride(c->reg.header, 4, 4, 1), |
||
431 | retype(vertex_slot, BRW_REGISTER_TYPE_UD)); |
||
432 | brw_set_access_mode(p, BRW_ALIGN_1); |
||
433 | brw_svb_write(p, |
||
434 | final_write ? c->reg.temp : brw_null_reg(), /* dest */ |
||
435 | 1, /* msg_reg_nr */ |
||
436 | c->reg.header, /* src0 */ |
||
437 | SURF_INDEX_SOL_BINDING(binding), /* binding_table_index */ |
||
438 | final_write); /* send_commit_msg */ |
||
439 | } |
||
440 | } |
||
441 | brw_ENDIF(p); |
||
442 | |||
443 | /* Now, reinitialize the header register from R0 to restore the parts of |
||
444 | * the register that we overwrote while streaming out transform feedback |
||
445 | * data. |
||
446 | */ |
||
447 | brw_gs_initialize_header(c); |
||
448 | |||
449 | /* Finally, wait for the write commit to occur so that we can proceed to |
||
450 | * other things safely. |
||
451 | * |
||
452 | * From the Sandybridge PRM, Volume 4, Part 1, Section 3.3: |
||
453 | * |
||
454 | * The write commit does not modify the destination register, but |
||
455 | * merely clears the dependency associated with the destination |
||
456 | * register. Thus, a simple “mov” instruction using the register as a |
||
457 | * source is sufficient to wait for the write commit to occur. |
||
458 | */ |
||
459 | brw_MOV(p, c->reg.temp, c->reg.temp); |
||
460 | } |
||
461 | |||
462 | brw_gs_ff_sync(c, 1); |
||
463 | |||
464 | brw_gs_overwrite_header_dw2_from_r0(c); |
||
465 | switch (num_verts) { |
||
466 | case 1: |
||
467 | brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START | URB_WRITE_PRIM_END); |
||
468 | brw_gs_emit_vue(c, c->reg.vertex[0], true); |
||
469 | break; |
||
470 | case 2: |
||
471 | brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); |
||
472 | brw_gs_emit_vue(c, c->reg.vertex[0], false); |
||
473 | brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END - URB_WRITE_PRIM_START); |
||
474 | brw_gs_emit_vue(c, c->reg.vertex[1], true); |
||
475 | break; |
||
476 | case 3: |
||
477 | if (check_edge_flags) { |
||
478 | /* Only emit vertices 0 and 1 if this is the first triangle of the |
||
479 | * polygon. Otherwise they are redundant. |
||
480 | */ |
||
481 | brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); |
||
482 | brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), |
||
483 | get_element_ud(c->reg.R0, 2), |
||
484 | brw_imm_ud(BRW_GS_EDGE_INDICATOR_0)); |
||
485 | brw_IF(p, BRW_EXECUTE_1); |
||
486 | } |
||
487 | brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_START); |
||
488 | brw_gs_emit_vue(c, c->reg.vertex[0], false); |
||
489 | brw_gs_offset_header_dw2(c, -URB_WRITE_PRIM_START); |
||
490 | brw_gs_emit_vue(c, c->reg.vertex[1], false); |
||
491 | if (check_edge_flags) { |
||
492 | brw_ENDIF(p); |
||
493 | /* Only emit vertex 2 in PRIM_END mode if this is the last triangle |
||
494 | * of the polygon. Otherwise leave the primitive incomplete because |
||
495 | * there are more polygon vertices coming. |
||
496 | */ |
||
497 | brw_set_conditionalmod(p, BRW_CONDITIONAL_NZ); |
||
498 | brw_AND(p, retype(brw_null_reg(), BRW_REGISTER_TYPE_UD), |
||
499 | get_element_ud(c->reg.R0, 2), |
||
500 | brw_imm_ud(BRW_GS_EDGE_INDICATOR_1)); |
||
501 | brw_set_predicate_control(p, BRW_PREDICATE_NORMAL); |
||
502 | } |
||
503 | brw_gs_offset_header_dw2(c, URB_WRITE_PRIM_END); |
||
504 | brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
||
505 | brw_gs_emit_vue(c, c->reg.vertex[2], true); |
||
506 | break; |
||
507 | } |
||
508 | }>>><>><>><>><>><>><>><>><>><>><>><>><>> |