Rev 4358 | Go to most recent revision | Details | Compare with Previous | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /* |
2 | * Copyright © 2010 Intel Corporation |
||
3 | * |
||
4 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
5 | * copy of this software and associated documentation files (the "Software"), |
||
6 | * to deal in the Software without restriction, including without limitation |
||
7 | * the rights to use, copy, modify, merge, publish, distribute, sublicense, |
||
8 | * and/or sell copies of the Software, and to permit persons to whom the |
||
9 | * Software is furnished to do so, subject to the following conditions: |
||
10 | * |
||
11 | * The above copyright notice and this permission notice (including the next |
||
12 | * paragraph) shall be included in all copies or substantial portions of the |
||
13 | * Software. |
||
14 | * |
||
15 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR |
||
16 | * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, |
||
17 | * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL |
||
18 | * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER |
||
19 | * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING |
||
20 | * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS |
||
21 | * IN THE SOFTWARE. |
||
22 | */ |
||
23 | |||
24 | /** @file brw_fs_emit.cpp |
||
25 | * |
||
26 | * This file supports emitting code from the FS LIR to the actual |
||
27 | * native instructions. |
||
28 | */ |
||
29 | |||
30 | extern "C" { |
||
31 | #include "main/macros.h" |
||
32 | #include "brw_context.h" |
||
33 | #include "brw_eu.h" |
||
34 | } /* extern "C" */ |
||
35 | |||
36 | #include "brw_fs.h" |
||
37 | #include "brw_cfg.h" |
||
38 | |||
39 | fs_generator::fs_generator(struct brw_context *brw, |
||
40 | struct brw_wm_compile *c, |
||
41 | struct gl_shader_program *prog, |
||
42 | struct gl_fragment_program *fp, |
||
43 | bool dual_source_output) |
||
44 | |||
45 | : brw(brw), c(c), prog(prog), fp(fp), dual_source_output(dual_source_output) |
||
46 | { |
||
47 | ctx = &brw->ctx; |
||
48 | |||
49 | shader = prog ? prog->_LinkedShaders[MESA_SHADER_FRAGMENT] : NULL; |
||
50 | |||
51 | mem_ctx = c; |
||
52 | |||
53 | p = rzalloc(mem_ctx, struct brw_compile); |
||
54 | brw_init_compile(brw, p, mem_ctx); |
||
55 | } |
||
56 | |||
57 | fs_generator::~fs_generator() |
||
58 | { |
||
59 | } |
||
60 | |||
61 | void |
||
62 | fs_generator::patch_discard_jumps_to_fb_writes() |
||
63 | { |
||
64 | if (brw->gen < 6 || this->discard_halt_patches.is_empty()) |
||
65 | return; |
||
66 | |||
67 | /* There is a somewhat strange undocumented requirement of using |
||
68 | * HALT, according to the simulator. If some channel has HALTed to |
||
69 | * a particular UIP, then by the end of the program, every channel |
||
70 | * must have HALTed to that UIP. Furthermore, the tracking is a |
||
71 | * stack, so you can't do the final halt of a UIP after starting |
||
72 | * halting to a new UIP. |
||
73 | * |
||
74 | * Symptoms of not emitting this instruction on actual hardware |
||
75 | * included GPU hangs and sparkly rendering on the piglit discard |
||
76 | * tests. |
||
77 | */ |
||
78 | struct brw_instruction *last_halt = gen6_HALT(p); |
||
79 | last_halt->bits3.break_cont.uip = 2; |
||
80 | last_halt->bits3.break_cont.jip = 2; |
||
81 | |||
82 | int ip = p->nr_insn; |
||
83 | |||
84 | foreach_list(node, &this->discard_halt_patches) { |
||
85 | ip_record *patch_ip = (ip_record *)node; |
||
86 | struct brw_instruction *patch = &p->store[patch_ip->ip]; |
||
87 | |||
88 | assert(patch->header.opcode == BRW_OPCODE_HALT); |
||
89 | /* HALT takes a half-instruction distance from the pre-incremented IP. */ |
||
90 | patch->bits3.break_cont.uip = (ip - patch_ip->ip) * 2; |
||
91 | } |
||
92 | |||
93 | this->discard_halt_patches.make_empty(); |
||
94 | } |
||
95 | |||
96 | void |
||
97 | fs_generator::generate_fb_write(fs_inst *inst) |
||
98 | { |
||
99 | bool eot = inst->eot; |
||
100 | struct brw_reg implied_header; |
||
101 | uint32_t msg_control; |
||
102 | |||
103 | /* Header is 2 regs, g0 and g1 are the contents. g0 will be implied |
||
104 | * move, here's g1. |
||
105 | */ |
||
106 | brw_push_insn_state(p); |
||
107 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
108 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
109 | |||
4401 | Serge | 110 | if (fp->UsesKill || c->key.alpha_test_func) { |
4358 | Serge | 111 | struct brw_reg pixel_mask; |
112 | |||
113 | if (brw->gen >= 6) |
||
114 | pixel_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); |
||
115 | else |
||
116 | pixel_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); |
||
117 | |||
118 | brw_MOV(p, pixel_mask, brw_flag_reg(0, 1)); |
||
119 | } |
||
120 | |||
121 | if (inst->header_present) { |
||
122 | if (brw->gen >= 6) { |
||
123 | brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
||
124 | brw_MOV(p, |
||
125 | retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD), |
||
126 | retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); |
||
127 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
128 | |||
129 | if (inst->target > 0 && c->key.replicate_alpha) { |
||
130 | /* Set "Source0 Alpha Present to RenderTarget" bit in message |
||
131 | * header. |
||
132 | */ |
||
133 | brw_OR(p, |
||
134 | vec1(retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD)), |
||
135 | vec1(retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)), |
||
136 | brw_imm_ud(0x1 << 11)); |
||
137 | } |
||
138 | |||
139 | if (inst->target > 0) { |
||
140 | /* Set the render target index for choosing BLEND_STATE. */ |
||
141 | brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, |
||
142 | inst->base_mrf, 2), |
||
143 | BRW_REGISTER_TYPE_UD), |
||
144 | brw_imm_ud(inst->target)); |
||
145 | } |
||
146 | |||
147 | implied_header = brw_null_reg(); |
||
148 | } else { |
||
149 | implied_header = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); |
||
150 | |||
151 | brw_MOV(p, |
||
152 | brw_message_reg(inst->base_mrf + 1), |
||
153 | brw_vec8_grf(1, 0)); |
||
154 | } |
||
155 | } else { |
||
156 | implied_header = brw_null_reg(); |
||
157 | } |
||
158 | |||
159 | if (this->dual_source_output) |
||
160 | msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01; |
||
161 | else if (dispatch_width == 16) |
||
162 | msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE; |
||
163 | else |
||
164 | msg_control = BRW_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01; |
||
165 | |||
166 | brw_pop_insn_state(p); |
||
167 | |||
168 | brw_fb_WRITE(p, |
||
169 | dispatch_width, |
||
170 | inst->base_mrf, |
||
171 | implied_header, |
||
172 | msg_control, |
||
173 | inst->target, |
||
174 | inst->mlen, |
||
175 | 0, |
||
176 | eot, |
||
177 | inst->header_present); |
||
178 | } |
||
179 | |||
180 | /* Computes the integer pixel x,y values from the origin. |
||
181 | * |
||
182 | * This is the basis of gl_FragCoord computation, but is also used |
||
183 | * pre-gen6 for computing the deltas from v0 for computing |
||
184 | * interpolation. |
||
185 | */ |
||
186 | void |
||
187 | fs_generator::generate_pixel_xy(struct brw_reg dst, bool is_x) |
||
188 | { |
||
189 | struct brw_reg g1_uw = retype(brw_vec1_grf(1, 0), BRW_REGISTER_TYPE_UW); |
||
190 | struct brw_reg src; |
||
191 | struct brw_reg deltas; |
||
192 | |||
193 | if (is_x) { |
||
194 | src = stride(suboffset(g1_uw, 4), 2, 4, 0); |
||
195 | deltas = brw_imm_v(0x10101010); |
||
196 | } else { |
||
197 | src = stride(suboffset(g1_uw, 5), 2, 4, 0); |
||
198 | deltas = brw_imm_v(0x11001100); |
||
199 | } |
||
200 | |||
201 | if (dispatch_width == 16) { |
||
202 | dst = vec16(dst); |
||
203 | } |
||
204 | |||
205 | /* We do this 8 or 16-wide, but since the destination is UW we |
||
206 | * don't do compression in the 16-wide case. |
||
207 | */ |
||
208 | brw_push_insn_state(p); |
||
209 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
210 | brw_ADD(p, dst, src, deltas); |
||
211 | brw_pop_insn_state(p); |
||
212 | } |
||
213 | |||
214 | void |
||
215 | fs_generator::generate_linterp(fs_inst *inst, |
||
216 | struct brw_reg dst, struct brw_reg *src) |
||
217 | { |
||
218 | struct brw_reg delta_x = src[0]; |
||
219 | struct brw_reg delta_y = src[1]; |
||
220 | struct brw_reg interp = src[2]; |
||
221 | |||
222 | if (brw->has_pln && |
||
223 | delta_y.nr == delta_x.nr + 1 && |
||
224 | (brw->gen >= 6 || (delta_x.nr & 1) == 0)) { |
||
225 | brw_PLN(p, dst, interp, delta_x); |
||
226 | } else { |
||
227 | brw_LINE(p, brw_null_reg(), interp, delta_x); |
||
228 | brw_MAC(p, dst, suboffset(interp, 1), delta_y); |
||
229 | } |
||
230 | } |
||
231 | |||
232 | void |
||
233 | fs_generator::generate_math1_gen7(fs_inst *inst, |
||
234 | struct brw_reg dst, |
||
235 | struct brw_reg src0) |
||
236 | { |
||
237 | assert(inst->mlen == 0); |
||
238 | brw_math(p, dst, |
||
239 | brw_math_function(inst->opcode), |
||
240 | 0, src0, |
||
241 | BRW_MATH_DATA_VECTOR, |
||
242 | BRW_MATH_PRECISION_FULL); |
||
243 | } |
||
244 | |||
245 | void |
||
246 | fs_generator::generate_math2_gen7(fs_inst *inst, |
||
247 | struct brw_reg dst, |
||
248 | struct brw_reg src0, |
||
249 | struct brw_reg src1) |
||
250 | { |
||
251 | assert(inst->mlen == 0); |
||
252 | brw_math2(p, dst, brw_math_function(inst->opcode), src0, src1); |
||
253 | } |
||
254 | |||
255 | void |
||
256 | fs_generator::generate_math1_gen6(fs_inst *inst, |
||
257 | struct brw_reg dst, |
||
258 | struct brw_reg src0) |
||
259 | { |
||
260 | int op = brw_math_function(inst->opcode); |
||
261 | |||
262 | assert(inst->mlen == 0); |
||
263 | |||
264 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
265 | brw_math(p, dst, |
||
266 | op, |
||
267 | 0, src0, |
||
268 | BRW_MATH_DATA_VECTOR, |
||
269 | BRW_MATH_PRECISION_FULL); |
||
270 | |||
271 | if (dispatch_width == 16) { |
||
272 | brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
||
273 | brw_math(p, sechalf(dst), |
||
274 | op, |
||
275 | 0, sechalf(src0), |
||
276 | BRW_MATH_DATA_VECTOR, |
||
277 | BRW_MATH_PRECISION_FULL); |
||
278 | brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
||
279 | } |
||
280 | } |
||
281 | |||
282 | void |
||
283 | fs_generator::generate_math2_gen6(fs_inst *inst, |
||
284 | struct brw_reg dst, |
||
285 | struct brw_reg src0, |
||
286 | struct brw_reg src1) |
||
287 | { |
||
288 | int op = brw_math_function(inst->opcode); |
||
289 | |||
290 | assert(inst->mlen == 0); |
||
291 | |||
292 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
293 | brw_math2(p, dst, op, src0, src1); |
||
294 | |||
295 | if (dispatch_width == 16) { |
||
296 | brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
||
297 | brw_math2(p, sechalf(dst), op, sechalf(src0), sechalf(src1)); |
||
298 | brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
||
299 | } |
||
300 | } |
||
301 | |||
302 | void |
||
303 | fs_generator::generate_math_gen4(fs_inst *inst, |
||
304 | struct brw_reg dst, |
||
305 | struct brw_reg src) |
||
306 | { |
||
307 | int op = brw_math_function(inst->opcode); |
||
308 | |||
309 | assert(inst->mlen >= 1); |
||
310 | |||
311 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
312 | brw_math(p, dst, |
||
313 | op, |
||
314 | inst->base_mrf, src, |
||
315 | BRW_MATH_DATA_VECTOR, |
||
316 | BRW_MATH_PRECISION_FULL); |
||
317 | |||
318 | if (dispatch_width == 16) { |
||
319 | brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
||
320 | brw_math(p, sechalf(dst), |
||
321 | op, |
||
322 | inst->base_mrf + 1, sechalf(src), |
||
323 | BRW_MATH_DATA_VECTOR, |
||
324 | BRW_MATH_PRECISION_FULL); |
||
325 | |||
326 | brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
||
327 | } |
||
328 | } |
||
329 | |||
330 | void |
||
331 | fs_generator::generate_math_g45(fs_inst *inst, |
||
332 | struct brw_reg dst, |
||
333 | struct brw_reg src) |
||
334 | { |
||
335 | if (inst->opcode == SHADER_OPCODE_POW || |
||
336 | inst->opcode == SHADER_OPCODE_INT_QUOTIENT || |
||
337 | inst->opcode == SHADER_OPCODE_INT_REMAINDER) { |
||
338 | generate_math_gen4(inst, dst, src); |
||
339 | return; |
||
340 | } |
||
341 | |||
342 | int op = brw_math_function(inst->opcode); |
||
343 | |||
344 | assert(inst->mlen >= 1); |
||
345 | |||
346 | brw_math(p, dst, |
||
347 | op, |
||
348 | inst->base_mrf, src, |
||
349 | BRW_MATH_DATA_VECTOR, |
||
350 | BRW_MATH_PRECISION_FULL); |
||
351 | } |
||
352 | |||
353 | void |
||
354 | fs_generator::generate_tex(fs_inst *inst, struct brw_reg dst, struct brw_reg src) |
||
355 | { |
||
356 | int msg_type = -1; |
||
357 | int rlen = 4; |
||
358 | uint32_t simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; |
||
359 | uint32_t return_format; |
||
360 | |||
361 | switch (dst.type) { |
||
362 | case BRW_REGISTER_TYPE_D: |
||
363 | return_format = BRW_SAMPLER_RETURN_FORMAT_SINT32; |
||
364 | break; |
||
365 | case BRW_REGISTER_TYPE_UD: |
||
366 | return_format = BRW_SAMPLER_RETURN_FORMAT_UINT32; |
||
367 | break; |
||
368 | default: |
||
369 | return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; |
||
370 | break; |
||
371 | } |
||
372 | |||
373 | if (dispatch_width == 16) |
||
374 | simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; |
||
375 | |||
376 | if (brw->gen >= 5) { |
||
377 | switch (inst->opcode) { |
||
378 | case SHADER_OPCODE_TEX: |
||
379 | if (inst->shadow_compare) { |
||
380 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_COMPARE; |
||
381 | } else { |
||
382 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE; |
||
383 | } |
||
384 | break; |
||
385 | case FS_OPCODE_TXB: |
||
386 | if (inst->shadow_compare) { |
||
387 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS_COMPARE; |
||
388 | } else { |
||
389 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_BIAS; |
||
390 | } |
||
391 | break; |
||
392 | case SHADER_OPCODE_TXL: |
||
393 | if (inst->shadow_compare) { |
||
394 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD_COMPARE; |
||
395 | } else { |
||
396 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LOD; |
||
397 | } |
||
398 | break; |
||
399 | case SHADER_OPCODE_TXS: |
||
400 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_RESINFO; |
||
401 | break; |
||
402 | case SHADER_OPCODE_TXD: |
||
403 | if (inst->shadow_compare) { |
||
404 | /* Gen7.5+. Otherwise, lowered by brw_lower_texture_gradients(). */ |
||
405 | assert(brw->is_haswell); |
||
406 | msg_type = HSW_SAMPLER_MESSAGE_SAMPLE_DERIV_COMPARE; |
||
407 | } else { |
||
408 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_DERIVS; |
||
409 | } |
||
410 | break; |
||
411 | case SHADER_OPCODE_TXF: |
||
412 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; |
||
413 | break; |
||
414 | case SHADER_OPCODE_TXF_MS: |
||
415 | if (brw->gen >= 7) |
||
416 | msg_type = GEN7_SAMPLER_MESSAGE_SAMPLE_LD2DMS; |
||
417 | else |
||
418 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; |
||
419 | break; |
||
420 | case SHADER_OPCODE_LOD: |
||
421 | msg_type = GEN5_SAMPLER_MESSAGE_LOD; |
||
422 | break; |
||
423 | default: |
||
424 | assert(!"not reached"); |
||
425 | break; |
||
426 | } |
||
427 | } else { |
||
428 | switch (inst->opcode) { |
||
429 | case SHADER_OPCODE_TEX: |
||
430 | /* Note that G45 and older determines shadow compare and dispatch width |
||
431 | * from message length for most messages. |
||
432 | */ |
||
433 | assert(dispatch_width == 8); |
||
434 | msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE; |
||
435 | if (inst->shadow_compare) { |
||
436 | assert(inst->mlen == 6); |
||
437 | } else { |
||
438 | assert(inst->mlen <= 4); |
||
439 | } |
||
440 | break; |
||
441 | case FS_OPCODE_TXB: |
||
442 | if (inst->shadow_compare) { |
||
443 | assert(inst->mlen == 6); |
||
444 | msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_BIAS_COMPARE; |
||
445 | } else { |
||
446 | assert(inst->mlen == 9); |
||
447 | msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS; |
||
448 | simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; |
||
449 | } |
||
450 | break; |
||
451 | case SHADER_OPCODE_TXL: |
||
452 | if (inst->shadow_compare) { |
||
453 | assert(inst->mlen == 6); |
||
454 | msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_LOD_COMPARE; |
||
455 | } else { |
||
456 | assert(inst->mlen == 9); |
||
457 | msg_type = BRW_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD; |
||
458 | simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; |
||
459 | } |
||
460 | break; |
||
461 | case SHADER_OPCODE_TXD: |
||
462 | /* There is no sample_d_c message; comparisons are done manually */ |
||
463 | assert(inst->mlen == 7 || inst->mlen == 10); |
||
464 | msg_type = BRW_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS; |
||
465 | break; |
||
466 | case SHADER_OPCODE_TXF: |
||
467 | assert(inst->mlen == 9); |
||
468 | msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD; |
||
469 | simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; |
||
470 | break; |
||
471 | case SHADER_OPCODE_TXS: |
||
472 | assert(inst->mlen == 3); |
||
473 | msg_type = BRW_SAMPLER_MESSAGE_SIMD16_RESINFO; |
||
474 | simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; |
||
475 | break; |
||
476 | default: |
||
477 | assert(!"not reached"); |
||
478 | break; |
||
479 | } |
||
480 | } |
||
481 | assert(msg_type != -1); |
||
482 | |||
483 | if (simd_mode == BRW_SAMPLER_SIMD_MODE_SIMD16) { |
||
484 | rlen = 8; |
||
485 | dst = vec16(dst); |
||
486 | } |
||
487 | |||
488 | /* Load the message header if present. If there's a texture offset, |
||
489 | * we need to set it up explicitly and load the offset bitfield. |
||
490 | * Otherwise, we can use an implied move from g0 to the first message reg. |
||
491 | */ |
||
492 | if (inst->texture_offset) { |
||
493 | brw_push_insn_state(p); |
||
494 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
495 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
496 | /* Explicitly set up the message header by copying g0 to the MRF. */ |
||
497 | brw_MOV(p, retype(brw_message_reg(inst->base_mrf), BRW_REGISTER_TYPE_UD), |
||
498 | retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UD)); |
||
499 | |||
500 | /* Then set the offset bits in DWord 2. */ |
||
501 | brw_MOV(p, retype(brw_vec1_reg(BRW_MESSAGE_REGISTER_FILE, |
||
502 | inst->base_mrf, 2), BRW_REGISTER_TYPE_UD), |
||
503 | brw_imm_ud(inst->texture_offset)); |
||
504 | brw_pop_insn_state(p); |
||
505 | } else if (inst->header_present) { |
||
506 | /* Set up an implied move from g0 to the MRF. */ |
||
507 | src = retype(brw_vec8_grf(0, 0), BRW_REGISTER_TYPE_UW); |
||
508 | } |
||
509 | |||
510 | brw_SAMPLE(p, |
||
511 | retype(dst, BRW_REGISTER_TYPE_UW), |
||
512 | inst->base_mrf, |
||
513 | src, |
||
514 | SURF_INDEX_TEXTURE(inst->sampler), |
||
515 | inst->sampler, |
||
516 | msg_type, |
||
517 | rlen, |
||
518 | inst->mlen, |
||
519 | inst->header_present, |
||
520 | simd_mode, |
||
521 | return_format); |
||
522 | } |
||
523 | |||
524 | |||
525 | /* For OPCODE_DDX and OPCODE_DDY, per channel of output we've got input |
||
526 | * looking like: |
||
527 | * |
||
528 | * arg0: ss0.tl ss0.tr ss0.bl ss0.br ss1.tl ss1.tr ss1.bl ss1.br |
||
529 | * |
||
530 | * and we're trying to produce: |
||
531 | * |
||
532 | * DDX DDY |
||
533 | * dst: (ss0.tr - ss0.tl) (ss0.tl - ss0.bl) |
||
534 | * (ss0.tr - ss0.tl) (ss0.tr - ss0.br) |
||
535 | * (ss0.br - ss0.bl) (ss0.tl - ss0.bl) |
||
536 | * (ss0.br - ss0.bl) (ss0.tr - ss0.br) |
||
537 | * (ss1.tr - ss1.tl) (ss1.tl - ss1.bl) |
||
538 | * (ss1.tr - ss1.tl) (ss1.tr - ss1.br) |
||
539 | * (ss1.br - ss1.bl) (ss1.tl - ss1.bl) |
||
540 | * (ss1.br - ss1.bl) (ss1.tr - ss1.br) |
||
541 | * |
||
542 | * and add another set of two more subspans if in 16-pixel dispatch mode. |
||
543 | * |
||
544 | * For DDX, it ends up being easy: width = 2, horiz=0 gets us the same result |
||
545 | * for each pair, and vertstride = 2 jumps us 2 elements after processing a |
||
546 | * pair. But for DDY, it's harder, as we want to produce the pairs swizzled |
||
547 | * between each other. We could probably do it like ddx and swizzle the right |
||
548 | * order later, but bail for now and just produce |
||
549 | * ((ss0.tl - ss0.bl)x4 (ss1.tl - ss1.bl)x4) |
||
550 | */ |
||
551 | void |
||
552 | fs_generator::generate_ddx(fs_inst *inst, struct brw_reg dst, struct brw_reg src) |
||
553 | { |
||
554 | struct brw_reg src0 = brw_reg(src.file, src.nr, 1, |
||
555 | BRW_REGISTER_TYPE_F, |
||
556 | BRW_VERTICAL_STRIDE_2, |
||
557 | BRW_WIDTH_2, |
||
558 | BRW_HORIZONTAL_STRIDE_0, |
||
559 | BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); |
||
560 | struct brw_reg src1 = brw_reg(src.file, src.nr, 0, |
||
561 | BRW_REGISTER_TYPE_F, |
||
562 | BRW_VERTICAL_STRIDE_2, |
||
563 | BRW_WIDTH_2, |
||
564 | BRW_HORIZONTAL_STRIDE_0, |
||
565 | BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); |
||
566 | brw_ADD(p, dst, src0, negate(src1)); |
||
567 | } |
||
568 | |||
569 | /* The negate_value boolean is used to negate the derivative computation for |
||
570 | * FBOs, since they place the origin at the upper left instead of the lower |
||
571 | * left. |
||
572 | */ |
||
573 | void |
||
574 | fs_generator::generate_ddy(fs_inst *inst, struct brw_reg dst, struct brw_reg src, |
||
575 | bool negate_value) |
||
576 | { |
||
577 | struct brw_reg src0 = brw_reg(src.file, src.nr, 0, |
||
578 | BRW_REGISTER_TYPE_F, |
||
579 | BRW_VERTICAL_STRIDE_4, |
||
580 | BRW_WIDTH_4, |
||
581 | BRW_HORIZONTAL_STRIDE_0, |
||
582 | BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); |
||
583 | struct brw_reg src1 = brw_reg(src.file, src.nr, 2, |
||
584 | BRW_REGISTER_TYPE_F, |
||
585 | BRW_VERTICAL_STRIDE_4, |
||
586 | BRW_WIDTH_4, |
||
587 | BRW_HORIZONTAL_STRIDE_0, |
||
588 | BRW_SWIZZLE_XYZW, WRITEMASK_XYZW); |
||
589 | if (negate_value) |
||
590 | brw_ADD(p, dst, src1, negate(src0)); |
||
591 | else |
||
592 | brw_ADD(p, dst, src0, negate(src1)); |
||
593 | } |
||
594 | |||
595 | void |
||
596 | fs_generator::generate_discard_jump(fs_inst *inst) |
||
597 | { |
||
598 | assert(brw->gen >= 6); |
||
599 | |||
600 | /* This HALT will be patched up at FB write time to point UIP at the end of |
||
601 | * the program, and at brw_uip_jip() JIP will be set to the end of the |
||
602 | * current block (or the program). |
||
603 | */ |
||
604 | this->discard_halt_patches.push_tail(new(mem_ctx) ip_record(p->nr_insn)); |
||
605 | |||
606 | brw_push_insn_state(p); |
||
607 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
608 | gen6_HALT(p); |
||
609 | brw_pop_insn_state(p); |
||
610 | } |
||
611 | |||
612 | void |
||
613 | fs_generator::generate_spill(fs_inst *inst, struct brw_reg src) |
||
614 | { |
||
615 | assert(inst->mlen != 0); |
||
616 | |||
617 | brw_MOV(p, |
||
618 | retype(brw_message_reg(inst->base_mrf + 1), BRW_REGISTER_TYPE_UD), |
||
619 | retype(src, BRW_REGISTER_TYPE_UD)); |
||
620 | brw_oword_block_write_scratch(p, brw_message_reg(inst->base_mrf), 1, |
||
621 | inst->offset); |
||
622 | } |
||
623 | |||
624 | void |
||
625 | fs_generator::generate_unspill(fs_inst *inst, struct brw_reg dst) |
||
626 | { |
||
627 | assert(inst->mlen != 0); |
||
628 | |||
629 | brw_oword_block_read_scratch(p, dst, brw_message_reg(inst->base_mrf), 1, |
||
630 | inst->offset); |
||
631 | } |
||
632 | |||
633 | void |
||
634 | fs_generator::generate_uniform_pull_constant_load(fs_inst *inst, |
||
635 | struct brw_reg dst, |
||
636 | struct brw_reg index, |
||
637 | struct brw_reg offset) |
||
638 | { |
||
639 | assert(inst->mlen != 0); |
||
640 | |||
641 | assert(index.file == BRW_IMMEDIATE_VALUE && |
||
642 | index.type == BRW_REGISTER_TYPE_UD); |
||
643 | uint32_t surf_index = index.dw1.ud; |
||
644 | |||
645 | assert(offset.file == BRW_IMMEDIATE_VALUE && |
||
646 | offset.type == BRW_REGISTER_TYPE_UD); |
||
647 | uint32_t read_offset = offset.dw1.ud; |
||
648 | |||
649 | brw_oword_block_read(p, dst, brw_message_reg(inst->base_mrf), |
||
650 | read_offset, surf_index); |
||
651 | } |
||
652 | |||
653 | void |
||
654 | fs_generator::generate_uniform_pull_constant_load_gen7(fs_inst *inst, |
||
655 | struct brw_reg dst, |
||
656 | struct brw_reg index, |
||
657 | struct brw_reg offset) |
||
658 | { |
||
659 | assert(inst->mlen == 0); |
||
660 | |||
661 | assert(index.file == BRW_IMMEDIATE_VALUE && |
||
662 | index.type == BRW_REGISTER_TYPE_UD); |
||
663 | uint32_t surf_index = index.dw1.ud; |
||
664 | |||
665 | assert(offset.file == BRW_GENERAL_REGISTER_FILE); |
||
666 | /* Reference just the dword we need, to avoid angering validate_reg(). */ |
||
667 | offset = brw_vec1_grf(offset.nr, 0); |
||
668 | |||
669 | brw_push_insn_state(p); |
||
670 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
671 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
672 | struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); |
||
673 | brw_pop_insn_state(p); |
||
674 | |||
675 | /* We use the SIMD4x2 mode because we want to end up with 4 components in |
||
676 | * the destination loaded consecutively from the same offset (which appears |
||
677 | * in the first component, and the rest are ignored). |
||
678 | */ |
||
679 | dst.width = BRW_WIDTH_4; |
||
680 | brw_set_dest(p, send, dst); |
||
681 | brw_set_src0(p, send, offset); |
||
682 | brw_set_sampler_message(p, send, |
||
683 | surf_index, |
||
684 | 0, /* LD message ignores sampler unit */ |
||
685 | GEN5_SAMPLER_MESSAGE_SAMPLE_LD, |
||
686 | 1, /* rlen */ |
||
687 | 1, /* mlen */ |
||
688 | false, /* no header */ |
||
689 | BRW_SAMPLER_SIMD_MODE_SIMD4X2, |
||
690 | 0); |
||
691 | } |
||
692 | |||
693 | void |
||
694 | fs_generator::generate_varying_pull_constant_load(fs_inst *inst, |
||
695 | struct brw_reg dst, |
||
696 | struct brw_reg index, |
||
697 | struct brw_reg offset) |
||
698 | { |
||
699 | assert(brw->gen < 7); /* Should use the gen7 variant. */ |
||
700 | assert(inst->header_present); |
||
701 | assert(inst->mlen); |
||
702 | |||
703 | assert(index.file == BRW_IMMEDIATE_VALUE && |
||
704 | index.type == BRW_REGISTER_TYPE_UD); |
||
705 | uint32_t surf_index = index.dw1.ud; |
||
706 | |||
707 | uint32_t simd_mode, rlen, msg_type; |
||
708 | if (dispatch_width == 16) { |
||
709 | simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; |
||
710 | rlen = 8; |
||
711 | } else { |
||
712 | simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; |
||
713 | rlen = 4; |
||
714 | } |
||
715 | |||
716 | if (brw->gen >= 5) |
||
717 | msg_type = GEN5_SAMPLER_MESSAGE_SAMPLE_LD; |
||
718 | else { |
||
719 | /* We always use the SIMD16 message so that we only have to load U, and |
||
720 | * not V or R. |
||
721 | */ |
||
722 | msg_type = BRW_SAMPLER_MESSAGE_SIMD16_LD; |
||
723 | assert(inst->mlen == 3); |
||
724 | assert(inst->regs_written == 8); |
||
725 | rlen = 8; |
||
726 | simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; |
||
727 | } |
||
728 | |||
729 | struct brw_reg offset_mrf = retype(brw_message_reg(inst->base_mrf + 1), |
||
730 | BRW_REGISTER_TYPE_D); |
||
731 | brw_MOV(p, offset_mrf, offset); |
||
732 | |||
733 | struct brw_reg header = brw_vec8_grf(0, 0); |
||
734 | gen6_resolve_implied_move(p, &header, inst->base_mrf); |
||
735 | |||
736 | struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); |
||
737 | send->header.compression_control = BRW_COMPRESSION_NONE; |
||
738 | brw_set_dest(p, send, dst); |
||
739 | brw_set_src0(p, send, header); |
||
740 | if (brw->gen < 6) |
||
741 | send->header.destreg__conditionalmod = inst->base_mrf; |
||
742 | |||
743 | /* Our surface is set up as floats, regardless of what actual data is |
||
744 | * stored in it. |
||
745 | */ |
||
746 | uint32_t return_format = BRW_SAMPLER_RETURN_FORMAT_FLOAT32; |
||
747 | brw_set_sampler_message(p, send, |
||
748 | surf_index, |
||
749 | 0, /* sampler (unused) */ |
||
750 | msg_type, |
||
751 | rlen, |
||
752 | inst->mlen, |
||
753 | inst->header_present, |
||
754 | simd_mode, |
||
755 | return_format); |
||
756 | } |
||
757 | |||
758 | void |
||
759 | fs_generator::generate_varying_pull_constant_load_gen7(fs_inst *inst, |
||
760 | struct brw_reg dst, |
||
761 | struct brw_reg index, |
||
762 | struct brw_reg offset) |
||
763 | { |
||
764 | assert(brw->gen >= 7); |
||
765 | /* Varying-offset pull constant loads are treated as a normal expression on |
||
766 | * gen7, so the fact that it's a send message is hidden at the IR level. |
||
767 | */ |
||
768 | assert(!inst->header_present); |
||
769 | assert(!inst->mlen); |
||
770 | |||
771 | assert(index.file == BRW_IMMEDIATE_VALUE && |
||
772 | index.type == BRW_REGISTER_TYPE_UD); |
||
773 | uint32_t surf_index = index.dw1.ud; |
||
774 | |||
775 | uint32_t simd_mode, rlen, mlen; |
||
776 | if (dispatch_width == 16) { |
||
777 | mlen = 2; |
||
778 | rlen = 8; |
||
779 | simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD16; |
||
780 | } else { |
||
781 | mlen = 1; |
||
782 | rlen = 4; |
||
783 | simd_mode = BRW_SAMPLER_SIMD_MODE_SIMD8; |
||
784 | } |
||
785 | |||
786 | struct brw_instruction *send = brw_next_insn(p, BRW_OPCODE_SEND); |
||
787 | brw_set_dest(p, send, dst); |
||
788 | brw_set_src0(p, send, offset); |
||
789 | brw_set_sampler_message(p, send, |
||
790 | surf_index, |
||
791 | 0, /* LD message ignores sampler unit */ |
||
792 | GEN5_SAMPLER_MESSAGE_SAMPLE_LD, |
||
793 | rlen, |
||
794 | mlen, |
||
795 | false, /* no header */ |
||
796 | simd_mode, |
||
797 | 0); |
||
798 | } |
||
799 | |||
800 | /** |
||
801 | * Cause the current pixel/sample mask (from R1.7 bits 15:0) to be transferred |
||
802 | * into the flags register (f0.0). |
||
803 | * |
||
804 | * Used only on Gen6 and above. |
||
805 | */ |
||
806 | void |
||
807 | fs_generator::generate_mov_dispatch_to_flags(fs_inst *inst) |
||
808 | { |
||
809 | struct brw_reg flags = brw_flag_reg(0, inst->flag_subreg); |
||
810 | struct brw_reg dispatch_mask; |
||
811 | |||
812 | if (brw->gen >= 6) |
||
813 | dispatch_mask = retype(brw_vec1_grf(1, 7), BRW_REGISTER_TYPE_UW); |
||
814 | else |
||
815 | dispatch_mask = retype(brw_vec1_grf(0, 0), BRW_REGISTER_TYPE_UW); |
||
816 | |||
817 | brw_push_insn_state(p); |
||
818 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
819 | brw_MOV(p, flags, dispatch_mask); |
||
820 | brw_pop_insn_state(p); |
||
821 | } |
||
822 | |||
823 | |||
824 | static uint32_t brw_file_from_reg(fs_reg *reg) |
||
825 | { |
||
826 | switch (reg->file) { |
||
827 | case ARF: |
||
828 | return BRW_ARCHITECTURE_REGISTER_FILE; |
||
829 | case GRF: |
||
830 | return BRW_GENERAL_REGISTER_FILE; |
||
831 | case MRF: |
||
832 | return BRW_MESSAGE_REGISTER_FILE; |
||
833 | case IMM: |
||
834 | return BRW_IMMEDIATE_VALUE; |
||
835 | default: |
||
836 | assert(!"not reached"); |
||
837 | return BRW_GENERAL_REGISTER_FILE; |
||
838 | } |
||
839 | } |
||
840 | |||
841 | static struct brw_reg |
||
842 | brw_reg_from_fs_reg(fs_reg *reg) |
||
843 | { |
||
844 | struct brw_reg brw_reg; |
||
845 | |||
846 | switch (reg->file) { |
||
847 | case GRF: |
||
848 | case ARF: |
||
849 | case MRF: |
||
850 | if (reg->smear == -1) { |
||
851 | brw_reg = brw_vec8_reg(brw_file_from_reg(reg), reg->reg, 0); |
||
852 | } else { |
||
853 | brw_reg = brw_vec1_reg(brw_file_from_reg(reg), reg->reg, reg->smear); |
||
854 | } |
||
855 | brw_reg = retype(brw_reg, reg->type); |
||
856 | if (reg->sechalf) |
||
857 | brw_reg = sechalf(brw_reg); |
||
858 | break; |
||
859 | case IMM: |
||
860 | switch (reg->type) { |
||
861 | case BRW_REGISTER_TYPE_F: |
||
862 | brw_reg = brw_imm_f(reg->imm.f); |
||
863 | break; |
||
864 | case BRW_REGISTER_TYPE_D: |
||
865 | brw_reg = brw_imm_d(reg->imm.i); |
||
866 | break; |
||
867 | case BRW_REGISTER_TYPE_UD: |
||
868 | brw_reg = brw_imm_ud(reg->imm.u); |
||
869 | break; |
||
870 | default: |
||
871 | assert(!"not reached"); |
||
872 | brw_reg = brw_null_reg(); |
||
873 | break; |
||
874 | } |
||
875 | break; |
||
876 | case HW_REG: |
||
877 | brw_reg = reg->fixed_hw_reg; |
||
878 | break; |
||
879 | case BAD_FILE: |
||
880 | /* Probably unused. */ |
||
881 | brw_reg = brw_null_reg(); |
||
882 | break; |
||
883 | case UNIFORM: |
||
884 | assert(!"not reached"); |
||
885 | brw_reg = brw_null_reg(); |
||
886 | break; |
||
887 | default: |
||
888 | assert(!"not reached"); |
||
889 | brw_reg = brw_null_reg(); |
||
890 | break; |
||
891 | } |
||
892 | if (reg->abs) |
||
893 | brw_reg = brw_abs(brw_reg); |
||
894 | if (reg->negate) |
||
895 | brw_reg = negate(brw_reg); |
||
896 | |||
897 | return brw_reg; |
||
898 | } |
||
899 | |||
900 | /** |
||
901 | * Sets the first word of a vgrf for gen7+ simd4x2 uniform pull constant |
||
902 | * sampler LD messages. |
||
903 | * |
||
904 | * We don't want to bake it into the send message's code generation because |
||
905 | * that means we don't get a chance to schedule the instructions. |
||
906 | */ |
||
907 | void |
||
908 | fs_generator::generate_set_simd4x2_offset(fs_inst *inst, |
||
909 | struct brw_reg dst, |
||
910 | struct brw_reg value) |
||
911 | { |
||
912 | assert(value.file == BRW_IMMEDIATE_VALUE); |
||
913 | |||
914 | brw_push_insn_state(p); |
||
915 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
916 | brw_set_mask_control(p, BRW_MASK_DISABLE); |
||
917 | brw_MOV(p, retype(brw_vec1_reg(dst.file, dst.nr, 0), value.type), value); |
||
918 | brw_pop_insn_state(p); |
||
919 | } |
||
920 | |||
921 | /** |
||
922 | * Change the register's data type from UD to W, doubling the strides in order |
||
923 | * to compensate for halving the data type width. |
||
924 | */ |
||
925 | static struct brw_reg |
||
926 | ud_reg_to_w(struct brw_reg r) |
||
927 | { |
||
928 | assert(r.type == BRW_REGISTER_TYPE_UD); |
||
929 | r.type = BRW_REGISTER_TYPE_W; |
||
930 | |||
931 | /* The BRW_*_STRIDE enums are defined so that incrementing the field |
||
932 | * doubles the real stride. |
||
933 | */ |
||
934 | if (r.hstride != 0) |
||
935 | ++r.hstride; |
||
936 | if (r.vstride != 0) |
||
937 | ++r.vstride; |
||
938 | |||
939 | return r; |
||
940 | } |
||
941 | |||
942 | void |
||
943 | fs_generator::generate_pack_half_2x16_split(fs_inst *inst, |
||
944 | struct brw_reg dst, |
||
945 | struct brw_reg x, |
||
946 | struct brw_reg y) |
||
947 | { |
||
948 | assert(brw->gen >= 7); |
||
949 | assert(dst.type == BRW_REGISTER_TYPE_UD); |
||
950 | assert(x.type == BRW_REGISTER_TYPE_F); |
||
951 | assert(y.type == BRW_REGISTER_TYPE_F); |
||
952 | |||
953 | /* From the Ivybridge PRM, Vol4, Part3, Section 6.27 f32to16: |
||
954 | * |
||
955 | * Because this instruction does not have a 16-bit floating-point type, |
||
956 | * the destination data type must be Word (W). |
||
957 | * |
||
958 | * The destination must be DWord-aligned and specify a horizontal stride |
||
959 | * (HorzStride) of 2. The 16-bit result is stored in the lower word of |
||
960 | * each destination channel and the upper word is not modified. |
||
961 | */ |
||
962 | struct brw_reg dst_w = ud_reg_to_w(dst); |
||
963 | |||
964 | /* Give each 32-bit channel of dst the form below , where "." means |
||
965 | * unchanged. |
||
966 | * 0x....hhhh |
||
967 | */ |
||
968 | brw_F32TO16(p, dst_w, y); |
||
969 | |||
970 | /* Now the form: |
||
971 | * 0xhhhh0000 |
||
972 | */ |
||
973 | brw_SHL(p, dst, dst, brw_imm_ud(16u)); |
||
974 | |||
975 | /* And, finally the form of packHalf2x16's output: |
||
976 | * 0xhhhhllll |
||
977 | */ |
||
978 | brw_F32TO16(p, dst_w, x); |
||
979 | } |
||
980 | |||
981 | void |
||
982 | fs_generator::generate_unpack_half_2x16_split(fs_inst *inst, |
||
983 | struct brw_reg dst, |
||
984 | struct brw_reg src) |
||
985 | { |
||
986 | assert(brw->gen >= 7); |
||
987 | assert(dst.type == BRW_REGISTER_TYPE_F); |
||
988 | assert(src.type == BRW_REGISTER_TYPE_UD); |
||
989 | |||
990 | /* From the Ivybridge PRM, Vol4, Part3, Section 6.26 f16to32: |
||
991 | * |
||
992 | * Because this instruction does not have a 16-bit floating-point type, |
||
993 | * the source data type must be Word (W). The destination type must be |
||
994 | * F (Float). |
||
995 | */ |
||
996 | struct brw_reg src_w = ud_reg_to_w(src); |
||
997 | |||
998 | /* Each channel of src has the form of unpackHalf2x16's input: 0xhhhhllll. |
||
999 | * For the Y case, we wish to access only the upper word; therefore |
||
1000 | * a 16-bit subregister offset is needed. |
||
1001 | */ |
||
1002 | assert(inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X || |
||
1003 | inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y); |
||
1004 | if (inst->opcode == FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y) |
||
1005 | src_w.subnr += 2; |
||
1006 | |||
1007 | brw_F16TO32(p, dst, src_w); |
||
1008 | } |
||
1009 | |||
1010 | void |
||
1011 | fs_generator::generate_shader_time_add(fs_inst *inst, |
||
1012 | struct brw_reg payload, |
||
1013 | struct brw_reg offset, |
||
1014 | struct brw_reg value) |
||
1015 | { |
||
1016 | assert(brw->gen >= 7); |
||
1017 | brw_push_insn_state(p); |
||
1018 | brw_set_mask_control(p, true); |
||
1019 | |||
1020 | assert(payload.file == BRW_GENERAL_REGISTER_FILE); |
||
1021 | struct brw_reg payload_offset = retype(brw_vec1_grf(payload.nr, 0), |
||
1022 | offset.type); |
||
1023 | struct brw_reg payload_value = retype(brw_vec1_grf(payload.nr + 1, 0), |
||
1024 | value.type); |
||
1025 | |||
1026 | assert(offset.file == BRW_IMMEDIATE_VALUE); |
||
1027 | if (value.file == BRW_GENERAL_REGISTER_FILE) { |
||
1028 | value.width = BRW_WIDTH_1; |
||
1029 | value.hstride = BRW_HORIZONTAL_STRIDE_0; |
||
1030 | value.vstride = BRW_VERTICAL_STRIDE_0; |
||
1031 | } else { |
||
1032 | assert(value.file == BRW_IMMEDIATE_VALUE); |
||
1033 | } |
||
1034 | |||
1035 | /* Trying to deal with setup of the params from the IR is crazy in the FS8 |
||
1036 | * case, and we don't really care about squeezing every bit of performance |
||
1037 | * out of this path, so we just emit the MOVs from here. |
||
1038 | */ |
||
1039 | brw_MOV(p, payload_offset, offset); |
||
1040 | brw_MOV(p, payload_value, value); |
||
1041 | brw_shader_time_add(p, payload, SURF_INDEX_WM_SHADER_TIME); |
||
1042 | brw_pop_insn_state(p); |
||
1043 | } |
||
1044 | |||
1045 | void |
||
1046 | fs_generator::generate_code(exec_list *instructions) |
||
1047 | { |
||
1048 | int last_native_insn_offset = p->next_insn_offset; |
||
1049 | const char *last_annotation_string = NULL; |
||
1050 | const void *last_annotation_ir = NULL; |
||
1051 | |||
1052 | if (unlikely(INTEL_DEBUG & DEBUG_WM)) { |
||
1053 | if (shader) { |
||
1054 | printf("Native code for fragment shader %d (%d-wide dispatch):\n", |
||
1055 | prog->Name, dispatch_width); |
||
1056 | } else { |
||
1057 | printf("Native code for fragment program %d (%d-wide dispatch):\n", |
||
1058 | fp->Base.Id, dispatch_width); |
||
1059 | } |
||
1060 | } |
||
1061 | |||
1062 | cfg_t *cfg = NULL; |
||
1063 | if (unlikely(INTEL_DEBUG & DEBUG_WM)) |
||
1064 | cfg = new(mem_ctx) cfg_t(mem_ctx, instructions); |
||
1065 | |||
1066 | foreach_list(node, instructions) { |
||
1067 | fs_inst *inst = (fs_inst *)node; |
||
1068 | struct brw_reg src[3], dst; |
||
1069 | |||
1070 | if (unlikely(INTEL_DEBUG & DEBUG_WM)) { |
||
1071 | foreach_list(node, &cfg->block_list) { |
||
1072 | bblock_link *link = (bblock_link *)node; |
||
1073 | bblock_t *block = link->block; |
||
1074 | |||
1075 | if (block->start == inst) { |
||
1076 | printf(" START B%d", block->block_num); |
||
1077 | foreach_list(predecessor_node, &block->parents) { |
||
1078 | bblock_link *predecessor_link = |
||
1079 | (bblock_link *)predecessor_node; |
||
1080 | bblock_t *predecessor_block = predecessor_link->block; |
||
1081 | printf(" <-B%d", predecessor_block->block_num); |
||
1082 | } |
||
1083 | printf("\n"); |
||
1084 | } |
||
1085 | } |
||
1086 | |||
1087 | if (last_annotation_ir != inst->ir) { |
||
1088 | last_annotation_ir = inst->ir; |
||
1089 | if (last_annotation_ir) { |
||
1090 | printf(" "); |
||
1091 | if (shader) |
||
1092 | ((ir_instruction *)inst->ir)->print(); |
||
1093 | else { |
||
1094 | const prog_instruction *fpi; |
||
1095 | fpi = (const prog_instruction *)inst->ir; |
||
1096 | printf("%d: ", (int)(fpi - fp->Base.Instructions)); |
||
1097 | _mesa_fprint_instruction_opt(stdout, |
||
1098 | fpi, |
||
1099 | 0, PROG_PRINT_DEBUG, NULL); |
||
1100 | } |
||
1101 | printf("\n"); |
||
1102 | } |
||
1103 | } |
||
1104 | if (last_annotation_string != inst->annotation) { |
||
1105 | last_annotation_string = inst->annotation; |
||
1106 | if (last_annotation_string) |
||
1107 | printf(" %s\n", last_annotation_string); |
||
1108 | } |
||
1109 | } |
||
1110 | |||
1111 | for (unsigned int i = 0; i < 3; i++) { |
||
1112 | src[i] = brw_reg_from_fs_reg(&inst->src[i]); |
||
1113 | |||
1114 | /* The accumulator result appears to get used for the |
||
1115 | * conditional modifier generation. When negating a UD |
||
1116 | * value, there is a 33rd bit generated for the sign in the |
||
1117 | * accumulator value, so now you can't check, for example, |
||
1118 | * equality with a 32-bit value. See piglit fs-op-neg-uvec4. |
||
1119 | */ |
||
1120 | assert(!inst->conditional_mod || |
||
1121 | inst->src[i].type != BRW_REGISTER_TYPE_UD || |
||
1122 | !inst->src[i].negate); |
||
1123 | } |
||
1124 | dst = brw_reg_from_fs_reg(&inst->dst); |
||
1125 | |||
1126 | brw_set_conditionalmod(p, inst->conditional_mod); |
||
1127 | brw_set_predicate_control(p, inst->predicate); |
||
1128 | brw_set_predicate_inverse(p, inst->predicate_inverse); |
||
1129 | brw_set_flag_reg(p, 0, inst->flag_subreg); |
||
1130 | brw_set_saturate(p, inst->saturate); |
||
1131 | brw_set_mask_control(p, inst->force_writemask_all); |
||
1132 | |||
1133 | if (inst->force_uncompressed || dispatch_width == 8) { |
||
1134 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1135 | } else if (inst->force_sechalf) { |
||
1136 | brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
||
1137 | } else { |
||
1138 | brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
||
1139 | } |
||
1140 | |||
1141 | switch (inst->opcode) { |
||
1142 | case BRW_OPCODE_MOV: |
||
1143 | brw_MOV(p, dst, src[0]); |
||
1144 | break; |
||
1145 | case BRW_OPCODE_ADD: |
||
1146 | brw_ADD(p, dst, src[0], src[1]); |
||
1147 | break; |
||
1148 | case BRW_OPCODE_MUL: |
||
1149 | brw_MUL(p, dst, src[0], src[1]); |
||
1150 | break; |
||
1151 | case BRW_OPCODE_MACH: |
||
1152 | brw_set_acc_write_control(p, 1); |
||
1153 | brw_MACH(p, dst, src[0], src[1]); |
||
1154 | brw_set_acc_write_control(p, 0); |
||
1155 | break; |
||
1156 | |||
1157 | case BRW_OPCODE_MAD: |
||
1158 | brw_set_access_mode(p, BRW_ALIGN_16); |
||
1159 | if (dispatch_width == 16) { |
||
1160 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1161 | brw_MAD(p, dst, src[0], src[1], src[2]); |
||
1162 | brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
||
1163 | brw_MAD(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2])); |
||
1164 | brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
||
1165 | } else { |
||
1166 | brw_MAD(p, dst, src[0], src[1], src[2]); |
||
1167 | } |
||
1168 | brw_set_access_mode(p, BRW_ALIGN_1); |
||
1169 | break; |
||
1170 | |||
1171 | case BRW_OPCODE_LRP: |
||
1172 | brw_set_access_mode(p, BRW_ALIGN_16); |
||
1173 | if (dispatch_width == 16) { |
||
1174 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1175 | brw_LRP(p, dst, src[0], src[1], src[2]); |
||
1176 | brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
||
1177 | brw_LRP(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2])); |
||
1178 | brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
||
1179 | } else { |
||
1180 | brw_LRP(p, dst, src[0], src[1], src[2]); |
||
1181 | } |
||
1182 | brw_set_access_mode(p, BRW_ALIGN_1); |
||
1183 | break; |
||
1184 | |||
1185 | case BRW_OPCODE_FRC: |
||
1186 | brw_FRC(p, dst, src[0]); |
||
1187 | break; |
||
1188 | case BRW_OPCODE_RNDD: |
||
1189 | brw_RNDD(p, dst, src[0]); |
||
1190 | break; |
||
1191 | case BRW_OPCODE_RNDE: |
||
1192 | brw_RNDE(p, dst, src[0]); |
||
1193 | break; |
||
1194 | case BRW_OPCODE_RNDZ: |
||
1195 | brw_RNDZ(p, dst, src[0]); |
||
1196 | break; |
||
1197 | |||
1198 | case BRW_OPCODE_AND: |
||
1199 | brw_AND(p, dst, src[0], src[1]); |
||
1200 | break; |
||
1201 | case BRW_OPCODE_OR: |
||
1202 | brw_OR(p, dst, src[0], src[1]); |
||
1203 | break; |
||
1204 | case BRW_OPCODE_XOR: |
||
1205 | brw_XOR(p, dst, src[0], src[1]); |
||
1206 | break; |
||
1207 | case BRW_OPCODE_NOT: |
||
1208 | brw_NOT(p, dst, src[0]); |
||
1209 | break; |
||
1210 | case BRW_OPCODE_ASR: |
||
1211 | brw_ASR(p, dst, src[0], src[1]); |
||
1212 | break; |
||
1213 | case BRW_OPCODE_SHR: |
||
1214 | brw_SHR(p, dst, src[0], src[1]); |
||
1215 | break; |
||
1216 | case BRW_OPCODE_SHL: |
||
1217 | brw_SHL(p, dst, src[0], src[1]); |
||
1218 | break; |
||
1219 | case BRW_OPCODE_F32TO16: |
||
1220 | brw_F32TO16(p, dst, src[0]); |
||
1221 | break; |
||
1222 | case BRW_OPCODE_F16TO32: |
||
1223 | brw_F16TO32(p, dst, src[0]); |
||
1224 | break; |
||
1225 | case BRW_OPCODE_CMP: |
||
1226 | brw_CMP(p, dst, inst->conditional_mod, src[0], src[1]); |
||
1227 | break; |
||
1228 | case BRW_OPCODE_SEL: |
||
1229 | brw_SEL(p, dst, src[0], src[1]); |
||
1230 | break; |
||
1231 | case BRW_OPCODE_BFREV: |
||
1232 | /* BFREV only supports UD type for src and dst. */ |
||
1233 | brw_BFREV(p, retype(dst, BRW_REGISTER_TYPE_UD), |
||
1234 | retype(src[0], BRW_REGISTER_TYPE_UD)); |
||
1235 | break; |
||
1236 | case BRW_OPCODE_FBH: |
||
1237 | /* FBH only supports UD type for dst. */ |
||
1238 | brw_FBH(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); |
||
1239 | break; |
||
1240 | case BRW_OPCODE_FBL: |
||
1241 | /* FBL only supports UD type for dst. */ |
||
1242 | brw_FBL(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); |
||
1243 | break; |
||
1244 | case BRW_OPCODE_CBIT: |
||
1245 | /* CBIT only supports UD type for dst. */ |
||
1246 | brw_CBIT(p, retype(dst, BRW_REGISTER_TYPE_UD), src[0]); |
||
1247 | break; |
||
1248 | |||
1249 | case BRW_OPCODE_BFE: |
||
1250 | brw_set_access_mode(p, BRW_ALIGN_16); |
||
1251 | if (dispatch_width == 16) { |
||
1252 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1253 | brw_BFE(p, dst, src[0], src[1], src[2]); |
||
1254 | brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
||
1255 | brw_BFE(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2])); |
||
1256 | brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
||
1257 | } else { |
||
1258 | brw_BFE(p, dst, src[0], src[1], src[2]); |
||
1259 | } |
||
1260 | brw_set_access_mode(p, BRW_ALIGN_1); |
||
1261 | break; |
||
1262 | |||
1263 | case BRW_OPCODE_BFI1: |
||
1264 | brw_BFI1(p, dst, src[0], src[1]); |
||
1265 | break; |
||
1266 | case BRW_OPCODE_BFI2: |
||
1267 | brw_set_access_mode(p, BRW_ALIGN_16); |
||
1268 | if (dispatch_width == 16) { |
||
1269 | brw_set_compression_control(p, BRW_COMPRESSION_NONE); |
||
1270 | brw_BFI2(p, dst, src[0], src[1], src[2]); |
||
1271 | brw_set_compression_control(p, BRW_COMPRESSION_2NDHALF); |
||
1272 | brw_BFI2(p, sechalf(dst), sechalf(src[0]), sechalf(src[1]), sechalf(src[2])); |
||
1273 | brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
||
1274 | } else { |
||
1275 | brw_BFI2(p, dst, src[0], src[1], src[2]); |
||
1276 | } |
||
1277 | brw_set_access_mode(p, BRW_ALIGN_1); |
||
1278 | break; |
||
1279 | |||
1280 | case BRW_OPCODE_IF: |
||
1281 | if (inst->src[0].file != BAD_FILE) { |
||
1282 | /* The instruction has an embedded compare (only allowed on gen6) */ |
||
1283 | assert(brw->gen == 6); |
||
1284 | gen6_IF(p, inst->conditional_mod, src[0], src[1]); |
||
1285 | } else { |
||
1286 | brw_IF(p, dispatch_width == 16 ? BRW_EXECUTE_16 : BRW_EXECUTE_8); |
||
1287 | } |
||
1288 | break; |
||
1289 | |||
1290 | case BRW_OPCODE_ELSE: |
||
1291 | brw_ELSE(p); |
||
1292 | break; |
||
1293 | case BRW_OPCODE_ENDIF: |
||
1294 | brw_ENDIF(p); |
||
1295 | break; |
||
1296 | |||
1297 | case BRW_OPCODE_DO: |
||
1298 | brw_DO(p, BRW_EXECUTE_8); |
||
1299 | break; |
||
1300 | |||
1301 | case BRW_OPCODE_BREAK: |
||
1302 | brw_BREAK(p); |
||
1303 | brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
||
1304 | break; |
||
1305 | case BRW_OPCODE_CONTINUE: |
||
1306 | /* FINISHME: We need to write the loop instruction support still. */ |
||
1307 | if (brw->gen >= 6) |
||
1308 | gen6_CONT(p); |
||
1309 | else |
||
1310 | brw_CONT(p); |
||
1311 | brw_set_predicate_control(p, BRW_PREDICATE_NONE); |
||
1312 | break; |
||
1313 | |||
1314 | case BRW_OPCODE_WHILE: |
||
1315 | brw_WHILE(p); |
||
1316 | break; |
||
1317 | |||
1318 | case SHADER_OPCODE_RCP: |
||
1319 | case SHADER_OPCODE_RSQ: |
||
1320 | case SHADER_OPCODE_SQRT: |
||
1321 | case SHADER_OPCODE_EXP2: |
||
1322 | case SHADER_OPCODE_LOG2: |
||
1323 | case SHADER_OPCODE_SIN: |
||
1324 | case SHADER_OPCODE_COS: |
||
1325 | if (brw->gen >= 7) { |
||
1326 | generate_math1_gen7(inst, dst, src[0]); |
||
1327 | } else if (brw->gen == 6) { |
||
1328 | generate_math1_gen6(inst, dst, src[0]); |
||
1329 | } else if (brw->gen == 5 || brw->is_g4x) { |
||
1330 | generate_math_g45(inst, dst, src[0]); |
||
1331 | } else { |
||
1332 | generate_math_gen4(inst, dst, src[0]); |
||
1333 | } |
||
1334 | break; |
||
1335 | case SHADER_OPCODE_INT_QUOTIENT: |
||
1336 | case SHADER_OPCODE_INT_REMAINDER: |
||
1337 | case SHADER_OPCODE_POW: |
||
1338 | if (brw->gen >= 7) { |
||
1339 | generate_math2_gen7(inst, dst, src[0], src[1]); |
||
1340 | } else if (brw->gen == 6) { |
||
1341 | generate_math2_gen6(inst, dst, src[0], src[1]); |
||
1342 | } else { |
||
1343 | generate_math_gen4(inst, dst, src[0]); |
||
1344 | } |
||
1345 | break; |
||
1346 | case FS_OPCODE_PIXEL_X: |
||
1347 | generate_pixel_xy(dst, true); |
||
1348 | break; |
||
1349 | case FS_OPCODE_PIXEL_Y: |
||
1350 | generate_pixel_xy(dst, false); |
||
1351 | break; |
||
1352 | case FS_OPCODE_CINTERP: |
||
1353 | brw_MOV(p, dst, src[0]); |
||
1354 | break; |
||
1355 | case FS_OPCODE_LINTERP: |
||
1356 | generate_linterp(inst, dst, src); |
||
1357 | break; |
||
1358 | case SHADER_OPCODE_TEX: |
||
1359 | case FS_OPCODE_TXB: |
||
1360 | case SHADER_OPCODE_TXD: |
||
1361 | case SHADER_OPCODE_TXF: |
||
1362 | case SHADER_OPCODE_TXF_MS: |
||
1363 | case SHADER_OPCODE_TXL: |
||
1364 | case SHADER_OPCODE_TXS: |
||
1365 | case SHADER_OPCODE_LOD: |
||
1366 | generate_tex(inst, dst, src[0]); |
||
1367 | break; |
||
1368 | case FS_OPCODE_DDX: |
||
1369 | generate_ddx(inst, dst, src[0]); |
||
1370 | break; |
||
1371 | case FS_OPCODE_DDY: |
||
1372 | /* Make sure fp->UsesDFdy flag got set (otherwise there's no |
||
1373 | * guarantee that c->key.render_to_fbo is set). |
||
1374 | */ |
||
1375 | assert(fp->UsesDFdy); |
||
1376 | generate_ddy(inst, dst, src[0], c->key.render_to_fbo); |
||
1377 | break; |
||
1378 | |||
1379 | case FS_OPCODE_SPILL: |
||
1380 | generate_spill(inst, src[0]); |
||
1381 | break; |
||
1382 | |||
1383 | case FS_OPCODE_UNSPILL: |
||
1384 | generate_unspill(inst, dst); |
||
1385 | break; |
||
1386 | |||
1387 | case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD: |
||
1388 | generate_uniform_pull_constant_load(inst, dst, src[0], src[1]); |
||
1389 | break; |
||
1390 | |||
1391 | case FS_OPCODE_UNIFORM_PULL_CONSTANT_LOAD_GEN7: |
||
1392 | generate_uniform_pull_constant_load_gen7(inst, dst, src[0], src[1]); |
||
1393 | break; |
||
1394 | |||
1395 | case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD: |
||
1396 | generate_varying_pull_constant_load(inst, dst, src[0], src[1]); |
||
1397 | break; |
||
1398 | |||
1399 | case FS_OPCODE_VARYING_PULL_CONSTANT_LOAD_GEN7: |
||
1400 | generate_varying_pull_constant_load_gen7(inst, dst, src[0], src[1]); |
||
1401 | break; |
||
1402 | |||
1403 | case FS_OPCODE_FB_WRITE: |
||
1404 | generate_fb_write(inst); |
||
1405 | break; |
||
1406 | |||
1407 | case FS_OPCODE_MOV_DISPATCH_TO_FLAGS: |
||
1408 | generate_mov_dispatch_to_flags(inst); |
||
1409 | break; |
||
1410 | |||
1411 | case FS_OPCODE_DISCARD_JUMP: |
||
1412 | generate_discard_jump(inst); |
||
1413 | break; |
||
1414 | |||
1415 | case SHADER_OPCODE_SHADER_TIME_ADD: |
||
1416 | generate_shader_time_add(inst, src[0], src[1], src[2]); |
||
1417 | break; |
||
1418 | |||
1419 | case FS_OPCODE_SET_SIMD4X2_OFFSET: |
||
1420 | generate_set_simd4x2_offset(inst, dst, src[0]); |
||
1421 | break; |
||
1422 | |||
1423 | case FS_OPCODE_PACK_HALF_2x16_SPLIT: |
||
1424 | generate_pack_half_2x16_split(inst, dst, src[0], src[1]); |
||
1425 | break; |
||
1426 | |||
1427 | case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_X: |
||
1428 | case FS_OPCODE_UNPACK_HALF_2x16_SPLIT_Y: |
||
1429 | generate_unpack_half_2x16_split(inst, dst, src[0]); |
||
1430 | break; |
||
1431 | |||
1432 | case FS_OPCODE_PLACEHOLDER_HALT: |
||
1433 | /* This is the place where the final HALT needs to be inserted if |
||
1434 | * we've emitted any discards. If not, this will emit no code. |
||
1435 | */ |
||
1436 | patch_discard_jumps_to_fb_writes(); |
||
1437 | break; |
||
1438 | |||
1439 | default: |
||
1440 | if (inst->opcode < (int) ARRAY_SIZE(opcode_descs)) { |
||
1441 | _mesa_problem(ctx, "Unsupported opcode `%s' in FS", |
||
1442 | opcode_descs[inst->opcode].name); |
||
1443 | } else { |
||
1444 | _mesa_problem(ctx, "Unsupported opcode %d in FS", inst->opcode); |
||
1445 | } |
||
1446 | abort(); |
||
1447 | } |
||
1448 | |||
1449 | if (unlikely(INTEL_DEBUG & DEBUG_WM)) { |
||
1450 | brw_dump_compile(p, stdout, |
||
1451 | last_native_insn_offset, p->next_insn_offset); |
||
1452 | |||
1453 | foreach_list(node, &cfg->block_list) { |
||
1454 | bblock_link *link = (bblock_link *)node; |
||
1455 | bblock_t *block = link->block; |
||
1456 | |||
1457 | if (block->end == inst) { |
||
1458 | printf(" END B%d", block->block_num); |
||
1459 | foreach_list(successor_node, &block->children) { |
||
1460 | bblock_link *successor_link = |
||
1461 | (bblock_link *)successor_node; |
||
1462 | bblock_t *successor_block = successor_link->block; |
||
1463 | printf(" ->B%d", successor_block->block_num); |
||
1464 | } |
||
1465 | printf("\n"); |
||
1466 | } |
||
1467 | } |
||
1468 | } |
||
1469 | |||
1470 | last_native_insn_offset = p->next_insn_offset; |
||
1471 | } |
||
1472 | |||
1473 | if (unlikely(INTEL_DEBUG & DEBUG_WM)) { |
||
1474 | printf("\n"); |
||
1475 | } |
||
1476 | |||
1477 | brw_set_uip_jip(p); |
||
1478 | |||
1479 | /* OK, while the INTEL_DEBUG=wm above is very nice for debugging FS |
||
1480 | * emit issues, it doesn't get the jump distances into the output, |
||
1481 | * which is often something we want to debug. So this is here in |
||
1482 | * case you're doing that. |
||
1483 | */ |
||
1484 | if (0) { |
||
1485 | brw_dump_compile(p, stdout, 0, p->next_insn_offset); |
||
1486 | } |
||
1487 | } |
||
1488 | |||
1489 | const unsigned * |
||
1490 | fs_generator::generate_assembly(exec_list *simd8_instructions, |
||
1491 | exec_list *simd16_instructions, |
||
1492 | unsigned *assembly_size) |
||
1493 | { |
||
1494 | dispatch_width = 8; |
||
1495 | generate_code(simd8_instructions); |
||
1496 | |||
1497 | if (simd16_instructions) { |
||
1498 | /* We have to do a compaction pass now, or the one at the end of |
||
1499 | * execution will squash down where our prog_offset start needs |
||
1500 | * to be. |
||
1501 | */ |
||
1502 | brw_compact_instructions(p); |
||
1503 | |||
1504 | /* align to 64 byte boundary. */ |
||
1505 | while ((p->nr_insn * sizeof(struct brw_instruction)) % 64) { |
||
1506 | brw_NOP(p); |
||
1507 | } |
||
1508 | |||
1509 | /* Save off the start of this 16-wide program */ |
||
1510 | c->prog_data.prog_offset_16 = p->nr_insn * sizeof(struct brw_instruction); |
||
1511 | |||
1512 | brw_set_compression_control(p, BRW_COMPRESSION_COMPRESSED); |
||
1513 | |||
1514 | dispatch_width = 16; |
||
1515 | generate_code(simd16_instructions); |
||
1516 | } |
||
1517 | |||
1518 | return brw_get_program(p, assembly_size); |
||
1519 | }>>-B%d",>>>=>><>> |