Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2009 VMware, Inc. |
||
4 | * Copyright 2007-2008 VMware, Inc. |
||
5 | * All Rights Reserved. |
||
6 | * |
||
7 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
8 | * copy of this software and associated documentation files (the |
||
9 | * "Software"), to deal in the Software without restriction, including |
||
10 | * without limitation the rights to use, copy, modify, merge, publish, |
||
11 | * distribute, sub license, and/or sell copies of the Software, and to |
||
12 | * permit persons to whom the Software is furnished to do so, subject to |
||
13 | * the following conditions: |
||
14 | * |
||
15 | * The above copyright notice and this permission notice (including the |
||
16 | * next paragraph) shall be included in all copies or substantial portions |
||
17 | * of the Software. |
||
18 | * |
||
19 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
20 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
21 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
22 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
23 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
24 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
25 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
26 | * |
||
27 | **************************************************************************/ |
||
28 | |||
29 | /** |
||
30 | * @file |
||
31 | * TGSI to LLVM IR translation -- SoA. |
||
32 | * |
||
33 | * @author Jose Fonseca |
||
34 | * |
||
35 | * Based on tgsi_sse2.c code written by Michal Krol, Keith Whitwell, |
||
36 | * Brian Paul, and others. |
||
37 | */ |
||
38 | |||
39 | #include "pipe/p_config.h" |
||
40 | #include "pipe/p_shader_tokens.h" |
||
41 | #include "util/u_debug.h" |
||
42 | #include "util/u_math.h" |
||
43 | #include "util/u_memory.h" |
||
44 | #include "tgsi/tgsi_dump.h" |
||
45 | #include "tgsi/tgsi_exec.h" |
||
46 | #include "tgsi/tgsi_info.h" |
||
47 | #include "tgsi/tgsi_parse.h" |
||
48 | #include "tgsi/tgsi_util.h" |
||
49 | #include "tgsi/tgsi_scan.h" |
||
50 | #include "tgsi/tgsi_strings.h" |
||
51 | #include "lp_bld_tgsi_action.h" |
||
52 | #include "lp_bld_type.h" |
||
53 | #include "lp_bld_const.h" |
||
54 | #include "lp_bld_arit.h" |
||
55 | #include "lp_bld_bitarit.h" |
||
56 | #include "lp_bld_gather.h" |
||
57 | #include "lp_bld_init.h" |
||
58 | #include "lp_bld_logic.h" |
||
59 | #include "lp_bld_swizzle.h" |
||
60 | #include "lp_bld_flow.h" |
||
61 | #include "lp_bld_quad.h" |
||
62 | #include "lp_bld_tgsi.h" |
||
63 | #include "lp_bld_limits.h" |
||
64 | #include "lp_bld_debug.h" |
||
65 | #include "lp_bld_printf.h" |
||
66 | #include "lp_bld_sample.h" |
||
67 | #include "lp_bld_struct.h" |
||
68 | |||
69 | /* SM 4.0 says that subroutines can nest 32 deep and |
||
70 | * we need one more for our main function */ |
||
71 | #define LP_MAX_NUM_FUNCS 33 |
||
72 | |||
73 | #define DUMP_GS_EMITS 0 |
||
74 | |||
75 | /* |
||
76 | * If non-zero, the generated LLVM IR will print intermediate results on every TGSI |
||
77 | * instruction. |
||
78 | * |
||
79 | * TODO: |
||
80 | * - take execution masks in consideration |
||
81 | * - debug control-flow instructions |
||
82 | */ |
||
83 | #define DEBUG_EXECUTION 0 |
||
84 | |||
85 | |||
86 | /* |
||
87 | * Emit code to print a register value. |
||
88 | */ |
||
89 | static void |
||
90 | emit_dump_reg(struct gallivm_state *gallivm, |
||
91 | unsigned file, |
||
92 | unsigned index, |
||
93 | unsigned chan, |
||
94 | LLVMValueRef value) |
||
95 | { |
||
96 | char buf[32]; |
||
97 | |||
98 | util_snprintf(buf, sizeof buf, " %s[%u].%c = ", |
||
99 | tgsi_file_name(file), |
||
100 | index, "xyzw"[chan]); |
||
101 | |||
102 | lp_build_print_value(gallivm, buf, value); |
||
103 | } |
||
104 | |||
105 | /* |
||
106 | * Return the context for the current function. |
||
107 | * (always 'main', if shader doesn't do any function calls) |
||
108 | */ |
||
109 | static INLINE struct function_ctx * |
||
110 | func_ctx(struct lp_exec_mask *mask) |
||
111 | { |
||
112 | assert(mask->function_stack_size > 0); |
||
113 | assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS); |
||
114 | return &mask->function_stack[mask->function_stack_size - 1]; |
||
115 | } |
||
116 | |||
117 | /* |
||
118 | * Returns true if we're in a loop. |
||
119 | * It's global, meaning that it returns true even if there's |
||
120 | * no loop inside the current function, but we were inside |
||
121 | * a loop inside another function, from which this one was called. |
||
122 | */ |
||
123 | static INLINE boolean |
||
124 | mask_has_loop(struct lp_exec_mask *mask) |
||
125 | { |
||
126 | int i; |
||
127 | for (i = mask->function_stack_size - 1; i >= 0; --i) { |
||
128 | const struct function_ctx *ctx = &mask->function_stack[i]; |
||
129 | if (ctx->loop_stack_size > 0) |
||
130 | return TRUE; |
||
131 | } |
||
132 | return FALSE; |
||
133 | } |
||
134 | |||
135 | /* |
||
136 | * Returns true if we're inside a switch statement. |
||
137 | * It's global, meaning that it returns true even if there's |
||
138 | * no switch in the current function, but we were inside |
||
139 | * a switch inside another function, from which this one was called. |
||
140 | */ |
||
141 | static INLINE boolean |
||
142 | mask_has_switch(struct lp_exec_mask *mask) |
||
143 | { |
||
144 | int i; |
||
145 | for (i = mask->function_stack_size - 1; i >= 0; --i) { |
||
146 | const struct function_ctx *ctx = &mask->function_stack[i]; |
||
147 | if (ctx->switch_stack_size > 0) |
||
148 | return TRUE; |
||
149 | } |
||
150 | return FALSE; |
||
151 | } |
||
152 | |||
153 | /* |
||
154 | * Returns true if we're inside a conditional. |
||
155 | * It's global, meaning that it returns true even if there's |
||
156 | * no conditional in the current function, but we were inside |
||
157 | * a conditional inside another function, from which this one was called. |
||
158 | */ |
||
159 | static INLINE boolean |
||
160 | mask_has_cond(struct lp_exec_mask *mask) |
||
161 | { |
||
162 | int i; |
||
163 | for (i = mask->function_stack_size - 1; i >= 0; --i) { |
||
164 | const struct function_ctx *ctx = &mask->function_stack[i]; |
||
165 | if (ctx->cond_stack_size > 0) |
||
166 | return TRUE; |
||
167 | } |
||
168 | return FALSE; |
||
169 | } |
||
170 | |||
171 | |||
172 | /* |
||
173 | * Initialize a function context at the specified index. |
||
174 | */ |
||
175 | static void |
||
176 | lp_exec_mask_function_init(struct lp_exec_mask *mask, int function_idx) |
||
177 | { |
||
178 | LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context); |
||
179 | LLVMBuilderRef builder = mask->bld->gallivm->builder; |
||
180 | struct function_ctx *ctx = &mask->function_stack[function_idx]; |
||
181 | |||
182 | ctx->cond_stack_size = 0; |
||
183 | ctx->loop_stack_size = 0; |
||
184 | ctx->switch_stack_size = 0; |
||
185 | |||
186 | if (function_idx == 0) { |
||
187 | ctx->ret_mask = mask->ret_mask; |
||
188 | } |
||
189 | |||
190 | ctx->loop_limiter = lp_build_alloca(mask->bld->gallivm, |
||
191 | int_type, "looplimiter"); |
||
192 | LLVMBuildStore( |
||
193 | builder, |
||
194 | LLVMConstInt(int_type, LP_MAX_TGSI_LOOP_ITERATIONS, false), |
||
195 | ctx->loop_limiter); |
||
196 | } |
||
197 | |||
198 | static void lp_exec_mask_init(struct lp_exec_mask *mask, struct lp_build_context *bld) |
||
199 | { |
||
200 | mask->bld = bld; |
||
201 | mask->has_mask = FALSE; |
||
202 | mask->ret_in_main = FALSE; |
||
203 | /* For the main function */ |
||
204 | mask->function_stack_size = 1; |
||
205 | |||
206 | mask->int_vec_type = lp_build_int_vec_type(bld->gallivm, mask->bld->type); |
||
207 | mask->exec_mask = mask->ret_mask = mask->break_mask = mask->cont_mask = |
||
208 | mask->cond_mask = mask->switch_mask = |
||
209 | LLVMConstAllOnes(mask->int_vec_type); |
||
210 | |||
211 | mask->function_stack = CALLOC(LP_MAX_NUM_FUNCS, |
||
212 | sizeof(mask->function_stack[0])); |
||
213 | lp_exec_mask_function_init(mask, 0); |
||
214 | } |
||
215 | |||
/*
 * Release the resources owned by the execution mask
 * (the per-function context stack allocated in lp_exec_mask_init).
 */
static void
lp_exec_mask_fini(struct lp_exec_mask *mask)
{
   FREE(mask->function_stack);
}
||
221 | |||
222 | static void lp_exec_mask_update(struct lp_exec_mask *mask) |
||
223 | { |
||
224 | LLVMBuilderRef builder = mask->bld->gallivm->builder; |
||
225 | boolean has_loop_mask = mask_has_loop(mask); |
||
226 | boolean has_cond_mask = mask_has_cond(mask); |
||
227 | boolean has_switch_mask = mask_has_switch(mask); |
||
228 | boolean has_ret_mask = mask->function_stack_size > 1 || |
||
229 | mask->ret_in_main; |
||
230 | |||
231 | if (has_loop_mask) { |
||
232 | /*for loops we need to update the entire mask at runtime */ |
||
233 | LLVMValueRef tmp; |
||
234 | assert(mask->break_mask); |
||
235 | tmp = LLVMBuildAnd(builder, |
||
236 | mask->cont_mask, |
||
237 | mask->break_mask, |
||
238 | "maskcb"); |
||
239 | mask->exec_mask = LLVMBuildAnd(builder, |
||
240 | mask->cond_mask, |
||
241 | tmp, |
||
242 | "maskfull"); |
||
243 | } else |
||
244 | mask->exec_mask = mask->cond_mask; |
||
245 | |||
246 | if (has_switch_mask) { |
||
247 | mask->exec_mask = LLVMBuildAnd(builder, |
||
248 | mask->exec_mask, |
||
249 | mask->switch_mask, |
||
250 | "switchmask"); |
||
251 | } |
||
252 | |||
253 | if (has_ret_mask) { |
||
254 | mask->exec_mask = LLVMBuildAnd(builder, |
||
255 | mask->exec_mask, |
||
256 | mask->ret_mask, |
||
257 | "callmask"); |
||
258 | } |
||
259 | |||
260 | mask->has_mask = (has_cond_mask || |
||
261 | has_loop_mask || |
||
262 | has_switch_mask || |
||
263 | has_ret_mask); |
||
264 | } |
||
265 | |||
266 | static void lp_exec_mask_cond_push(struct lp_exec_mask *mask, |
||
267 | LLVMValueRef val) |
||
268 | { |
||
269 | LLVMBuilderRef builder = mask->bld->gallivm->builder; |
||
270 | struct function_ctx *ctx = func_ctx(mask); |
||
271 | |||
272 | if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) { |
||
273 | ctx->cond_stack_size++; |
||
274 | return; |
||
275 | } |
||
276 | if (ctx->cond_stack_size == 0 && mask->function_stack_size == 1) { |
||
277 | assert(mask->cond_mask == LLVMConstAllOnes(mask->int_vec_type)); |
||
278 | } |
||
279 | ctx->cond_stack[ctx->cond_stack_size++] = mask->cond_mask; |
||
280 | assert(LLVMTypeOf(val) == mask->int_vec_type); |
||
281 | mask->cond_mask = LLVMBuildAnd(builder, |
||
282 | mask->cond_mask, |
||
283 | val, |
||
284 | ""); |
||
285 | lp_exec_mask_update(mask); |
||
286 | } |
||
287 | |||
288 | static void lp_exec_mask_cond_invert(struct lp_exec_mask *mask) |
||
289 | { |
||
290 | LLVMBuilderRef builder = mask->bld->gallivm->builder; |
||
291 | struct function_ctx *ctx = func_ctx(mask); |
||
292 | LLVMValueRef prev_mask; |
||
293 | LLVMValueRef inv_mask; |
||
294 | |||
295 | assert(ctx->cond_stack_size); |
||
296 | if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) |
||
297 | return; |
||
298 | prev_mask = ctx->cond_stack[ctx->cond_stack_size - 1]; |
||
299 | if (ctx->cond_stack_size == 1 && mask->function_stack_size == 1) { |
||
300 | assert(prev_mask == LLVMConstAllOnes(mask->int_vec_type)); |
||
301 | } |
||
302 | |||
303 | inv_mask = LLVMBuildNot(builder, mask->cond_mask, ""); |
||
304 | |||
305 | mask->cond_mask = LLVMBuildAnd(builder, |
||
306 | inv_mask, |
||
307 | prev_mask, ""); |
||
308 | lp_exec_mask_update(mask); |
||
309 | } |
||
310 | |||
311 | static void lp_exec_mask_cond_pop(struct lp_exec_mask *mask) |
||
312 | { |
||
313 | struct function_ctx *ctx = func_ctx(mask); |
||
314 | assert(ctx->cond_stack_size); |
||
315 | --ctx->cond_stack_size; |
||
316 | if (ctx->cond_stack_size >= LP_MAX_TGSI_NESTING) |
||
317 | return; |
||
318 | mask->cond_mask = ctx->cond_stack[ctx->cond_stack_size]; |
||
319 | lp_exec_mask_update(mask); |
||
320 | } |
||
321 | |||
322 | static void lp_exec_bgnloop(struct lp_exec_mask *mask) |
||
323 | { |
||
324 | LLVMBuilderRef builder = mask->bld->gallivm->builder; |
||
325 | struct function_ctx *ctx = func_ctx(mask); |
||
326 | |||
327 | if (ctx->loop_stack_size >= LP_MAX_TGSI_NESTING) { |
||
328 | ++ctx->loop_stack_size; |
||
329 | return; |
||
330 | } |
||
331 | |||
332 | ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] = |
||
333 | ctx->break_type; |
||
334 | ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_LOOP; |
||
335 | |||
336 | ctx->loop_stack[ctx->loop_stack_size].loop_block = ctx->loop_block; |
||
337 | ctx->loop_stack[ctx->loop_stack_size].cont_mask = mask->cont_mask; |
||
338 | ctx->loop_stack[ctx->loop_stack_size].break_mask = mask->break_mask; |
||
339 | ctx->loop_stack[ctx->loop_stack_size].break_var = ctx->break_var; |
||
340 | ++ctx->loop_stack_size; |
||
341 | |||
342 | ctx->break_var = lp_build_alloca(mask->bld->gallivm, mask->int_vec_type, ""); |
||
343 | LLVMBuildStore(builder, mask->break_mask, ctx->break_var); |
||
344 | |||
345 | ctx->loop_block = lp_build_insert_new_block(mask->bld->gallivm, "bgnloop"); |
||
346 | |||
347 | LLVMBuildBr(builder, ctx->loop_block); |
||
348 | LLVMPositionBuilderAtEnd(builder, ctx->loop_block); |
||
349 | |||
350 | mask->break_mask = LLVMBuildLoad(builder, ctx->break_var, ""); |
||
351 | |||
352 | lp_exec_mask_update(mask); |
||
353 | } |
||
354 | |||
/*
 * BRK: disable the currently-active lanes in the innermost loop's break
 * mask, or -- when the break belongs to a switch -- in the switch mask.
 * May also short-circuit execution of a deferred "default" block by
 * rewinding bld_base->pc (see lp_exec_default / lp_exec_endswitch).
 */
static void lp_exec_break(struct lp_exec_mask *mask,
                          struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) {
      /* loop break: active lanes leave the break mask until ENDLOOP */
      LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                            mask->exec_mask,
                                            "break");

      mask->break_mask = LLVMBuildAnd(builder,
                                      mask->break_mask,
                                      exec_mask, "break_full");
   }
   else {
      /*
       * switch break: if the next instruction is ENDSWITCH or CASE the
       * break is unconditional (no divergent control flow before it).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc + 1].Instruction.Opcode;
      boolean break_always = (opcode == TGSI_OPCODE_ENDSWITCH ||
                              opcode == TGSI_OPCODE_CASE);


      if (ctx->switch_in_default) {
         /*
          * stop default execution but only if this is an unconditional switch.
          * (The condition here is not perfect since dead code after break is
          * allowed but should be sufficient since false negatives are just
          * unoptimized - so we don't have to pre-evaluate that).
          */
         if(break_always && ctx->switch_pc) {
            /* jump back to where the deferred default was entered from */
            bld_base->pc = ctx->switch_pc;
            return;
         }
      }

      if (break_always) {
         /* all lanes break: switch mask goes to zero */
         mask->switch_mask = LLVMConstNull(mask->bld->int_vec_type);
      }
      else {
         /* conditional break: only the active lanes leave the switch */
         LLVMValueRef exec_mask = LLVMBuildNot(builder,
                                               mask->exec_mask,
                                               "break");
         mask->switch_mask = LLVMBuildAnd(builder,
                                          mask->switch_mask,
                                          exec_mask, "break_switch");
      }
   }

   lp_exec_mask_update(mask);
}
||
404 | |||
405 | static void lp_exec_break_condition(struct lp_exec_mask *mask, |
||
406 | LLVMValueRef cond) |
||
407 | { |
||
408 | LLVMBuilderRef builder = mask->bld->gallivm->builder; |
||
409 | struct function_ctx *ctx = func_ctx(mask); |
||
410 | LLVMValueRef cond_mask = LLVMBuildAnd(builder, |
||
411 | mask->exec_mask, |
||
412 | cond, "cond_mask"); |
||
413 | cond_mask = LLVMBuildNot(builder, cond_mask, "break_cond"); |
||
414 | |||
415 | if (ctx->break_type == LP_EXEC_MASK_BREAK_TYPE_LOOP) { |
||
416 | mask->break_mask = LLVMBuildAnd(builder, |
||
417 | mask->break_mask, |
||
418 | cond_mask, "breakc_full"); |
||
419 | } |
||
420 | else { |
||
421 | mask->switch_mask = LLVMBuildAnd(builder, |
||
422 | mask->switch_mask, |
||
423 | cond_mask, "breakc_switch"); |
||
424 | } |
||
425 | |||
426 | lp_exec_mask_update(mask); |
||
427 | } |
||
428 | |||
429 | static void lp_exec_continue(struct lp_exec_mask *mask) |
||
430 | { |
||
431 | LLVMBuilderRef builder = mask->bld->gallivm->builder; |
||
432 | LLVMValueRef exec_mask = LLVMBuildNot(builder, |
||
433 | mask->exec_mask, |
||
434 | ""); |
||
435 | |||
436 | mask->cont_mask = LLVMBuildAnd(builder, |
||
437 | mask->cont_mask, |
||
438 | exec_mask, ""); |
||
439 | |||
440 | lp_exec_mask_update(mask); |
||
441 | } |
||
442 | |||
443 | |||
/*
 * ENDLOOP: emit the loop backedge (branch back while any lane is still
 * active and the iteration limiter hasn't expired), then pop the
 * enclosing loop's state from the loop stack.
 */
static void lp_exec_endloop(struct gallivm_state *gallivm,
                            struct lp_exec_mask *mask)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMBasicBlockRef endloop;
   LLVMTypeRef int_type = LLVMInt32TypeInContext(mask->bld->gallivm->context);
   /* scalar integer wide enough to hold the whole exec-mask vector */
   LLVMTypeRef reg_type = LLVMIntTypeInContext(gallivm->context,
                                               mask->bld->type.width *
                                               mask->bld->type.length);
   LLVMValueRef i1cond, i2cond, icond, limiter;

   assert(mask->break_mask);


   assert(ctx->loop_stack_size);
   /* levels beyond the nesting limit emitted no IR; just unwind the count */
   if (ctx->loop_stack_size > LP_MAX_TGSI_NESTING) {
      --ctx->loop_stack_size;
      return;
   }

   /*
    * Restore the cont_mask, but don't pop
    */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size - 1].cont_mask;
   lp_exec_mask_update(mask);

   /*
    * Unlike the continue mask, the break_mask must be preserved across loop
    * iterations
    */
   LLVMBuildStore(builder, mask->break_mask, ctx->break_var);

   /* Decrement the loop limiter */
   limiter = LLVMBuildLoad(builder, ctx->loop_limiter, "");

   limiter = LLVMBuildSub(
      builder,
      limiter,
      LLVMConstInt(int_type, 1, false),
      "");

   LLVMBuildStore(builder, limiter, ctx->loop_limiter);

   /* i1cond = (mask != 0) -- any lane still executing the loop body */
   i1cond = LLVMBuildICmp(
      builder,
      LLVMIntNE,
      LLVMBuildBitCast(builder, mask->exec_mask, reg_type, ""),
      LLVMConstNull(reg_type), "i1cond");

   /* i2cond = (looplimiter > 0) -- runaway-loop guard */
   i2cond = LLVMBuildICmp(
      builder,
      LLVMIntSGT,
      limiter,
      LLVMConstNull(int_type), "i2cond");

   /* if( i1cond && i2cond ) take the backedge, else fall out */
   icond = LLVMBuildAnd(builder, i1cond, i2cond, "");

   endloop = lp_build_insert_new_block(mask->bld->gallivm, "endloop");

   LLVMBuildCondBr(builder,
                   icond, ctx->loop_block, endloop);

   LLVMPositionBuilderAtEnd(builder, endloop);

   assert(ctx->loop_stack_size);
   --ctx->loop_stack_size;
   /* restore the enclosing loop's masks, block and break bookkeeping */
   mask->cont_mask = ctx->loop_stack[ctx->loop_stack_size].cont_mask;
   mask->break_mask = ctx->loop_stack[ctx->loop_stack_size].break_mask;
   ctx->loop_block = ctx->loop_stack[ctx->loop_stack_size].loop_block;
   ctx->break_var = ctx->loop_stack[ctx->loop_stack_size].break_var;
   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size +
         ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
||
523 | |||
524 | static void lp_exec_switch(struct lp_exec_mask *mask, |
||
525 | LLVMValueRef switchval) |
||
526 | { |
||
527 | struct function_ctx *ctx = func_ctx(mask); |
||
528 | |||
529 | if (ctx->switch_stack_size >= LP_MAX_TGSI_NESTING || |
||
530 | ctx->loop_stack_size > LP_MAX_TGSI_NESTING) { |
||
531 | ctx->switch_stack_size++; |
||
532 | return; |
||
533 | } |
||
534 | |||
535 | ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size] = |
||
536 | ctx->break_type; |
||
537 | ctx->break_type = LP_EXEC_MASK_BREAK_TYPE_SWITCH; |
||
538 | |||
539 | ctx->switch_stack[ctx->switch_stack_size].switch_mask = mask->switch_mask; |
||
540 | ctx->switch_stack[ctx->switch_stack_size].switch_val = ctx->switch_val; |
||
541 | ctx->switch_stack[ctx->switch_stack_size].switch_mask_default = ctx->switch_mask_default; |
||
542 | ctx->switch_stack[ctx->switch_stack_size].switch_in_default = ctx->switch_in_default; |
||
543 | ctx->switch_stack[ctx->switch_stack_size].switch_pc = ctx->switch_pc; |
||
544 | ctx->switch_stack_size++; |
||
545 | |||
546 | mask->switch_mask = LLVMConstNull(mask->int_vec_type); |
||
547 | ctx->switch_val = switchval; |
||
548 | ctx->switch_mask_default = LLVMConstNull(mask->int_vec_type); |
||
549 | ctx->switch_in_default = false; |
||
550 | ctx->switch_pc = 0; |
||
551 | |||
552 | lp_exec_mask_update(mask); |
||
553 | } |
||
554 | |||
/*
 * ENDSWITCH: if a DEFAULT block was deferred (recorded in switch_pc by
 * lp_exec_default), jump back and execute it now with the proper mask;
 * otherwise pop the switch state saved by lp_exec_switch.
 */
static void lp_exec_endswitch(struct lp_exec_mask *mask,
                              struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   /* levels beyond the nesting limit emitted no IR; just unwind the count */
   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      ctx->switch_stack_size--;
      return;
   }

   /* check if there's deferred default if so do it now */
   if (ctx->switch_pc && !ctx->switch_in_default) {
      LLVMValueRef prevmask, defaultmask;
      unsigned tmp_pc;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* default executes the lanes no case matched */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);

      assert(bld_base->instructions[ctx->switch_pc - 1].Instruction.Opcode ==
             TGSI_OPCODE_DEFAULT);

      tmp_pc = bld_base->pc;
      bld_base->pc = ctx->switch_pc;
      /*
       * re-purpose switch_pc to point to here again, since we stop execution of
       * the deferred default after next break.
       */
      ctx->switch_pc = tmp_pc - 1;

      return;
   }

   else if (ctx->switch_pc && ctx->switch_in_default) {
      /* just returned from executing the deferred default */
      assert(bld_base->pc == ctx->switch_pc + 1);
   }

   /* restore enclosing switch state */
   ctx->switch_stack_size--;
   mask->switch_mask = ctx->switch_stack[ctx->switch_stack_size].switch_mask;
   ctx->switch_val = ctx->switch_stack[ctx->switch_stack_size].switch_val;
   ctx->switch_mask_default = ctx->switch_stack[ctx->switch_stack_size].switch_mask_default;
   ctx->switch_in_default = ctx->switch_stack[ctx->switch_stack_size].switch_in_default;
   ctx->switch_pc = ctx->switch_stack[ctx->switch_stack_size].switch_pc;

   ctx->break_type = ctx->break_type_stack[ctx->loop_stack_size + ctx->switch_stack_size];

   lp_exec_mask_update(mask);
}
||
606 | |||
607 | static void lp_exec_case(struct lp_exec_mask *mask, |
||
608 | LLVMValueRef caseval) |
||
609 | { |
||
610 | LLVMBuilderRef builder = mask->bld->gallivm->builder; |
||
611 | struct function_ctx *ctx = func_ctx(mask); |
||
612 | |||
613 | LLVMValueRef casemask, prevmask; |
||
614 | |||
615 | if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) { |
||
616 | return; |
||
617 | } |
||
618 | |||
619 | /* skipping case mask evaluation here is NOT optional (not in all cases anyway). */ |
||
620 | if (!ctx->switch_in_default) { |
||
621 | prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask; |
||
622 | casemask = lp_build_cmp(mask->bld, PIPE_FUNC_EQUAL, caseval, ctx->switch_val); |
||
623 | ctx->switch_mask_default = LLVMBuildOr(builder, casemask, |
||
624 | ctx->switch_mask_default, "sw_default_mask"); |
||
625 | casemask = LLVMBuildOr(builder, casemask, mask->switch_mask, ""); |
||
626 | mask->switch_mask = LLVMBuildAnd(builder, casemask, prevmask, "sw_mask"); |
||
627 | |||
628 | lp_exec_mask_update(mask); |
||
629 | } |
||
630 | } |
||
631 | |||
/*
 * Analyse default statement in a switch.
 * \return true if default is last statement, false otherwise
 * \param default_pc_start contains pc of instruction to jump to
 *                         if default wasn't last but there's no
 *                         fallthrough into default.
 */
static boolean default_analyse_is_last(struct lp_exec_mask *mask,
                                       struct lp_build_tgsi_context * bld_base,
                                       int *default_pc_start)
{
   unsigned pc = bld_base->pc;
   struct function_ctx *ctx = func_ctx(mask);
   /* depth of the switch the DEFAULT belongs to; used to skip nested switches */
   unsigned curr_switch_stack = ctx->switch_stack_size;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return false;
   }

   /* skip over case statements which are together with default */
   while (bld_base->instructions[pc].Instruction.Opcode == TGSI_OPCODE_CASE) {
      pc++;
   }

   /* scan forward for the next CASE/ENDSWITCH at the same nesting level */
   while (pc != -1 && pc < bld_base->num_instructions) {
      unsigned opcode = bld_base->instructions[pc].Instruction.Opcode;
      switch (opcode) {
      case TGSI_OPCODE_CASE:
         if (curr_switch_stack == ctx->switch_stack_size) {
            /* another case follows: default is not last */
            *default_pc_start = pc - 1;
            return false;
         }
         break;
      case TGSI_OPCODE_SWITCH:
         curr_switch_stack++;
         break;
      case TGSI_OPCODE_ENDSWITCH:
         if (curr_switch_stack == ctx->switch_stack_size) {
            *default_pc_start = pc - 1;
            return true;
         }
         curr_switch_stack--;
         break;
      }
      pc++;
   }
   /* should never arrive here: a switch always has a matching ENDSWITCH */
   assert(0);
   return true;
}
||
682 | |||
/*
 * DEFAULT: enable the lanes that matched no case. Straightforward when
 * default is the last statement in the switch; otherwise execution of
 * the default body may be deferred until ENDSWITCH (see comments below
 * and in lp_exec_endswitch).
 */
static void lp_exec_default(struct lp_exec_mask *mask,
                            struct lp_build_tgsi_context * bld_base)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);

   int default_exec_pc;
   boolean default_is_last;

   if (ctx->switch_stack_size > LP_MAX_TGSI_NESTING) {
      return;
   }

   /*
    * This is a messy opcode, because it may not be always at the end and
    * there can be fallthrough in and out of it.
    */

   default_is_last = default_analyse_is_last(mask, bld_base, &default_exec_pc);
   /*
    * If it is last statement in switch (note that case statements appearing
    * "at the same time" as default don't change that) everything is just fine,
    * update switch mask and go on. This means we can handle default with
    * fallthrough INTO it without overhead, if it is last.
    */
   if (default_is_last) {
      LLVMValueRef prevmask, defaultmask;
      prevmask = ctx->switch_stack[ctx->switch_stack_size - 1].switch_mask;
      /* default's lanes = complement of every case match seen so far */
      defaultmask = LLVMBuildNot(builder, ctx->switch_mask_default, "sw_default_mask");
      /* keep lanes that fell through into default */
      defaultmask = LLVMBuildOr(builder, defaultmask, mask->switch_mask, "");
      mask->switch_mask = LLVMBuildAnd(builder, prevmask, defaultmask, "sw_mask");
      ctx->switch_in_default = true;

      lp_exec_mask_update(mask);
   }
   else {
      /*
       * Technically, "case" immediately before default isn't really a
       * fallthrough, however we still have to count them as such as we
       * already have updated the masks.
       * If that happens in practice could add a switch optimizer pass
       * which just gets rid of all case statements appearing together with
       * default (or could do switch analysis at switch start time instead).
       */
      unsigned opcode = bld_base->instructions[bld_base->pc - 1].Instruction.Opcode;
      boolean ft_into = (opcode != TGSI_OPCODE_BRK &&
                         opcode != TGSI_OPCODE_SWITCH);
      /*
       * If it is not last statement and there was no fallthrough into it,
       * we record the PC and continue execution at next case (again, those
       * case encountered at the same time don't count). At endswitch
       * time, we update switchmask, and go back executing the code we skipped
       * until the next break (possibly re-executing some code with changed mask
       * if there was a fallthrough out of default).
       * Finally, if it is not last statement and there was a fallthrough into it,
       * do the same as with the former case, except instead of skipping the code
       * just execute it without updating the mask, then go back and re-execute.
       */
      ctx->switch_pc = bld_base->pc;
      if (!ft_into) {
         /* skip the default body for now; ENDSWITCH will come back to it */
         bld_base->pc = default_exec_pc;
      }
   }
}
||
747 | |||
748 | |||
749 | /* stores val into an address pointed to by dst_ptr. |
||
750 | * mask->exec_mask is used to figure out which bits of val |
||
751 | * should be stored into the address |
||
752 | * (0 means don't store this bit, 1 means do store). |
||
753 | */ |
||
754 | static void lp_exec_mask_store(struct lp_exec_mask *mask, |
||
755 | struct lp_build_context *bld_store, |
||
756 | LLVMValueRef pred, |
||
757 | LLVMValueRef val, |
||
758 | LLVMValueRef dst_ptr) |
||
759 | { |
||
760 | LLVMBuilderRef builder = mask->bld->gallivm->builder; |
||
761 | |||
762 | assert(lp_check_value(bld_store->type, val)); |
||
763 | assert(LLVMGetTypeKind(LLVMTypeOf(dst_ptr)) == LLVMPointerTypeKind); |
||
764 | assert(LLVMGetElementType(LLVMTypeOf(dst_ptr)) == LLVMTypeOf(val)); |
||
765 | |||
766 | /* Mix the predicate and execution mask */ |
||
767 | if (mask->has_mask) { |
||
768 | if (pred) { |
||
769 | pred = LLVMBuildAnd(builder, pred, mask->exec_mask, ""); |
||
770 | } else { |
||
771 | pred = mask->exec_mask; |
||
772 | } |
||
773 | } |
||
774 | |||
775 | if (pred) { |
||
776 | LLVMValueRef res, dst; |
||
777 | |||
778 | dst = LLVMBuildLoad(builder, dst_ptr, ""); |
||
779 | res = lp_build_select(bld_store, pred, val, dst); |
||
780 | LLVMBuildStore(builder, res, dst_ptr); |
||
781 | } else |
||
782 | LLVMBuildStore(builder, val, dst_ptr); |
||
783 | } |
||
784 | |||
785 | static void lp_exec_mask_call(struct lp_exec_mask *mask, |
||
786 | int func, |
||
787 | int *pc) |
||
788 | { |
||
789 | if (mask->function_stack_size >= LP_MAX_NUM_FUNCS) { |
||
790 | return; |
||
791 | } |
||
792 | |||
793 | lp_exec_mask_function_init(mask, mask->function_stack_size); |
||
794 | mask->function_stack[mask->function_stack_size].pc = *pc; |
||
795 | mask->function_stack[mask->function_stack_size].ret_mask = mask->ret_mask; |
||
796 | mask->function_stack_size++; |
||
797 | *pc = func; |
||
798 | } |
||
799 | |||
/**
 * Execute a TGSI RET: mask off the channels that are currently active so
 * they stop executing until the function (or main) ends.
 *
 * A RET from the very top of main (no open cond/loop/switch and only the
 * main frame on the stack) terminates interpretation by setting *pc = -1.
 */
static void lp_exec_mask_ret(struct lp_exec_mask *mask, int *pc)
{
   LLVMBuilderRef builder = mask->bld->gallivm->builder;
   struct function_ctx *ctx = func_ctx(mask);
   LLVMValueRef exec_mask;

   if (ctx->cond_stack_size == 0 &&
       ctx->loop_stack_size == 0 &&
       ctx->switch_stack_size == 0 &&
       mask->function_stack_size == 1) {
      /* returning from main() */
      *pc = -1;
      return;
   }

   if (mask->function_stack_size == 1) {
      /*
       * This requires special handling since we need to ensure
       * we don't drop the mask even if we have no call stack
       * (e.g. after a ret in a if clause after the endif)
       */
      mask->ret_in_main = TRUE;
   }

   /* channels that hit this RET (currently active) must be disabled */
   exec_mask = LLVMBuildNot(builder,
                            mask->exec_mask,
                            "ret");

   /* accumulate into the frame's return mask */
   mask->ret_mask = LLVMBuildAnd(builder,
                                 mask->ret_mask,
                                 exec_mask, "ret_full");

   /* recombine cond/loop/ret masks into the effective exec mask */
   lp_exec_mask_update(mask);
}
||
834 | |||
/* BGNSUB marker: intentionally a no-op. The per-function mask state is
 * set up by lp_exec_mask_call() when the subroutine is entered, so there
 * is nothing to do at the subroutine's textual start. */
static void lp_exec_mask_bgnsub(struct lp_exec_mask *mask)
{
}
||
838 | |||
839 | static void lp_exec_mask_endsub(struct lp_exec_mask *mask, int *pc) |
||
840 | { |
||
841 | struct function_ctx *ctx; |
||
842 | |||
843 | assert(mask->function_stack_size > 1); |
||
844 | assert(mask->function_stack_size <= LP_MAX_NUM_FUNCS); |
||
845 | |||
846 | ctx = func_ctx(mask); |
||
847 | mask->function_stack_size--; |
||
848 | |||
849 | *pc = ctx->pc; |
||
850 | mask->ret_mask = ctx->ret_mask; |
||
851 | |||
852 | lp_exec_mask_update(mask); |
||
853 | } |
||
854 | |||
855 | |||
856 | static LLVMValueRef |
||
857 | get_file_ptr(struct lp_build_tgsi_soa_context *bld, |
||
858 | unsigned file, |
||
859 | unsigned index, |
||
860 | unsigned chan) |
||
861 | { |
||
862 | LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
||
863 | LLVMValueRef (*array_of_vars)[TGSI_NUM_CHANNELS]; |
||
864 | LLVMValueRef var_of_array; |
||
865 | |||
866 | switch (file) { |
||
867 | case TGSI_FILE_TEMPORARY: |
||
868 | array_of_vars = bld->temps; |
||
869 | var_of_array = bld->temps_array; |
||
870 | break; |
||
871 | case TGSI_FILE_OUTPUT: |
||
872 | array_of_vars = bld->outputs; |
||
873 | var_of_array = bld->outputs_array; |
||
874 | break; |
||
875 | default: |
||
876 | assert(0); |
||
877 | return NULL; |
||
878 | } |
||
879 | |||
880 | assert(chan < 4); |
||
881 | |||
882 | if (bld->indirect_files & (1 << file)) { |
||
883 | LLVMValueRef lindex = lp_build_const_int32(bld->bld_base.base.gallivm, index * 4 + chan); |
||
884 | return LLVMBuildGEP(builder, var_of_array, &lindex, 1, ""); |
||
885 | } |
||
886 | else { |
||
887 | assert(index <= bld->bld_base.info->file_max[file]); |
||
888 | return array_of_vars[index][chan]; |
||
889 | } |
||
890 | } |
||
891 | |||
892 | |||
/**
 * Return pointer to a temporary register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * Thin wrapper around get_file_ptr() for the TEMPORARY file.
 * \param index which temporary register
 * \param chan which channel of the temp register.
 * \return LLVM pointer suitable for load/store of the channel's vector
 */
LLVMValueRef
lp_get_temp_ptr_soa(struct lp_build_tgsi_soa_context *bld,
                    unsigned index,
                    unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_TEMPORARY, index, chan);
}
||
906 | |||
/**
 * Return pointer to a output register channel (src or dest).
 * Note that indirect addressing cannot be handled here.
 * Thin wrapper around get_file_ptr() for the OUTPUT file.
 * \param index which output register
 * \param chan which channel of the output register.
 * \return LLVM pointer suitable for load/store of the channel's vector
 */
LLVMValueRef
lp_get_output_ptr(struct lp_build_tgsi_soa_context *bld,
                  unsigned index,
                  unsigned chan)
{
   return get_file_ptr(bld, TGSI_FILE_OUTPUT, index, chan);
}
||
920 | |||
921 | /* |
||
922 | * If we have indirect addressing in outputs copy our alloca array |
||
923 | * to the outputs slots specified by the caller to make sure |
||
924 | * our outputs are delivered consistently via the same interface. |
||
925 | */ |
||
926 | static void |
||
927 | gather_outputs(struct lp_build_tgsi_soa_context * bld) |
||
928 | { |
||
929 | if ((bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) { |
||
930 | unsigned index, chan; |
||
931 | assert(bld->bld_base.info->num_outputs <= |
||
932 | bld->bld_base.info->file_max[TGSI_FILE_OUTPUT] + 1); |
||
933 | for (index = 0; index < bld->bld_base.info->num_outputs; ++index) { |
||
934 | for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) { |
||
935 | bld->outputs[index][chan] = lp_get_output_ptr(bld, index, chan); |
||
936 | } |
||
937 | } |
||
938 | } |
||
939 | } |
||
940 | |||
/**
 * Gather vector.
 * Loads one scalar per lane from base_ptr at the per-lane offsets in
 * 'indexes' and assembles them into a single SoA vector.
 * XXX the lp_build_gather() function should be capable of doing this
 * with a little work.
 * \param base_ptr       scalar-element pointer to gather from
 * \param indexes        per-lane element indices (uint vector)
 * \param overflow_mask  optional per-lane mask of out-of-bounds lanes;
 *                       those lanes read index 0 and return 0
 */
static LLVMValueRef
build_gather(struct lp_build_tgsi_context *bld_base,
             LLVMValueRef base_ptr,
             LLVMValueRef indexes,
             LLVMValueRef overflow_mask)
{
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   struct lp_build_context *bld = &bld_base->base;
   LLVMValueRef res = bld->undef;
   unsigned i;

   /*
    * overflow_mask is a vector telling us which channels
    * in the vector overflowed. We use the overflow behavior for
    * constant buffers which is defined as:
    * Out of bounds access to constant buffer returns 0 in all
    * components. Out of bounds behavior is always with respect
    * to the size of the buffer bound at that slot.
    */

   if (overflow_mask) {
      /*
       * We avoid per-element control flow here (also due to llvm going crazy,
       * though I suspect it's better anyway since overflow is likely rare).
       * Note that since we still fetch from buffers even if num_elements was
       * zero (in this case we'll fetch from index zero) the jit func callers
       * MUST provide valid fake constant buffers of size 4x32 (the values do
       * not matter), otherwise we'd still need (not per element though)
       * control flow.
       */
      /* redirect overflowing lanes to index 0 so the load stays in bounds */
      indexes = lp_build_select(uint_bld, overflow_mask, uint_bld->zero, indexes);
   }

   /*
    * Loop over elements of index_vec, load scalar value, insert it into 'res'.
    */
   for (i = 0; i < bld->type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(bld->gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder,
                                                   indexes, ii, "");
      LLVMValueRef scalar_ptr, scalar;

      scalar_ptr = LLVMBuildGEP(builder, base_ptr,
                                &index, 1, "gather_ptr");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");

      res = LLVMBuildInsertElement(builder, res, scalar, ii, "");
   }

   if (overflow_mask) {
      /* overflowing lanes must read as zero, per the D3D10 rule above */
      res = lp_build_select(bld, overflow_mask, bld->zero, res);
   }

   return res;
}
||
1003 | |||
1004 | |||
/**
 * Scatter/store vector.
 * Stores each lane of 'values' to base_ptr at the corresponding lane of
 * 'indexes', honoring both the execution mask and an optional predicate.
 * Masked-off lanes are implemented as read-modify-write (load the old
 * scalar, select, store back) rather than control flow.
 * \param pred  optional per-lane predicate vector, may be NULL
 */
static void
emit_mask_scatter(struct lp_build_tgsi_soa_context *bld,
                  LLVMValueRef base_ptr,
                  LLVMValueRef indexes,
                  LLVMValueRef values,
                  struct lp_exec_mask *mask,
                  LLVMValueRef pred)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   unsigned i;

   /* Mix the predicate and execution mask */
   if (mask->has_mask) {
      if (pred) {
         pred = LLVMBuildAnd(builder, pred, mask->exec_mask, "");
      }
      else {
         pred = mask->exec_mask;
      }
   }

   /*
    * Loop over elements of index_vec, store scalar value.
    */
   for (i = 0; i < bld->bld_base.base.type.length; i++) {
      LLVMValueRef ii = lp_build_const_int32(gallivm, i);
      LLVMValueRef index = LLVMBuildExtractElement(builder, indexes, ii, "");
      LLVMValueRef scalar_ptr = LLVMBuildGEP(builder, base_ptr, &index, 1, "scatter_ptr");
      LLVMValueRef val = LLVMBuildExtractElement(builder, values, ii, "scatter_val");
      LLVMValueRef scalar_pred = pred ?
         LLVMBuildExtractElement(builder, pred, ii, "scatter_pred") : NULL;

      /* debug aid: flip to 1 to trace each scattered store at runtime */
      if (0)
         lp_build_printf(gallivm, "scatter %d: val %f at %d %p\n",
                         ii, val, index, scalar_ptr);

      if (scalar_pred) {
         /* predicated: keep the old destination value where pred is false */
         LLVMValueRef real_val, dst_val;
         dst_val = LLVMBuildLoad(builder, scalar_ptr, "");
         real_val = lp_build_select(&bld->elem_bld, scalar_pred, val, dst_val);
         LLVMBuildStore(builder, real_val, scalar_ptr);
      }
      else {
         LLVMBuildStore(builder, val, scalar_ptr);
      }
   }
}
||
1056 | |||
1057 | |||
/**
 * Read the current value of the ADDR register, convert the floats to
 * ints, add the base index and return the vector of offsets.
 * The offsets will be used to index into the constant buffer or
 * temporary register file.
 * \param reg_file      the file being indexed (determines clamping)
 * \param reg_index     static base register index
 * \param indirect_reg  the TGSI indirect operand (ADDR or TEMP relative)
 * \return per-lane uint vector of clamped register indices
 */
static LLVMValueRef
get_indirect_index(struct lp_build_tgsi_soa_context *bld,
                   unsigned reg_file, unsigned reg_index,
                   const struct tgsi_ind_register *indirect_reg)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
   /* always use X component of address register */
   unsigned swizzle = indirect_reg->Swizzle;
   LLVMValueRef base;
   LLVMValueRef rel;
   LLVMValueRef max_index;
   LLVMValueRef index;

   /* caller must have allocated this file as indirectly addressable */
   assert(bld->indirect_files & (1 << reg_file));

   base = lp_build_const_int_vec(bld->bld_base.base.gallivm, uint_bld->type, reg_index);

   assert(swizzle < 4);
   switch (indirect_reg->File) {
   case TGSI_FILE_ADDRESS:
      rel = LLVMBuildLoad(builder,
                          bld->addr[indirect_reg->Index][swizzle],
                          "load addr reg");
      /* ADDR LLVM values already have LLVM integer type. */
      break;
   case TGSI_FILE_TEMPORARY:
      rel = lp_get_temp_ptr_soa(bld, indirect_reg->Index, swizzle);
      rel = LLVMBuildLoad(builder, rel, "load temp reg");
      /* TEMP LLVM values always have LLVM float type, but for indirection, the
       * value actually stored is expected to be an integer */
      rel = LLVMBuildBitCast(builder, rel, uint_bld->vec_type, "");
      break;
   default:
      /* only ADDR and TEMP can be used as indirect sources */
      assert(0);
      rel = uint_bld->zero;
   }

   index = lp_build_add(uint_bld, base, rel);

   /*
    * emit_fetch_constant handles constant buffer overflow so this code
    * is pointless for them.
    * Furthermore the D3D10 spec in section 6.5 says:
    * If the constant buffer bound to a slot is larger than the size
    * declared in the shader for that slot, implementations are allowed
    * to return incorrect data (not necessarily 0) for indices that are
    * larger than the declared size but smaller than the buffer size.
    */
   if (reg_file != TGSI_FILE_CONSTANT) {
      /* clamp to the file's declared maximum so GEPs stay in bounds */
      max_index = lp_build_const_int_vec(bld->bld_base.base.gallivm,
                                         uint_bld->type,
                                         bld->bld_base.info->file_max[reg_file]);

      /* the unsigned min below only clamps correctly for unsigned types */
      assert(!uint_bld->type.sign);
      index = lp_build_min(uint_bld, index, max_index);
   }

   return index;
}
||
1124 | |||
1125 | static struct lp_build_context * |
||
1126 | stype_to_fetch(struct lp_build_tgsi_context * bld_base, |
||
1127 | enum tgsi_opcode_type stype) |
||
1128 | { |
||
1129 | struct lp_build_context *bld_fetch; |
||
1130 | |||
1131 | switch (stype) { |
||
1132 | case TGSI_TYPE_FLOAT: |
||
1133 | case TGSI_TYPE_UNTYPED: |
||
1134 | bld_fetch = &bld_base->base; |
||
1135 | break; |
||
1136 | case TGSI_TYPE_UNSIGNED: |
||
1137 | bld_fetch = &bld_base->uint_bld; |
||
1138 | break; |
||
1139 | case TGSI_TYPE_SIGNED: |
||
1140 | bld_fetch = &bld_base->int_bld; |
||
1141 | break; |
||
1142 | case TGSI_TYPE_VOID: |
||
1143 | case TGSI_TYPE_DOUBLE: |
||
1144 | default: |
||
1145 | assert(0); |
||
1146 | bld_fetch = NULL; |
||
1147 | break; |
||
1148 | } |
||
1149 | return bld_fetch; |
||
1150 | } |
||
1151 | |||
1152 | static LLVMValueRef |
||
1153 | get_soa_array_offsets(struct lp_build_context *uint_bld, |
||
1154 | LLVMValueRef indirect_index, |
||
1155 | unsigned chan_index, |
||
1156 | boolean need_perelement_offset) |
||
1157 | { |
||
1158 | struct gallivm_state *gallivm = uint_bld->gallivm; |
||
1159 | LLVMValueRef chan_vec = |
||
1160 | lp_build_const_int_vec(uint_bld->gallivm, uint_bld->type, chan_index); |
||
1161 | LLVMValueRef length_vec = |
||
1162 | lp_build_const_int_vec(gallivm, uint_bld->type, uint_bld->type.length); |
||
1163 | LLVMValueRef index_vec; |
||
1164 | |||
1165 | /* index_vec = (indirect_index * 4 + chan_index) * length + offsets */ |
||
1166 | index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2); |
||
1167 | index_vec = lp_build_add(uint_bld, index_vec, chan_vec); |
||
1168 | index_vec = lp_build_mul(uint_bld, index_vec, length_vec); |
||
1169 | |||
1170 | if (need_perelement_offset) { |
||
1171 | LLVMValueRef pixel_offsets; |
||
1172 | int i; |
||
1173 | /* build pixel offset vector: {0, 1, 2, 3, ...} */ |
||
1174 | pixel_offsets = uint_bld->undef; |
||
1175 | for (i = 0; i < uint_bld->type.length; i++) { |
||
1176 | LLVMValueRef ii = lp_build_const_int32(gallivm, i); |
||
1177 | pixel_offsets = LLVMBuildInsertElement(gallivm->builder, pixel_offsets, |
||
1178 | ii, ii, ""); |
||
1179 | } |
||
1180 | index_vec = lp_build_add(uint_bld, index_vec, pixel_offsets); |
||
1181 | } |
||
1182 | return index_vec; |
||
1183 | } |
||
1184 | |||
/**
 * Fetch one channel of a constant-buffer register as a SoA vector.
 *
 * Direct accesses broadcast a single scalar load; indirect accesses
 * gather per-lane with D3D10 out-of-bounds semantics (overflowing lanes
 * return 0).
 *
 * \param reg      TGSI source register (CONST file)
 * \param stype    type expected by the instruction
 * \param swizzle  channel 0..3 to fetch (vector fetch not supported)
 */
static LLVMValueRef
emit_fetch_constant(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld_base->base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   struct lp_build_context *uint_bld = &bld_base->uint_bld;
   unsigned dimension = 0;
   LLVMValueRef consts_ptr;
   LLVMValueRef num_consts;
   LLVMValueRef res;

   /* XXX: Handle fetching xyzw components as a vector */
   assert(swizzle != ~0);

   if (reg->Register.Dimension) {
      /* 2D constant: Dimension selects which bound constant buffer slot */
      assert(!reg->Dimension.Indirect);
      dimension = reg->Dimension.Index;
      assert(dimension < LP_MAX_TGSI_CONST_BUFFERS);
   }

   consts_ptr = bld->consts[dimension];
   num_consts = bld->consts_sizes[dimension];

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef swizzle_vec =
         lp_build_const_int_vec(gallivm, uint_bld->type, swizzle);
      LLVMValueRef index_vec;  /* index into the const buffer */
      LLVMValueRef overflow_mask;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* All fetches are from the same constant buffer, so
       * we need to propagate the size to a vector to do a
       * vector comparison */
      num_consts = lp_build_broadcast_scalar(uint_bld, num_consts);
      /* Construct a boolean vector telling us which channels
       * overflow the bound constant buffer */
      overflow_mask = lp_build_compare(gallivm, uint_bld->type, PIPE_FUNC_GEQUAL,
                                       indirect_index, num_consts);

      /* index_vec = indirect_index * 4 + swizzle */
      index_vec = lp_build_shl_imm(uint_bld, indirect_index, 2);
      index_vec = lp_build_add(uint_bld, index_vec, swizzle_vec);

      /* Gather values from the constant buffer */
      res = build_gather(bld_base, consts_ptr, index_vec, overflow_mask);
   }
   else {
      LLVMValueRef index;  /* index into the const buffer */
      LLVMValueRef scalar, scalar_ptr;

      /* direct access: a single scalar load, broadcast to all lanes */
      index = lp_build_const_int32(gallivm, reg->Register.Index * 4 + swizzle);

      scalar_ptr = LLVMBuildGEP(builder, consts_ptr,
                                &index, 1, "");
      scalar = LLVMBuildLoad(builder, scalar_ptr, "");
      res = lp_build_broadcast_scalar(&bld_base->base, scalar);
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
      /* constants are stored as floats; reinterpret for integer opcodes */
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
||
1260 | |||
/**
 * Fetch one channel of an immediate register as a SoA vector.
 *
 * Immediates normally live in the bld->immediates cache; when the shader
 * uses indirect addressing on them (or use_immediates_array is set) they
 * are instead read from the imms_array alloca.
 *
 * \param reg      TGSI source register (IMM file)
 * \param stype    type expected by the instruction
 * \param swizzle  channel 0..3 to fetch
 */
static LLVMValueRef
emit_fetch_immediate(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res = NULL;

   if (bld->use_immediates_array || reg->Register.Indirect) {
      LLVMValueRef imms_array;
      LLVMTypeRef fptr_type;

      /* cast imms_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      imms_array = LLVMBuildBitCast(builder, bld->imms_array, fptr_type, "");

      if (reg->Register.Indirect) {
         LLVMValueRef indirect_index;
         LLVMValueRef index_vec;  /* index into the immediate register array */

         indirect_index = get_indirect_index(bld,
                                             reg->Register.File,
                                             reg->Register.Index,
                                             &reg->Indirect);
         /*
          * Unlike for other reg classes, adding pixel offsets is unnecessary -
          * immediates are stored as full vectors (FIXME??? - might be better
          * to store them the same as constants) but all elements are the same
          * in any case.
          */
         index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                           indirect_index,
                                           swizzle,
                                           FALSE);

         /* Gather values from the immediate register array */
         res = build_gather(bld_base, imms_array, index_vec, NULL);
      } else {
         /* direct access into the array: whole-vector load at reg*4+chan */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef imms_ptr = LLVMBuildGEP(builder,
                                              bld->imms_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, imms_ptr, "");
      }
   }
   else {
      /* common case: immediates were pre-broadcast at declaration time */
      res = bld->immediates[reg->Register.Index][swizzle];
   }

   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }
   return res;
}
||
1321 | |||
/**
 * Fetch one channel of an input register as a SoA vector.
 *
 * Indirect accesses gather per-lane from the inputs_array alloca; direct
 * accesses read either the array (when the INPUT file is indirectly
 * addressed anywhere in the shader) or the cached bld->inputs values.
 *
 * \param reg      TGSI source register (INPUT file)
 * \param stype    type expected by the instruction
 * \param swizzle  channel 0..3 to fetch
 */
static LLVMValueRef
emit_fetch_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the input reg array */
      LLVMValueRef inputs_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* per-lane offsets: inputs are stored as scalar elements */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);

      /* cast inputs_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      inputs_array = LLVMBuildBitCast(builder, bld->inputs_array, fptr_type, "");

      /* Gather values from the input register array */
      res = build_gather(bld_base, inputs_array, index_vec, NULL);
   } else {
      if (bld->indirect_files & (1 << TGSI_FILE_INPUT)) {
         /* inputs live in the flat array; whole-vector load at reg*4+chan */
         LLVMValueRef lindex = lp_build_const_int32(gallivm,
                                        reg->Register.Index * 4 + swizzle);
         LLVMValueRef input_ptr = LLVMBuildGEP(builder,
                                               bld->inputs_array, &lindex, 1, "");
         res = LLVMBuildLoad(builder, input_ptr, "");
      }
      else {
         /* common case: inputs were cached as LLVM values up front */
         res = bld->inputs[reg->Register.Index][swizzle];
      }
   }

   assert(res);

   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
||
1379 | |||
1380 | |||
/**
 * Fetch one channel of a geometry-shader input via the gs_iface callback.
 *
 * GS inputs are two-dimensional (vertex x attribute); both dimensions may
 * be indirect. PRIMID declared as an input is special-cased and served
 * from the system values instead.
 *
 * \param reg      TGSI source register (2D GS INPUT)
 * \param stype    type expected by the instruction
 * \param swizzle  channel 0..3 to fetch
 */
static LLVMValueRef
emit_fetch_gs_input(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   const struct tgsi_shader_info *info = bld->bld_base.info;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef attrib_index = NULL;
   LLVMValueRef vertex_index = NULL;
   LLVMValueRef swizzle_index = lp_build_const_int32(gallivm, swizzle);
   LLVMValueRef res;

   if (info->input_semantic_name[reg->Register.Index] == TGSI_SEMANTIC_PRIMID) {
      /* This is really a system value not a regular input */
      assert(!reg->Register.Indirect);
      assert(!reg->Dimension.Indirect);
      res = bld->system_values.prim_id;
      /* prim_id is integer data; reinterpret as float unless an int type
       * was requested */
      if (stype != TGSI_TYPE_UNSIGNED && stype != TGSI_TYPE_SIGNED) {
         res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, "");
      }
      return res;
   }

   if (reg->Register.Indirect) {
      attrib_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Register.Index,
                                        &reg->Indirect);
   } else {
      attrib_index = lp_build_const_int32(gallivm, reg->Register.Index);
   }

   if (reg->Dimension.Indirect) {
      vertex_index = get_indirect_index(bld,
                                        reg->Register.File,
                                        reg->Dimension.Index,
                                        &reg->DimIndirect);
   } else {
      vertex_index = lp_build_const_int32(gallivm, reg->Dimension.Index);
   }

   /* delegate the actual fetch to the geometry shader interface */
   res = bld->gs_iface->fetch_input(bld->gs_iface, bld_base,
                                    reg->Dimension.Indirect,
                                    vertex_index,
                                    reg->Register.Indirect,
                                    attrib_index,
                                    swizzle_index);

   assert(res);

   if (stype == TGSI_TYPE_UNSIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, "");
   } else if (stype == TGSI_TYPE_SIGNED) {
      res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, "");
   }

   return res;
}
||
1443 | |||
/**
 * Fetch one channel of a temporary register as a SoA vector.
 *
 * Indirect accesses gather per-lane from the temps_array alloca; direct
 * accesses load the channel's own alloca via lp_get_temp_ptr_soa().
 *
 * \param reg      TGSI source register (TEMP file)
 * \param stype    type expected by the instruction
 * \param swizzle  channel 0..3 to fetch
 */
static LLVMValueRef
emit_fetch_temporary(
   struct lp_build_tgsi_context * bld_base,
   const struct tgsi_full_src_register * reg,
   enum tgsi_opcode_type stype,
   unsigned swizzle)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMBuilderRef builder = gallivm->builder;
   LLVMValueRef res;

   if (reg->Register.Indirect) {
      LLVMValueRef indirect_index;
      LLVMValueRef index_vec;  /* index into the temp reg array */
      LLVMValueRef temps_array;
      LLVMTypeRef fptr_type;

      indirect_index = get_indirect_index(bld,
                                          reg->Register.File,
                                          reg->Register.Index,
                                          &reg->Indirect);

      /* per-lane offsets: temps are stored as scalar elements */
      index_vec = get_soa_array_offsets(&bld_base->uint_bld,
                                        indirect_index,
                                        swizzle,
                                        TRUE);

      /* cast temps_array pointer to float* */
      fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0);
      temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, "");

      /* Gather values from the temporary register array */
      res = build_gather(bld_base, temps_array, index_vec, NULL);
   }
   else {
      LLVMValueRef temp_ptr;
      temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, swizzle);
      res = LLVMBuildLoad(builder, temp_ptr, "");
   }

   if (stype == TGSI_TYPE_SIGNED || stype == TGSI_TYPE_UNSIGNED) {
      /* temps are stored as floats; reinterpret for integer opcodes */
      struct lp_build_context *bld_fetch = stype_to_fetch(bld_base, stype);
      res = LLVMBuildBitCast(builder, res, bld_fetch->vec_type, "");
   }

   return res;
}
||
1492 | |||
1493 | static LLVMValueRef |
||
1494 | emit_fetch_system_value( |
||
1495 | struct lp_build_tgsi_context * bld_base, |
||
1496 | const struct tgsi_full_src_register * reg, |
||
1497 | enum tgsi_opcode_type stype, |
||
1498 | unsigned swizzle) |
||
1499 | { |
||
1500 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
1501 | struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
||
1502 | const struct tgsi_shader_info *info = bld->bld_base.info; |
||
1503 | LLVMBuilderRef builder = gallivm->builder; |
||
1504 | LLVMValueRef res; |
||
1505 | enum tgsi_opcode_type atype; // Actual type of the value |
||
1506 | |||
1507 | assert(!reg->Register.Indirect); |
||
1508 | |||
1509 | switch (info->system_value_semantic_name[reg->Register.Index]) { |
||
1510 | case TGSI_SEMANTIC_INSTANCEID: |
||
1511 | res = lp_build_broadcast_scalar(&bld_base->uint_bld, bld->system_values.instance_id); |
||
1512 | atype = TGSI_TYPE_UNSIGNED; |
||
1513 | break; |
||
1514 | |||
1515 | case TGSI_SEMANTIC_VERTEXID: |
||
1516 | res = bld->system_values.vertex_id; |
||
1517 | atype = TGSI_TYPE_UNSIGNED; |
||
1518 | break; |
||
1519 | |||
1520 | case TGSI_SEMANTIC_VERTEXID_NOBASE: |
||
1521 | res = bld->system_values.vertex_id_nobase; |
||
1522 | atype = TGSI_TYPE_UNSIGNED; |
||
1523 | break; |
||
1524 | |||
1525 | case TGSI_SEMANTIC_BASEVERTEX: |
||
1526 | res = bld->system_values.basevertex; |
||
1527 | atype = TGSI_TYPE_UNSIGNED; |
||
1528 | break; |
||
1529 | |||
1530 | case TGSI_SEMANTIC_PRIMID: |
||
1531 | res = bld->system_values.prim_id; |
||
1532 | atype = TGSI_TYPE_UNSIGNED; |
||
1533 | break; |
||
1534 | |||
1535 | default: |
||
1536 | assert(!"unexpected semantic in emit_fetch_system_value"); |
||
1537 | res = bld_base->base.zero; |
||
1538 | atype = TGSI_TYPE_FLOAT; |
||
1539 | break; |
||
1540 | } |
||
1541 | |||
1542 | if (atype != stype) { |
||
1543 | if (stype == TGSI_TYPE_FLOAT) { |
||
1544 | res = LLVMBuildBitCast(builder, res, bld_base->base.vec_type, ""); |
||
1545 | } else if (stype == TGSI_TYPE_UNSIGNED) { |
||
1546 | res = LLVMBuildBitCast(builder, res, bld_base->uint_bld.vec_type, ""); |
||
1547 | } else if (stype == TGSI_TYPE_SIGNED) { |
||
1548 | res = LLVMBuildBitCast(builder, res, bld_base->int_bld.vec_type, ""); |
||
1549 | } |
||
1550 | } |
||
1551 | |||
1552 | return res; |
||
1553 | } |
||
1554 | |||
1555 | /** |
||
1556 | * Register fetch with derivatives. |
||
1557 | */ |
||
1558 | static void |
||
1559 | emit_fetch_deriv( |
||
1560 | struct lp_build_tgsi_soa_context *bld, |
||
1561 | LLVMValueRef src, |
||
1562 | LLVMValueRef *res, |
||
1563 | LLVMValueRef *ddx, |
||
1564 | LLVMValueRef *ddy) |
||
1565 | { |
||
1566 | if(res) |
||
1567 | *res = src; |
||
1568 | |||
1569 | /* TODO: use interpolation coeffs for inputs */ |
||
1570 | |||
1571 | if(ddx) |
||
1572 | *ddx = lp_build_ddx(&bld->bld_base.base, src); |
||
1573 | |||
1574 | if(ddy) |
||
1575 | *ddy = lp_build_ddy(&bld->bld_base.base, src); |
||
1576 | } |
||
1577 | |||
1578 | |||
/**
 * Predicate.
 * Fill pred[0..3] with per-channel predicate masks for 'inst', or NULLs
 * when the instruction is not predicated. Each distinct swizzled source
 * channel of the predicate register is loaded and converted to an integer
 * mask only once, then shared between destination channels.
 */
static void
emit_fetch_predicate(
   struct lp_build_tgsi_soa_context *bld,
   const struct tgsi_full_instruction *inst,
   LLVMValueRef *pred)
{
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;
   unsigned index;
   unsigned char swizzles[4];
   LLVMValueRef unswizzled[4] = {NULL, NULL, NULL, NULL};  /* per-channel cache */
   LLVMValueRef value;
   unsigned chan;

   if (!inst->Instruction.Predicate) {
      /* unpredicated instruction: no mask for any channel */
      TGSI_FOR_EACH_CHANNEL( chan ) {
         pred[chan] = NULL;
      }
      return;
   }

   swizzles[0] = inst->Predicate.SwizzleX;
   swizzles[1] = inst->Predicate.SwizzleY;
   swizzles[2] = inst->Predicate.SwizzleZ;
   swizzles[3] = inst->Predicate.SwizzleW;

   index = inst->Predicate.Index;
   assert(index < LP_MAX_TGSI_PREDS);

   TGSI_FOR_EACH_CHANNEL( chan ) {
      unsigned swizzle = swizzles[chan];

      /*
       * Only fetch the predicate register channels that are actually listed
       * in the swizzles
       */
      if (!unswizzled[swizzle]) {
         value = LLVMBuildLoad(builder,
                               bld->preds[index][swizzle], "");

         /*
          * Convert the value to an integer mask.
          *
          * TODO: Short-circuit this comparison -- a D3D setp_xx instructions
          * is needlessly causing two comparisons due to storing the intermediate
          * result as float vector instead of an integer mask vector.
          */
         value = lp_build_compare(bld->bld_base.base.gallivm,
                                  bld->bld_base.base.type,
                                  PIPE_FUNC_NOTEQUAL,
                                  value,
                                  bld->bld_base.base.zero);
         if (inst->Predicate.Negate) {
            value = LLVMBuildNot(builder, value, "");
         }

         unswizzled[swizzle] = value;
      } else {
         value = unswizzled[swizzle];
      }

      pred[chan] = value;
   }
}
||
1645 | |||
1646 | |||
1647 | /** |
||
1648 | * Register store. |
||
1649 | */ |
||
1650 | static void |
||
1651 | emit_store_chan( |
||
1652 | struct lp_build_tgsi_context *bld_base, |
||
1653 | const struct tgsi_full_instruction *inst, |
||
1654 | unsigned index, |
||
1655 | unsigned chan_index, |
||
1656 | LLVMValueRef pred, |
||
1657 | LLVMValueRef value) |
||
1658 | { |
||
1659 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
1660 | struct gallivm_state *gallivm = bld_base->base.gallivm; |
||
1661 | LLVMBuilderRef builder = gallivm->builder; |
||
1662 | const struct tgsi_full_dst_register *reg = &inst->Dst[index]; |
||
1663 | struct lp_build_context *float_bld = &bld_base->base; |
||
1664 | struct lp_build_context *int_bld = &bld_base->int_bld; |
||
1665 | LLVMValueRef indirect_index = NULL; |
||
1666 | enum tgsi_opcode_type dtype = tgsi_opcode_infer_dst_type(inst->Instruction.Opcode); |
||
1667 | |||
1668 | /* |
||
1669 | * Apply saturation. |
||
1670 | * |
||
1671 | * It is always assumed to be float. |
||
1672 | */ |
||
1673 | switch( inst->Instruction.Saturate ) { |
||
1674 | case TGSI_SAT_NONE: |
||
1675 | break; |
||
1676 | |||
1677 | case TGSI_SAT_ZERO_ONE: |
||
1678 | assert(dtype == TGSI_TYPE_FLOAT || |
||
1679 | dtype == TGSI_TYPE_UNTYPED); |
||
1680 | value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); |
||
1681 | value = lp_build_clamp_zero_one_nanzero(float_bld, value); |
||
1682 | break; |
||
1683 | |||
1684 | case TGSI_SAT_MINUS_PLUS_ONE: |
||
1685 | assert(dtype == TGSI_TYPE_FLOAT || |
||
1686 | dtype == TGSI_TYPE_UNTYPED); |
||
1687 | value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); |
||
1688 | /* This will give -1.0 for NaN which is probably not what we want. */ |
||
1689 | value = lp_build_max_ext(float_bld, value, |
||
1690 | lp_build_const_vec(gallivm, float_bld->type, -1.0), |
||
1691 | GALLIVM_NAN_RETURN_OTHER_SECOND_NONNAN); |
||
1692 | value = lp_build_min(float_bld, value, float_bld->one); |
||
1693 | break; |
||
1694 | |||
1695 | default: |
||
1696 | assert(0); |
||
1697 | } |
||
1698 | |||
1699 | if (reg->Register.Indirect) { |
||
1700 | indirect_index = get_indirect_index(bld, |
||
1701 | reg->Register.File, |
||
1702 | reg->Register.Index, |
||
1703 | ®->Indirect); |
||
1704 | } else { |
||
1705 | assert(reg->Register.Index <= |
||
1706 | bld_base->info->file_max[reg->Register.File]); |
||
1707 | } |
||
1708 | |||
1709 | if (DEBUG_EXECUTION) { |
||
1710 | emit_dump_reg(gallivm, reg->Register.File, reg->Register.Index, chan_index, value); |
||
1711 | } |
||
1712 | |||
1713 | switch( reg->Register.File ) { |
||
1714 | case TGSI_FILE_OUTPUT: |
||
1715 | /* Outputs are always stored as floats */ |
||
1716 | value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); |
||
1717 | |||
1718 | if (reg->Register.Indirect) { |
||
1719 | LLVMValueRef index_vec; /* indexes into the output registers */ |
||
1720 | LLVMValueRef outputs_array; |
||
1721 | LLVMTypeRef fptr_type; |
||
1722 | |||
1723 | index_vec = get_soa_array_offsets(&bld_base->uint_bld, |
||
1724 | indirect_index, |
||
1725 | chan_index, |
||
1726 | TRUE); |
||
1727 | |||
1728 | fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); |
||
1729 | outputs_array = LLVMBuildBitCast(builder, bld->outputs_array, fptr_type, ""); |
||
1730 | |||
1731 | /* Scatter store values into output registers */ |
||
1732 | emit_mask_scatter(bld, outputs_array, index_vec, value, |
||
1733 | &bld->exec_mask, pred); |
||
1734 | } |
||
1735 | else { |
||
1736 | LLVMValueRef out_ptr = lp_get_output_ptr(bld, reg->Register.Index, |
||
1737 | chan_index); |
||
1738 | lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, out_ptr); |
||
1739 | } |
||
1740 | break; |
||
1741 | |||
1742 | case TGSI_FILE_TEMPORARY: |
||
1743 | /* Temporaries are always stored as floats */ |
||
1744 | value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); |
||
1745 | |||
1746 | if (reg->Register.Indirect) { |
||
1747 | LLVMValueRef index_vec; /* indexes into the temp registers */ |
||
1748 | LLVMValueRef temps_array; |
||
1749 | LLVMTypeRef fptr_type; |
||
1750 | |||
1751 | index_vec = get_soa_array_offsets(&bld_base->uint_bld, |
||
1752 | indirect_index, |
||
1753 | chan_index, |
||
1754 | TRUE); |
||
1755 | |||
1756 | fptr_type = LLVMPointerType(LLVMFloatTypeInContext(gallivm->context), 0); |
||
1757 | temps_array = LLVMBuildBitCast(builder, bld->temps_array, fptr_type, ""); |
||
1758 | |||
1759 | /* Scatter store values into temp registers */ |
||
1760 | emit_mask_scatter(bld, temps_array, index_vec, value, |
||
1761 | &bld->exec_mask, pred); |
||
1762 | } |
||
1763 | else { |
||
1764 | LLVMValueRef temp_ptr; |
||
1765 | temp_ptr = lp_get_temp_ptr_soa(bld, reg->Register.Index, chan_index); |
||
1766 | lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, temp_ptr); |
||
1767 | } |
||
1768 | break; |
||
1769 | |||
1770 | case TGSI_FILE_ADDRESS: |
||
1771 | assert(dtype == TGSI_TYPE_SIGNED); |
||
1772 | assert(LLVMTypeOf(value) == int_bld->vec_type); |
||
1773 | value = LLVMBuildBitCast(builder, value, int_bld->vec_type, ""); |
||
1774 | lp_exec_mask_store(&bld->exec_mask, int_bld, pred, value, |
||
1775 | bld->addr[reg->Register.Index][chan_index]); |
||
1776 | break; |
||
1777 | |||
1778 | case TGSI_FILE_PREDICATE: |
||
1779 | assert(LLVMTypeOf(value) == float_bld->vec_type); |
||
1780 | value = LLVMBuildBitCast(builder, value, float_bld->vec_type, ""); |
||
1781 | lp_exec_mask_store(&bld->exec_mask, float_bld, pred, value, |
||
1782 | bld->preds[reg->Register.Index][chan_index]); |
||
1783 | break; |
||
1784 | |||
1785 | default: |
||
1786 | assert( 0 ); |
||
1787 | } |
||
1788 | |||
1789 | (void)dtype; |
||
1790 | } |
||
1791 | |||
1792 | /* |
||
1793 | * Called at the beginning of the translation of each TGSI instruction, to |
||
1794 | * emit some debug code. |
||
1795 | */ |
||
1796 | static void |
||
1797 | emit_debug( |
||
1798 | struct lp_build_tgsi_context * bld_base, |
||
1799 | const struct tgsi_full_instruction * inst, |
||
1800 | const struct tgsi_opcode_info * info) |
||
1801 | |||
1802 | { |
||
1803 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
1804 | |||
1805 | if (DEBUG_EXECUTION) { |
||
1806 | /* |
||
1807 | * Dump the TGSI instruction. |
||
1808 | */ |
||
1809 | |||
1810 | struct gallivm_state *gallivm = bld_base->base.gallivm; |
||
1811 | char buf[512]; |
||
1812 | buf[0] = '$'; |
||
1813 | buf[1] = ' '; |
||
1814 | tgsi_dump_instruction_str(inst, bld_base->pc, &buf[2], sizeof buf - 2); |
||
1815 | lp_build_printf(gallivm, buf); |
||
1816 | |||
1817 | /* Dump the execution mask. |
||
1818 | */ |
||
1819 | if (bld->exec_mask.has_mask) { |
||
1820 | lp_build_print_value(gallivm, " mask = ", bld->exec_mask.exec_mask); |
||
1821 | } |
||
1822 | } |
||
1823 | } |
||
1824 | |||
1825 | static void |
||
1826 | emit_store( |
||
1827 | struct lp_build_tgsi_context * bld_base, |
||
1828 | const struct tgsi_full_instruction * inst, |
||
1829 | const struct tgsi_opcode_info * info, |
||
1830 | LLVMValueRef dst[4]) |
||
1831 | |||
1832 | { |
||
1833 | unsigned chan_index; |
||
1834 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
1835 | |||
1836 | if(info->num_dst) { |
||
1837 | LLVMValueRef pred[TGSI_NUM_CHANNELS]; |
||
1838 | |||
1839 | emit_fetch_predicate( bld, inst, pred ); |
||
1840 | |||
1841 | TGSI_FOR_EACH_DST0_ENABLED_CHANNEL( inst, chan_index ) { |
||
1842 | emit_store_chan(bld_base, inst, 0, chan_index, pred[chan_index], dst[chan_index]); |
||
1843 | } |
||
1844 | } |
||
1845 | } |
||
1846 | |||
1847 | static unsigned |
||
1848 | tgsi_to_pipe_tex_target(unsigned tgsi_target) |
||
1849 | { |
||
1850 | switch (tgsi_target) { |
||
1851 | case TGSI_TEXTURE_BUFFER: |
||
1852 | return PIPE_BUFFER; |
||
1853 | case TGSI_TEXTURE_1D: |
||
1854 | case TGSI_TEXTURE_SHADOW1D: |
||
1855 | return PIPE_TEXTURE_1D; |
||
1856 | case TGSI_TEXTURE_2D: |
||
1857 | case TGSI_TEXTURE_SHADOW2D: |
||
1858 | case TGSI_TEXTURE_2D_MSAA: |
||
1859 | return PIPE_TEXTURE_2D; |
||
1860 | case TGSI_TEXTURE_3D: |
||
1861 | return PIPE_TEXTURE_3D; |
||
1862 | case TGSI_TEXTURE_CUBE: |
||
1863 | case TGSI_TEXTURE_SHADOWCUBE: |
||
1864 | return PIPE_TEXTURE_CUBE; |
||
1865 | case TGSI_TEXTURE_RECT: |
||
1866 | case TGSI_TEXTURE_SHADOWRECT: |
||
1867 | return PIPE_TEXTURE_RECT; |
||
1868 | case TGSI_TEXTURE_1D_ARRAY: |
||
1869 | case TGSI_TEXTURE_SHADOW1D_ARRAY: |
||
1870 | return PIPE_TEXTURE_1D_ARRAY; |
||
1871 | case TGSI_TEXTURE_2D_ARRAY: |
||
1872 | case TGSI_TEXTURE_SHADOW2D_ARRAY: |
||
1873 | case TGSI_TEXTURE_2D_ARRAY_MSAA: |
||
1874 | return PIPE_TEXTURE_2D_ARRAY; |
||
1875 | case TGSI_TEXTURE_CUBE_ARRAY: |
||
1876 | case TGSI_TEXTURE_SHADOWCUBE_ARRAY: |
||
1877 | return PIPE_TEXTURE_CUBE_ARRAY; |
||
1878 | default: |
||
1879 | assert(0); |
||
1880 | return PIPE_BUFFER; |
||
1881 | } |
||
1882 | } |
||
1883 | |||
1884 | |||
1885 | static enum lp_sampler_lod_property |
||
1886 | lp_build_lod_property( |
||
1887 | struct lp_build_tgsi_context *bld_base, |
||
1888 | const struct tgsi_full_instruction *inst, |
||
1889 | unsigned src_op) |
||
1890 | { |
||
1891 | const struct tgsi_full_src_register *reg = &inst->Src[src_op]; |
||
1892 | enum lp_sampler_lod_property lod_property; |
||
1893 | |||
1894 | /* |
||
1895 | * Not much we can do here. We could try catching inputs declared |
||
1896 | * with constant interpolation but not sure it's worth it - since for |
||
1897 | * TEX opcodes as well as FETCH/LD the lod comes from same reg as |
||
1898 | * the coords, so it could only work for SAMPLE/TXQ/SVIEWINFO), just |
||
1899 | * like the constant/immediate recognition below. |
||
1900 | * What seems to be of more value would be to recognize temps holding |
||
1901 | * broadcasted scalars but no way we can do it. |
||
1902 | * Tried asking llvm but without any success (using LLVMIsConstant |
||
1903 | * even though this isn't exactly what we'd need), even as simple as |
||
1904 | * IMM[0] UINT32 (0,-1,0,0) |
||
1905 | * MOV TEMP[0] IMM[0].yyyy |
||
1906 | * SVIEWINFO TEMP[1], TEMP[0].xxxx, SVIEWINFO[0] |
||
1907 | * doesn't work. |
||
1908 | * This means there's ZERO chance this will ever catch a scalar lod |
||
1909 | * with traditional tex opcodes as well as texel fetches, since the lod |
||
1910 | * comes from the same reg as coords (except some test shaders using |
||
1911 | * constant coords maybe). |
||
1912 | * There's at least hope for sample opcodes as well as size queries. |
||
1913 | */ |
||
1914 | if (reg->Register.File == TGSI_FILE_CONSTANT || |
||
1915 | reg->Register.File == TGSI_FILE_IMMEDIATE) { |
||
1916 | lod_property = LP_SAMPLER_LOD_SCALAR; |
||
1917 | } |
||
1918 | else if (bld_base->info->processor == TGSI_PROCESSOR_FRAGMENT) { |
||
1919 | if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) { |
||
1920 | lod_property = LP_SAMPLER_LOD_PER_ELEMENT; |
||
1921 | } |
||
1922 | else { |
||
1923 | lod_property = LP_SAMPLER_LOD_PER_QUAD; |
||
1924 | } |
||
1925 | } |
||
1926 | else { |
||
1927 | /* never use scalar (per-quad) lod the results are just too wrong. */ |
||
1928 | lod_property = LP_SAMPLER_LOD_PER_ELEMENT; |
||
1929 | } |
||
1930 | return lod_property; |
||
1931 | } |
||
1932 | |||
1933 | |||
1934 | /** |
||
1935 | * High-level instruction translators. |
||
1936 | */ |
||
1937 | |||
/**
 * Emit a texture sample for a traditional TGSI tex opcode (TEX/TXB/TXL/
 * TXP/TXD family), where texture and sampler share a single unit index.
 *
 * \param modifier     lod bias / explicit lod / projected / explicit derivs
 * \param texel        receives the four resulting texel channels
 * \param sampler_reg  which Src operand holds the sampler unit
 * \param sampler_op   sample operation type (texture sample, gather, ...)
 */
static void
emit_tex( struct lp_build_tgsi_soa_context *bld,
          const struct tgsi_full_instruction *inst,
          enum lp_build_tex_modifier modifier,
          LLVMValueRef *texel,
          unsigned sampler_reg,
          enum lp_sampler_op_type sampler_op)
{
   unsigned unit = inst->Src[sampler_reg].Register.Index;
   LLVMValueRef oow = NULL;       /* 1/w, for projected texturing */
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned num_derivs, num_offsets, i;
   unsigned shadow_coord = 0;     /* src channel holding the shadow reference */
   unsigned layer_coord = 0;      /* src channel holding the array layer */
   unsigned sample_key = sampler_op << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      /* No sampler generator plugged in: emit undefs so codegen continues. */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * Per-target layout: number of offsets/derivatives needed, and which
    * source channels carry the layer index and shadow reference.
    */
   switch (inst->Texture.Texture) {
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      /* fallthrough */
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW1D_ARRAY:
      layer_coord = 1;
      /* fallthrough */
   case TGSI_TEXTURE_SHADOW1D:
      shadow_coord = 2;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_SHADOW2D_ARRAY:
      layer_coord = 2;
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_SHADOW2D:
   case TGSI_TEXTURE_SHADOWRECT:
      shadow_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE:
      shadow_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      break;
   case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
      num_offsets = 2;
      num_derivs = 3;
      layer_coord = 3;
      shadow_coord = 4; /* shadow coord special different reg */
      break;
   case TGSI_TEXTURE_2D_MSAA:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
   default:
      /* MSAA targets are not sampled via traditional tex opcodes. */
      assert(0);
      return;
   }

   /* Note lod and especially projected are illegal in a LOT of cases */
   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      if (inst->Texture.Texture == TGSI_TEXTURE_SHADOWCUBE ||
          inst->Texture.Texture == TGSI_TEXTURE_CUBE_ARRAY) {
         /* note that shadow cube array with bias/explicit lod does not exist */
         /* lod comes from Src[1].x since Src[0].w is occupied by coords */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         /* lod in the usual place: Src[0].w */
         lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED) {
      /* TXP: divide coords by w -- fetch w once and precompute 1/w. */
      oow = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      oow = lp_build_rcp(&bld->bld_base.base, oow);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[i] = lp_build_mul(&bld->bld_base.base, coords[i], oow);
   }
   /* Fill unused coord slots with undef. */
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3) {
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      else {
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[2] = lp_build_mul(&bld->bld_base.base, coords[2], oow);
   }
   /* Shadow coord occupies always 5th slot. */
   if (shadow_coord) {
      sample_key |= LP_SAMPLER_SHADOW;
      if (shadow_coord == 4) {
         /* shadow cube array: reference value lives in Src[1].x */
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 1, 0);
      }
      else {
         coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 0, shadow_coord);
      }
      if (modifier == LP_BLD_TEX_MODIFIER_PROJECTED)
         coords[4] = lp_build_mul(&bld->bld_base.base, coords[4], oow);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      /* TXD: explicit derivatives from Src[1] (ddx) and Src[2] (ddy). */
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 1, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 2, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* we don't handle the 4 offset version of tg4 */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }

   /* Hand everything off to the sampler generator. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);
}
||
2143 | |||
/**
 * Emit a texture sample for the D3D-style SAMPLE* opcodes, where the
 * texture view index comes from Src[1] and the sampler state from Src[2]
 * (separate units, unlike traditional tex opcodes).
 *
 * \param modifier  lod bias / explicit lod / lod zero / explicit derivs
 * \param compare   true for comparison (shadow) sampling
 * \param texel     receives the four resulting texel channels
 */
static void
emit_sample(struct lp_build_tgsi_soa_context *bld,
            const struct tgsi_full_instruction *inst,
            enum lp_build_tex_modifier modifier,
            boolean compare,
            LLVMValueRef *texel)
{
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   unsigned texture_unit, sampler_unit;
   LLVMValueRef lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_derivatives derivs;
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;

   unsigned num_offsets, num_derivs, i;
   unsigned layer_coord = 0;   /* src channel holding the array layer */
   unsigned sample_key = LP_SAMPLER_OP_TEXTURE << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      /* No sampler generator plugged in: emit undefs so codegen continues. */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = bld->bld_base.base.undef;
      }
      return;
   }

   /*
    * unlike old-style tex opcodes the texture/sampler indices
    * always come from src1 and src2 respectively.
    */
   texture_unit = inst->Src[1].Register.Index;
   sampler_unit = inst->Src[2].Register.Index;

   /*
    * Note inst->Texture.Texture will contain the number of offsets,
    * however the target information is NOT there and comes from the
    * declared sampler views instead.
    */
   switch (bld->sv[texture_unit].Resource) {
   case TGSI_TEXTURE_1D:
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      num_offsets = 1;
      num_derivs = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
      layer_coord = 2;
      num_offsets = 2;
      num_derivs = 2;
      break;
   case TGSI_TEXTURE_CUBE:
      num_offsets = 2;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_3D:
      num_offsets = 3;
      num_derivs = 3;
      break;
   case TGSI_TEXTURE_CUBE_ARRAY:
      layer_coord = 3;
      num_offsets = 2;
      num_derivs = 3;
      break;
   default:
      assert(0);
      return;
   }

   if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS ||
       modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
      /* SAMPLE_B/SAMPLE_L carry the lod (bias) in Src[3].x */
      lod = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
      if (modifier == LP_BLD_TEX_MODIFIER_LOD_BIAS) {
         sample_key |= LP_SAMPLER_LOD_BIAS << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      else if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_LOD) {
         sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      }
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   else if (modifier == LP_BLD_TEX_MODIFIER_LOD_ZERO) {
      /* XXX might be better to explicitly pass the level zero information */
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      lod = lp_build_const_vec(gallivm, bld->bld_base.base.type, 0.0F);
   }

   for (i = 0; i < num_derivs; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* Fill unused coord slots with undef. */
   for (i = num_derivs; i < 5; i++) {
      coords[i] = bld->bld_base.base.undef;
   }

   /* Layer coord always goes into 3rd slot, except for cube map arrays */
   if (layer_coord) {
      if (layer_coord == 3)
         coords[3] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
      else
         coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);
   }
   /* Shadow coord occupies always 5th slot. */
   if (compare) {
      /* SAMPLE_C: reference value is in Src[3].x */
      sample_key |= LP_SAMPLER_SHADOW;
      coords[4] = lp_build_emit_fetch(&bld->bld_base, inst, 3, 0);
   }

   if (modifier == LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV) {
      /* SAMPLE_D: explicit derivatives from Src[3] (ddx) and Src[4] (ddy). */
      unsigned dim;
      sample_key |= LP_SAMPLER_LOD_DERIVATIVES << LP_SAMPLER_LOD_CONTROL_SHIFT;
      for (dim = 0; dim < num_derivs; ++dim) {
         derivs.ddx[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 3, dim);
         derivs.ddy[dim] = lp_build_emit_fetch(&bld->bld_base, inst, 4, dim);
      }
      params.derivs = &derivs;
      /*
       * could also check all src regs if constant but I doubt such
       * cases exist in practice.
       */
      if (bld->bld_base.info->processor == TGSI_PROCESSOR_FRAGMENT) {
         if (gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) {
            lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
         }
         else {
            lod_property = LP_SAMPLER_LOD_PER_QUAD;
         }
      }
      else {
         lod_property = LP_SAMPLER_LOD_PER_ELEMENT;
      }
   }

   /* some advanced gather instructions (txgo) would require 4 offsets */
   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < num_offsets; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* Hand everything off to the sampler generator. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = texture_unit;
   params.sampler_index = sampler_unit;
   params.context_ptr = bld->context_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.lod = lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* Apply the view's return swizzle (from Src[1]) if it is non-identity. */
   if (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
       inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
       inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
       inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
||
2323 | |||
/**
 * Emit a texel fetch (TXF / SAMPLE_I): direct, unfiltered read using
 * integer texel coordinates.
 *
 * \param texel       receives the four resulting texel channels
 * \param is_samplei  true for SAMPLE_I (target from declared sampler view),
 *                    false for TXF (target on the instruction itself)
 */
static void
emit_fetch_texels( struct lp_build_tgsi_soa_context *bld,
                   const struct tgsi_full_instruction *inst,
                   LLVMValueRef *texel,
                   boolean is_samplei)
{
   unsigned unit, target;
   LLVMValueRef coord_undef = LLVMGetUndef(bld->bld_base.base.int_vec_type);
   LLVMValueRef explicit_lod = NULL;
   LLVMValueRef coords[5];
   LLVMValueRef offsets[3] = { NULL };
   struct lp_sampler_params params;
   enum lp_sampler_lod_property lod_property = LP_SAMPLER_LOD_SCALAR;
   unsigned dims, i;
   unsigned layer_coord = 0;   /* src channel holding the array layer */
   unsigned sample_key = LP_SAMPLER_OP_FETCH << LP_SAMPLER_OP_TYPE_SHIFT;

   memset(&params, 0, sizeof(params));

   if (!bld->sampler) {
      /* No sampler generator plugged in: emit undefs so codegen continues. */
      _debug_printf("warning: found texture instruction but no sampler generator supplied\n");
      for (i = 0; i < 4; i++) {
         texel[i] = coord_undef;
      }
      return;
   }

   unit = inst->Src[1].Register.Index;

   if (is_samplei) {
      /* SAMPLE_I: target comes from the declared sampler view. */
      target = bld->sv[unit].Resource;
   }
   else {
      /* TXF: target is encoded on the instruction. */
      target = inst->Texture.Texture;
   }

   /* Per-target coordinate dimensionality and layer-channel placement. */
   switch (target) {
   case TGSI_TEXTURE_1D:
   case TGSI_TEXTURE_BUFFER:
      dims = 1;
      break;
   case TGSI_TEXTURE_1D_ARRAY:
      layer_coord = 1;
      dims = 1;
      break;
   case TGSI_TEXTURE_2D:
   case TGSI_TEXTURE_RECT:
   case TGSI_TEXTURE_2D_MSAA:
      dims = 2;
      break;
   case TGSI_TEXTURE_2D_ARRAY:
   case TGSI_TEXTURE_2D_ARRAY_MSAA:
      layer_coord = 2;
      dims = 2;
      break;
   case TGSI_TEXTURE_3D:
      dims = 3;
      break;
   default:
      assert(0);
      return;
   }

   /* always have lod except for buffers and msaa targets ? */
   if (target != TGSI_TEXTURE_BUFFER &&
       target != TGSI_TEXTURE_2D_MSAA &&
       target != TGSI_TEXTURE_2D_ARRAY_MSAA) {
      sample_key |= LP_SAMPLER_LOD_EXPLICIT << LP_SAMPLER_LOD_CONTROL_SHIFT;
      /* lod (mip level) is in Src[0].w */
      explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 3);
      lod_property = lp_build_lod_property(&bld->bld_base, inst, 0);
   }
   /* XXX: for real msaa support, the w component would be the sample index. */

   for (i = 0; i < dims; i++) {
      coords[i] = lp_build_emit_fetch(&bld->bld_base, inst, 0, i);
   }
   /* never use more than 3 coords here but emit_fetch_texel copies all 5 anyway */
   for (i = dims; i < 5; i++) {
      coords[i] = coord_undef;
   }
   if (layer_coord)
      coords[2] = lp_build_emit_fetch(&bld->bld_base, inst, 0, layer_coord);

   if (inst->Texture.NumOffsets == 1) {
      unsigned dim;
      sample_key |= LP_SAMPLER_OFFSETS;
      for (dim = 0; dim < dims; dim++) {
         offsets[dim] = lp_build_emit_fetch_texoffset(&bld->bld_base, inst, 0, dim);
      }
   }
   sample_key |= lod_property << LP_SAMPLER_LOD_PROPERTY_SHIFT;

   /* Hand everything off to the sampler generator. */
   params.type = bld->bld_base.base.type;
   params.sample_key = sample_key;
   params.texture_index = unit;
   params.sampler_index = unit;
   params.context_ptr = bld->context_ptr;
   params.coords = coords;
   params.offsets = offsets;
   params.derivs = NULL;
   params.lod = explicit_lod;
   params.texel = texel;

   bld->sampler->emit_tex_sample(bld->sampler,
                                 bld->bld_base.base.gallivm,
                                 &params);

   /* SAMPLE_I only: apply the view's return swizzle if non-identity. */
   if (is_samplei &&
       (inst->Src[1].Register.SwizzleX != PIPE_SWIZZLE_RED ||
        inst->Src[1].Register.SwizzleY != PIPE_SWIZZLE_GREEN ||
        inst->Src[1].Register.SwizzleZ != PIPE_SWIZZLE_BLUE ||
        inst->Src[1].Register.SwizzleW != PIPE_SWIZZLE_ALPHA)) {
      unsigned char swizzles[4];
      swizzles[0] = inst->Src[1].Register.SwizzleX;
      swizzles[1] = inst->Src[1].Register.SwizzleY;
      swizzles[2] = inst->Src[1].Register.SwizzleZ;
      swizzles[3] = inst->Src[1].Register.SwizzleW;

      lp_build_swizzle_soa_inplace(&bld->bld_base.base, texel, swizzles);
   }
}
||
2445 | |||
2446 | static void |
||
2447 | emit_size_query( struct lp_build_tgsi_soa_context *bld, |
||
2448 | const struct tgsi_full_instruction *inst, |
||
2449 | LLVMValueRef *sizes_out, |
||
2450 | boolean is_sviewinfo) |
||
2451 | { |
||
2452 | LLVMValueRef explicit_lod; |
||
2453 | enum lp_sampler_lod_property lod_property; |
||
2454 | unsigned has_lod; |
||
2455 | unsigned i; |
||
2456 | unsigned unit = inst->Src[1].Register.Index; |
||
2457 | unsigned target, pipe_target; |
||
2458 | |||
2459 | if (is_sviewinfo) { |
||
2460 | target = bld->sv[unit].Resource; |
||
2461 | } |
||
2462 | else { |
||
2463 | target = inst->Texture.Texture; |
||
2464 | } |
||
2465 | switch (target) { |
||
2466 | case TGSI_TEXTURE_BUFFER: |
||
2467 | case TGSI_TEXTURE_RECT: |
||
2468 | case TGSI_TEXTURE_SHADOWRECT: |
||
2469 | has_lod = 0; |
||
2470 | break; |
||
2471 | default: |
||
2472 | has_lod = 1; |
||
2473 | break; |
||
2474 | } |
||
2475 | |||
2476 | if (!bld->sampler) { |
||
2477 | _debug_printf("warning: found texture query instruction but no sampler generator supplied\n"); |
||
2478 | for (i = 0; i < 4; i++) |
||
2479 | sizes_out[i] = bld->bld_base.int_bld.undef; |
||
2480 | return; |
||
2481 | } |
||
2482 | |||
2483 | if (has_lod) { |
||
2484 | explicit_lod = lp_build_emit_fetch(&bld->bld_base, inst, 0, 0); |
||
2485 | lod_property = lp_build_lod_property(&bld->bld_base, inst, 0); |
||
2486 | } |
||
2487 | else { |
||
2488 | explicit_lod = NULL; |
||
2489 | lod_property = LP_SAMPLER_LOD_SCALAR; |
||
2490 | } |
||
2491 | |||
2492 | |||
2493 | pipe_target = tgsi_to_pipe_tex_target(target); |
||
2494 | |||
2495 | bld->sampler->emit_size_query(bld->sampler, |
||
2496 | bld->bld_base.base.gallivm, |
||
2497 | bld->bld_base.int_bld.type, |
||
2498 | unit, pipe_target, |
||
2499 | bld->context_ptr, |
||
2500 | TRUE, |
||
2501 | lod_property, |
||
2502 | explicit_lod, |
||
2503 | sizes_out); |
||
2504 | } |
||
2505 | |||
2506 | static boolean |
||
2507 | near_end_of_shader(struct lp_build_tgsi_soa_context *bld, |
||
2508 | int pc) |
||
2509 | { |
||
2510 | int i; |
||
2511 | |||
2512 | for (i = 0; i < 5; i++) { |
||
2513 | unsigned opcode; |
||
2514 | |||
2515 | if (pc + i >= bld->bld_base.info->num_instructions) |
||
2516 | return TRUE; |
||
2517 | |||
2518 | opcode = bld->bld_base.instructions[pc + i].Instruction.Opcode; |
||
2519 | |||
2520 | if (opcode == TGSI_OPCODE_END) |
||
2521 | return TRUE; |
||
2522 | |||
2523 | if (opcode == TGSI_OPCODE_TEX || |
||
2524 | opcode == TGSI_OPCODE_TXP || |
||
2525 | opcode == TGSI_OPCODE_TXD || |
||
2526 | opcode == TGSI_OPCODE_TXB || |
||
2527 | opcode == TGSI_OPCODE_TXL || |
||
2528 | opcode == TGSI_OPCODE_TXF || |
||
2529 | opcode == TGSI_OPCODE_TXQ || |
||
2530 | opcode == TGSI_OPCODE_TEX2 || |
||
2531 | opcode == TGSI_OPCODE_TXB2 || |
||
2532 | opcode == TGSI_OPCODE_TXL2 || |
||
2533 | opcode == TGSI_OPCODE_SAMPLE || |
||
2534 | opcode == TGSI_OPCODE_SAMPLE_B || |
||
2535 | opcode == TGSI_OPCODE_SAMPLE_C || |
||
2536 | opcode == TGSI_OPCODE_SAMPLE_C_LZ || |
||
2537 | opcode == TGSI_OPCODE_SAMPLE_D || |
||
2538 | opcode == TGSI_OPCODE_SAMPLE_I || |
||
2539 | opcode == TGSI_OPCODE_SAMPLE_L || |
||
2540 | opcode == TGSI_OPCODE_SVIEWINFO || |
||
2541 | opcode == TGSI_OPCODE_CAL || |
||
2542 | opcode == TGSI_OPCODE_CALLNZ || |
||
2543 | opcode == TGSI_OPCODE_IF || |
||
2544 | opcode == TGSI_OPCODE_UIF || |
||
2545 | opcode == TGSI_OPCODE_BGNLOOP || |
||
2546 | opcode == TGSI_OPCODE_SWITCH) |
||
2547 | return FALSE; |
||
2548 | } |
||
2549 | |||
2550 | return TRUE; |
||
2551 | } |
||
2552 | |||
2553 | |||
2554 | |||
2555 | /** |
||
2556 | * Kill fragment if any of the src register values are negative. |
||
2557 | */ |
||
2558 | static void |
||
2559 | emit_kill_if( |
||
2560 | struct lp_build_tgsi_soa_context *bld, |
||
2561 | const struct tgsi_full_instruction *inst, |
||
2562 | int pc) |
||
2563 | { |
||
2564 | LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
||
2565 | const struct tgsi_full_src_register *reg = &inst->Src[0]; |
||
2566 | LLVMValueRef terms[TGSI_NUM_CHANNELS]; |
||
2567 | LLVMValueRef mask; |
||
2568 | unsigned chan_index; |
||
2569 | |||
2570 | memset(&terms, 0, sizeof terms); |
||
2571 | |||
2572 | TGSI_FOR_EACH_CHANNEL( chan_index ) { |
||
2573 | unsigned swizzle; |
||
2574 | |||
2575 | /* Unswizzle channel */ |
||
2576 | swizzle = tgsi_util_get_full_src_register_swizzle( reg, chan_index ); |
||
2577 | |||
2578 | /* Check if the component has not been already tested. */ |
||
2579 | assert(swizzle < TGSI_NUM_CHANNELS); |
||
2580 | if( !terms[swizzle] ) |
||
2581 | /* TODO: change the comparison operator instead of setting the sign */ |
||
2582 | terms[swizzle] = lp_build_emit_fetch(&bld->bld_base, inst, 0, chan_index ); |
||
2583 | } |
||
2584 | |||
2585 | mask = NULL; |
||
2586 | TGSI_FOR_EACH_CHANNEL( chan_index ) { |
||
2587 | if(terms[chan_index]) { |
||
2588 | LLVMValueRef chan_mask; |
||
2589 | |||
2590 | /* |
||
2591 | * If term < 0 then mask = 0 else mask = ~0. |
||
2592 | */ |
||
2593 | chan_mask = lp_build_cmp(&bld->bld_base.base, PIPE_FUNC_GEQUAL, terms[chan_index], bld->bld_base.base.zero); |
||
2594 | |||
2595 | if(mask) |
||
2596 | mask = LLVMBuildAnd(builder, mask, chan_mask, ""); |
||
2597 | else |
||
2598 | mask = chan_mask; |
||
2599 | } |
||
2600 | } |
||
2601 | |||
2602 | if (bld->exec_mask.has_mask) { |
||
2603 | LLVMValueRef invmask; |
||
2604 | invmask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); |
||
2605 | mask = LLVMBuildOr(builder, mask, invmask, ""); |
||
2606 | } |
||
2607 | |||
2608 | lp_build_mask_update(bld->mask, mask); |
||
2609 | if (!near_end_of_shader(bld, pc)) |
||
2610 | lp_build_mask_check(bld->mask); |
||
2611 | } |
||
2612 | |||
2613 | |||
2614 | /** |
||
2615 | * Unconditional fragment kill. |
||
2616 | * The only predication is the execution mask which will apply if |
||
2617 | * we're inside a loop or conditional. |
||
2618 | */ |
||
2619 | static void |
||
2620 | emit_kill(struct lp_build_tgsi_soa_context *bld, |
||
2621 | int pc) |
||
2622 | { |
||
2623 | LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
||
2624 | LLVMValueRef mask; |
||
2625 | |||
2626 | /* For those channels which are "alive", disable fragment shader |
||
2627 | * execution. |
||
2628 | */ |
||
2629 | if (bld->exec_mask.has_mask) { |
||
2630 | mask = LLVMBuildNot(builder, bld->exec_mask.exec_mask, "kilp"); |
||
2631 | } |
||
2632 | else { |
||
2633 | LLVMValueRef zero = LLVMConstNull(bld->bld_base.base.int_vec_type); |
||
2634 | mask = zero; |
||
2635 | } |
||
2636 | |||
2637 | lp_build_mask_update(bld->mask, mask); |
||
2638 | |||
2639 | if (!near_end_of_shader(bld, pc)) |
||
2640 | lp_build_mask_check(bld->mask); |
||
2641 | } |
||
2642 | |||
2643 | |||
2644 | /** |
||
2645 | * Emit code which will dump the value of all the temporary registers |
||
2646 | * to stdout. |
||
2647 | */ |
||
2648 | static void |
||
2649 | emit_dump_file(struct lp_build_tgsi_soa_context *bld, |
||
2650 | unsigned file) |
||
2651 | { |
||
2652 | const struct tgsi_shader_info *info = bld->bld_base.info; |
||
2653 | struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
||
2654 | LLVMBuilderRef builder = gallivm->builder; |
||
2655 | LLVMValueRef reg_ptr; |
||
2656 | int index; |
||
2657 | int max_index = info->file_max[file]; |
||
2658 | |||
2659 | /* |
||
2660 | * Some register files, particularly constants, can be very large, |
||
2661 | * and dumping everything could make this unusably slow. |
||
2662 | */ |
||
2663 | max_index = MIN2(max_index, 32); |
||
2664 | |||
2665 | for (index = 0; index <= max_index; index++) { |
||
2666 | LLVMValueRef res; |
||
2667 | unsigned mask; |
||
2668 | int chan; |
||
2669 | |||
2670 | if (index < 8 * sizeof(unsigned) && |
||
2671 | (info->file_mask[file] & (1 << index)) == 0) { |
||
2672 | /* This was not declared.*/ |
||
2673 | continue; |
||
2674 | } |
||
2675 | |||
2676 | if (file == TGSI_FILE_INPUT) { |
||
2677 | mask = info->input_usage_mask[index]; |
||
2678 | } else { |
||
2679 | mask = TGSI_WRITEMASK_XYZW; |
||
2680 | } |
||
2681 | |||
2682 | for (chan = 0; chan < 4; chan++) { |
||
2683 | if ((mask & (1 << chan)) == 0) { |
||
2684 | /* This channel is not used.*/ |
||
2685 | continue; |
||
2686 | } |
||
2687 | |||
2688 | if (file == TGSI_FILE_CONSTANT) { |
||
2689 | struct tgsi_full_src_register reg; |
||
2690 | memset(®, 0, sizeof reg); |
||
2691 | reg.Register.File = file; |
||
2692 | reg.Register.Index = index; |
||
2693 | reg.Register.SwizzleX = 0; |
||
2694 | reg.Register.SwizzleY = 1; |
||
2695 | reg.Register.SwizzleZ = 2; |
||
2696 | reg.Register.SwizzleW = 3; |
||
2697 | |||
2698 | res = bld->bld_base.emit_fetch_funcs[file](&bld->bld_base, ®, TGSI_TYPE_FLOAT, chan); |
||
2699 | if (!res) { |
||
2700 | continue; |
||
2701 | } |
||
2702 | } else if (file == TGSI_FILE_INPUT) { |
||
2703 | res = bld->inputs[index][chan]; |
||
2704 | if (!res) { |
||
2705 | continue; |
||
2706 | } |
||
2707 | } else if (file == TGSI_FILE_TEMPORARY) { |
||
2708 | reg_ptr = lp_get_temp_ptr_soa(bld, index, chan); |
||
2709 | assert(reg_ptr); |
||
2710 | res = LLVMBuildLoad(builder, reg_ptr, ""); |
||
2711 | } else if (file == TGSI_FILE_OUTPUT) { |
||
2712 | reg_ptr = lp_get_output_ptr(bld, index, chan); |
||
2713 | assert(reg_ptr); |
||
2714 | res = LLVMBuildLoad(builder, reg_ptr, ""); |
||
2715 | } else { |
||
2716 | assert(0); |
||
2717 | continue; |
||
2718 | } |
||
2719 | |||
2720 | emit_dump_reg(gallivm, file, index, chan, res); |
||
2721 | } |
||
2722 | } |
||
2723 | } |
||
2724 | |||
2725 | |||
2726 | |||
/**
 * Process a TGSI declaration.
 *
 * Allocates per-register, per-channel storage (allocas) for temporaries,
 * outputs, address and predicate registers; records sampler view targets;
 * and resolves per-buffer constant pointers.  TEMP/OUTPUT files that are
 * indirectly addressed are skipped here (presumably backed by array
 * storage set up elsewhere — note the indirect_files checks).
 */
void
lp_emit_declaration_soa(
   struct lp_build_tgsi_context *bld_base,
   const struct tgsi_full_declaration *decl)
{
   struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base);
   struct gallivm_state *gallivm = bld->bld_base.base.gallivm;
   LLVMTypeRef vec_type = bld->bld_base.base.vec_type;
   const unsigned first = decl->Range.First;
   const unsigned last = decl->Range.Last;
   unsigned idx, i;

   assert(last <= bld->bld_base.info->file_max[decl->Declaration.File]);

   switch (decl->Declaration.File) {
   case TGSI_FILE_TEMPORARY:
      /* One alloca per channel, unless temporaries are indirectly
       * addressed. */
      if (!(bld->indirect_files & (1 << TGSI_FILE_TEMPORARY))) {
         assert(last < LP_MAX_INLINED_TEMPS);
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->temps[idx][i] = lp_build_alloca(gallivm, vec_type, "temp");
         }
      }
      break;

   case TGSI_FILE_OUTPUT:
      if (!(bld->indirect_files & (1 << TGSI_FILE_OUTPUT))) {
         for (idx = first; idx <= last; ++idx) {
            for (i = 0; i < TGSI_NUM_CHANNELS; i++)
               bld->outputs[idx][i] = lp_build_alloca(gallivm,
                                                      vec_type, "output");
         }
      }
      break;

   case TGSI_FILE_ADDRESS:
      /* ADDR registers are only allocated with an integer LLVM IR type,
       * as they are guaranteed to always have integers.
       * XXX: Not sure if this exception is worthwhile (or the whole idea of
       * an ADDR register for that matter).
       */
      assert(last < LP_MAX_TGSI_ADDRS);
      for (idx = first; idx <= last; ++idx) {
         assert(idx < LP_MAX_TGSI_ADDRS);
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->addr[idx][i] = lp_build_alloca(gallivm, bld_base->base.int_vec_type, "addr");
      }
      break;

   case TGSI_FILE_PREDICATE:
      assert(last < LP_MAX_TGSI_PREDS);
      for (idx = first; idx <= last; ++idx) {
         for (i = 0; i < TGSI_NUM_CHANNELS; i++)
            bld->preds[idx][i] = lp_build_alloca(gallivm, vec_type,
                                                 "predicate");
      }
      break;

   case TGSI_FILE_SAMPLER_VIEW:
      /*
       * The target stored here MUST match whatever there actually
       * is in the set sampler views (what about return type?).
       */
      assert(last < PIPE_MAX_SHADER_SAMPLER_VIEWS);
      for (idx = first; idx <= last; ++idx) {
         bld->sv[idx] = decl->SamplerView;
      }
      break;

   case TGSI_FILE_CONSTANT:
   {
      /*
       * We could trivially fetch the per-buffer pointer when fetching the
       * constant, relying on llvm to figure out it's always the same pointer
       * anyway. However, doing so results in a huge (more than factor of 10)
       * slowdown in llvm compilation times for some (but not all) shaders
       * (more specifically, the IR optimization spends way more time in
       * DominatorTree::dominates). At least with llvm versions 3.1, 3.3.
       */
      unsigned idx2D = decl->Dim.Index2D;
      LLVMValueRef index2D = lp_build_const_int32(gallivm, idx2D);
      assert(idx2D < LP_MAX_TGSI_CONST_BUFFERS);
      bld->consts[idx2D] =
         lp_build_array_get(gallivm, bld->consts_ptr, index2D);
      bld->consts_sizes[idx2D] =
         lp_build_array_get(gallivm, bld->const_sizes_ptr, index2D);
   }
   break;

   default:
      /* don't need to declare other vars */
      break;
   }
}
||
2821 | |||
2822 | |||
2823 | void lp_emit_immediate_soa( |
||
2824 | struct lp_build_tgsi_context *bld_base, |
||
2825 | const struct tgsi_full_immediate *imm) |
||
2826 | { |
||
2827 | struct lp_build_tgsi_soa_context *bld = lp_soa_context(bld_base); |
||
2828 | struct gallivm_state * gallivm = bld_base->base.gallivm; |
||
2829 | LLVMValueRef imms[4]; |
||
2830 | unsigned i; |
||
2831 | const uint size = imm->Immediate.NrTokens - 1; |
||
2832 | assert(size <= 4); |
||
2833 | switch (imm->Immediate.DataType) { |
||
2834 | case TGSI_IMM_FLOAT32: |
||
2835 | for( i = 0; i < size; ++i ) |
||
2836 | imms[i] = |
||
2837 | lp_build_const_vec(gallivm, bld_base->base.type, imm->u[i].Float); |
||
2838 | |||
2839 | break; |
||
2840 | case TGSI_IMM_UINT32: |
||
2841 | for( i = 0; i < size; ++i ) { |
||
2842 | LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->uint_bld.type, imm->u[i].Uint); |
||
2843 | imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type); |
||
2844 | } |
||
2845 | |||
2846 | break; |
||
2847 | case TGSI_IMM_INT32: |
||
2848 | for( i = 0; i < size; ++i ) { |
||
2849 | LLVMValueRef tmp = lp_build_const_vec(gallivm, bld_base->int_bld.type, imm->u[i].Int); |
||
2850 | imms[i] = LLVMConstBitCast(tmp, bld_base->base.vec_type); |
||
2851 | } |
||
2852 | |||
2853 | break; |
||
2854 | } |
||
2855 | for( i = size; i < 4; ++i ) |
||
2856 | imms[i] = bld_base->base.undef; |
||
2857 | |||
2858 | if (bld->use_immediates_array) { |
||
2859 | unsigned index = bld->num_immediates; |
||
2860 | struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
||
2861 | LLVMBuilderRef builder = gallivm->builder; |
||
2862 | |||
2863 | assert(bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)); |
||
2864 | for (i = 0; i < 4; ++i ) { |
||
2865 | LLVMValueRef lindex = lp_build_const_int32( |
||
2866 | bld->bld_base.base.gallivm, index * 4 + i); |
||
2867 | LLVMValueRef imm_ptr = LLVMBuildGEP(builder, |
||
2868 | bld->imms_array, &lindex, 1, ""); |
||
2869 | LLVMBuildStore(builder, imms[i], imm_ptr); |
||
2870 | } |
||
2871 | } else { |
||
2872 | /* simply copy the immediate values into the next immediates[] slot */ |
||
2873 | unsigned i; |
||
2874 | const uint size = imm->Immediate.NrTokens - 1; |
||
2875 | assert(size <= 4); |
||
2876 | assert(bld->num_immediates < LP_MAX_INLINED_IMMEDIATES); |
||
2877 | |||
2878 | for(i = 0; i < 4; ++i ) |
||
2879 | bld->immediates[bld->num_immediates][i] = imms[i]; |
||
2880 | |||
2881 | if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) { |
||
2882 | unsigned index = bld->num_immediates; |
||
2883 | struct gallivm_state *gallivm = bld->bld_base.base.gallivm; |
||
2884 | LLVMBuilderRef builder = gallivm->builder; |
||
2885 | for (i = 0; i < 4; ++i ) { |
||
2886 | LLVMValueRef lindex = lp_build_const_int32( |
||
2887 | bld->bld_base.base.gallivm, index * 4 + i); |
||
2888 | LLVMValueRef imm_ptr = LLVMBuildGEP(builder, |
||
2889 | bld->imms_array, &lindex, 1, ""); |
||
2890 | LLVMBuildStore(builder, |
||
2891 | bld->immediates[index][i], |
||
2892 | imm_ptr); |
||
2893 | } |
||
2894 | } |
||
2895 | } |
||
2896 | |||
2897 | bld->num_immediates++; |
||
2898 | } |
||
2899 | |||
2900 | static void |
||
2901 | ddx_emit( |
||
2902 | const struct lp_build_tgsi_action * action, |
||
2903 | struct lp_build_tgsi_context * bld_base, |
||
2904 | struct lp_build_emit_data * emit_data) |
||
2905 | { |
||
2906 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
2907 | |||
2908 | emit_fetch_deriv(bld, emit_data->args[0], NULL, |
||
2909 | &emit_data->output[emit_data->chan], NULL); |
||
2910 | } |
||
2911 | |||
2912 | static void |
||
2913 | ddy_emit( |
||
2914 | const struct lp_build_tgsi_action * action, |
||
2915 | struct lp_build_tgsi_context * bld_base, |
||
2916 | struct lp_build_emit_data * emit_data) |
||
2917 | { |
||
2918 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
2919 | |||
2920 | emit_fetch_deriv(bld, emit_data->args[0], NULL, NULL, |
||
2921 | &emit_data->output[emit_data->chan]); |
||
2922 | } |
||
2923 | |||
2924 | static void |
||
2925 | kill_emit( |
||
2926 | const struct lp_build_tgsi_action * action, |
||
2927 | struct lp_build_tgsi_context * bld_base, |
||
2928 | struct lp_build_emit_data * emit_data) |
||
2929 | { |
||
2930 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
2931 | |||
2932 | emit_kill(bld, bld_base->pc - 1); |
||
2933 | } |
||
2934 | |||
2935 | static void |
||
2936 | kill_if_emit( |
||
2937 | const struct lp_build_tgsi_action * action, |
||
2938 | struct lp_build_tgsi_context * bld_base, |
||
2939 | struct lp_build_emit_data * emit_data) |
||
2940 | { |
||
2941 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
2942 | |||
2943 | emit_kill_if(bld, emit_data->inst, bld_base->pc - 1); |
||
2944 | } |
||
2945 | |||
2946 | static void |
||
2947 | tex_emit( |
||
2948 | const struct lp_build_tgsi_action * action, |
||
2949 | struct lp_build_tgsi_context * bld_base, |
||
2950 | struct lp_build_emit_data * emit_data) |
||
2951 | { |
||
2952 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
2953 | |||
2954 | emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
||
2955 | emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); |
||
2956 | } |
||
2957 | |||
2958 | static void |
||
2959 | tex2_emit( |
||
2960 | const struct lp_build_tgsi_action * action, |
||
2961 | struct lp_build_tgsi_context * bld_base, |
||
2962 | struct lp_build_emit_data * emit_data) |
||
2963 | { |
||
2964 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
2965 | |||
2966 | emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
||
2967 | emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); |
||
2968 | } |
||
2969 | |||
2970 | static void |
||
2971 | txb_emit( |
||
2972 | const struct lp_build_tgsi_action * action, |
||
2973 | struct lp_build_tgsi_context * bld_base, |
||
2974 | struct lp_build_emit_data * emit_data) |
||
2975 | { |
||
2976 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
2977 | |||
2978 | emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, |
||
2979 | emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); |
||
2980 | } |
||
2981 | |||
2982 | static void |
||
2983 | txb2_emit( |
||
2984 | const struct lp_build_tgsi_action * action, |
||
2985 | struct lp_build_tgsi_context * bld_base, |
||
2986 | struct lp_build_emit_data * emit_data) |
||
2987 | { |
||
2988 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
2989 | |||
2990 | emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, |
||
2991 | emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); |
||
2992 | } |
||
2993 | |||
2994 | static void |
||
2995 | txd_emit( |
||
2996 | const struct lp_build_tgsi_action * action, |
||
2997 | struct lp_build_tgsi_context * bld_base, |
||
2998 | struct lp_build_emit_data * emit_data) |
||
2999 | { |
||
3000 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3001 | |||
3002 | emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, |
||
3003 | emit_data->output, 3, LP_SAMPLER_OP_TEXTURE); |
||
3004 | } |
||
3005 | |||
3006 | static void |
||
3007 | txl_emit( |
||
3008 | const struct lp_build_tgsi_action * action, |
||
3009 | struct lp_build_tgsi_context * bld_base, |
||
3010 | struct lp_build_emit_data * emit_data) |
||
3011 | { |
||
3012 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3013 | |||
3014 | emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, |
||
3015 | emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); |
||
3016 | } |
||
3017 | |||
3018 | static void |
||
3019 | txl2_emit( |
||
3020 | const struct lp_build_tgsi_action * action, |
||
3021 | struct lp_build_tgsi_context * bld_base, |
||
3022 | struct lp_build_emit_data * emit_data) |
||
3023 | { |
||
3024 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3025 | |||
3026 | emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, |
||
3027 | emit_data->output, 2, LP_SAMPLER_OP_TEXTURE); |
||
3028 | } |
||
3029 | |||
3030 | static void |
||
3031 | txp_emit( |
||
3032 | const struct lp_build_tgsi_action * action, |
||
3033 | struct lp_build_tgsi_context * bld_base, |
||
3034 | struct lp_build_emit_data * emit_data) |
||
3035 | { |
||
3036 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3037 | |||
3038 | emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_PROJECTED, |
||
3039 | emit_data->output, 1, LP_SAMPLER_OP_TEXTURE); |
||
3040 | } |
||
3041 | |||
3042 | static void |
||
3043 | tg4_emit( |
||
3044 | const struct lp_build_tgsi_action * action, |
||
3045 | struct lp_build_tgsi_context * bld_base, |
||
3046 | struct lp_build_emit_data * emit_data) |
||
3047 | { |
||
3048 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3049 | |||
3050 | emit_tex(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
||
3051 | emit_data->output, 2, LP_SAMPLER_OP_GATHER); |
||
3052 | } |
||
3053 | |||
3054 | static void |
||
3055 | txq_emit( |
||
3056 | const struct lp_build_tgsi_action * action, |
||
3057 | struct lp_build_tgsi_context * bld_base, |
||
3058 | struct lp_build_emit_data * emit_data) |
||
3059 | { |
||
3060 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3061 | |||
3062 | emit_size_query(bld, emit_data->inst, emit_data->output, FALSE); |
||
3063 | } |
||
3064 | |||
3065 | static void |
||
3066 | txf_emit( |
||
3067 | const struct lp_build_tgsi_action * action, |
||
3068 | struct lp_build_tgsi_context * bld_base, |
||
3069 | struct lp_build_emit_data * emit_data) |
||
3070 | { |
||
3071 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3072 | |||
3073 | emit_fetch_texels(bld, emit_data->inst, emit_data->output, FALSE); |
||
3074 | } |
||
3075 | |||
3076 | static void |
||
3077 | sample_i_emit( |
||
3078 | const struct lp_build_tgsi_action * action, |
||
3079 | struct lp_build_tgsi_context * bld_base, |
||
3080 | struct lp_build_emit_data * emit_data) |
||
3081 | { |
||
3082 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3083 | |||
3084 | emit_fetch_texels(bld, emit_data->inst, emit_data->output, TRUE); |
||
3085 | } |
||
3086 | |||
3087 | static void |
||
3088 | sample_emit( |
||
3089 | const struct lp_build_tgsi_action * action, |
||
3090 | struct lp_build_tgsi_context * bld_base, |
||
3091 | struct lp_build_emit_data * emit_data) |
||
3092 | { |
||
3093 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3094 | |||
3095 | emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
||
3096 | FALSE, emit_data->output); |
||
3097 | } |
||
3098 | |||
3099 | static void |
||
3100 | sample_b_emit( |
||
3101 | const struct lp_build_tgsi_action * action, |
||
3102 | struct lp_build_tgsi_context * bld_base, |
||
3103 | struct lp_build_emit_data * emit_data) |
||
3104 | { |
||
3105 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3106 | |||
3107 | emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_BIAS, |
||
3108 | FALSE, emit_data->output); |
||
3109 | } |
||
3110 | |||
3111 | static void |
||
3112 | sample_c_emit( |
||
3113 | const struct lp_build_tgsi_action * action, |
||
3114 | struct lp_build_tgsi_context * bld_base, |
||
3115 | struct lp_build_emit_data * emit_data) |
||
3116 | { |
||
3117 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3118 | |||
3119 | emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_NONE, |
||
3120 | TRUE, emit_data->output); |
||
3121 | } |
||
3122 | |||
3123 | static void |
||
3124 | sample_c_lz_emit( |
||
3125 | const struct lp_build_tgsi_action * action, |
||
3126 | struct lp_build_tgsi_context * bld_base, |
||
3127 | struct lp_build_emit_data * emit_data) |
||
3128 | { |
||
3129 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3130 | |||
3131 | emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_LOD_ZERO, |
||
3132 | TRUE, emit_data->output); |
||
3133 | } |
||
3134 | |||
3135 | static void |
||
3136 | sample_d_emit( |
||
3137 | const struct lp_build_tgsi_action * action, |
||
3138 | struct lp_build_tgsi_context * bld_base, |
||
3139 | struct lp_build_emit_data * emit_data) |
||
3140 | { |
||
3141 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3142 | |||
3143 | emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_DERIV, |
||
3144 | FALSE, emit_data->output); |
||
3145 | } |
||
3146 | |||
3147 | static void |
||
3148 | sample_l_emit( |
||
3149 | const struct lp_build_tgsi_action * action, |
||
3150 | struct lp_build_tgsi_context * bld_base, |
||
3151 | struct lp_build_emit_data * emit_data) |
||
3152 | { |
||
3153 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3154 | |||
3155 | emit_sample(bld, emit_data->inst, LP_BLD_TEX_MODIFIER_EXPLICIT_LOD, |
||
3156 | FALSE, emit_data->output); |
||
3157 | } |
||
3158 | |||
3159 | static void |
||
3160 | sviewinfo_emit( |
||
3161 | const struct lp_build_tgsi_action * action, |
||
3162 | struct lp_build_tgsi_context * bld_base, |
||
3163 | struct lp_build_emit_data * emit_data) |
||
3164 | { |
||
3165 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3166 | |||
3167 | emit_size_query(bld, emit_data->inst, emit_data->output, TRUE); |
||
3168 | } |
||
3169 | |||
3170 | static LLVMValueRef |
||
3171 | mask_vec(struct lp_build_tgsi_context *bld_base) |
||
3172 | { |
||
3173 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3174 | LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
||
3175 | struct lp_exec_mask *exec_mask = &bld->exec_mask; |
||
3176 | |||
3177 | if (!exec_mask->has_mask) { |
||
3178 | return lp_build_mask_value(bld->mask); |
||
3179 | } |
||
3180 | return LLVMBuildAnd(builder, lp_build_mask_value(bld->mask), |
||
3181 | exec_mask->exec_mask, ""); |
||
3182 | } |
||
3183 | |||
3184 | static void |
||
3185 | increment_vec_ptr_by_mask(struct lp_build_tgsi_context * bld_base, |
||
3186 | LLVMValueRef ptr, |
||
3187 | LLVMValueRef mask) |
||
3188 | { |
||
3189 | LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
||
3190 | LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, ""); |
||
3191 | |||
3192 | current_vec = LLVMBuildSub(builder, current_vec, mask, ""); |
||
3193 | |||
3194 | LLVMBuildStore(builder, current_vec, ptr); |
||
3195 | } |
||
3196 | |||
3197 | static void |
||
3198 | clear_uint_vec_ptr_from_mask(struct lp_build_tgsi_context * bld_base, |
||
3199 | LLVMValueRef ptr, |
||
3200 | LLVMValueRef mask) |
||
3201 | { |
||
3202 | LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
||
3203 | LLVMValueRef current_vec = LLVMBuildLoad(builder, ptr, ""); |
||
3204 | |||
3205 | current_vec = lp_build_select(&bld_base->uint_bld, |
||
3206 | mask, |
||
3207 | bld_base->uint_bld.zero, |
||
3208 | current_vec); |
||
3209 | |||
3210 | LLVMBuildStore(builder, current_vec, ptr); |
||
3211 | } |
||
3212 | |||
3213 | static LLVMValueRef |
||
3214 | clamp_mask_to_max_output_vertices(struct lp_build_tgsi_soa_context * bld, |
||
3215 | LLVMValueRef current_mask_vec, |
||
3216 | LLVMValueRef total_emitted_vertices_vec) |
||
3217 | { |
||
3218 | LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
||
3219 | struct lp_build_context *int_bld = &bld->bld_base.int_bld; |
||
3220 | LLVMValueRef max_mask = lp_build_cmp(int_bld, PIPE_FUNC_LESS, |
||
3221 | total_emitted_vertices_vec, |
||
3222 | bld->max_output_vertices_vec); |
||
3223 | |||
3224 | return LLVMBuildAnd(builder, current_mask_vec, max_mask, ""); |
||
3225 | } |
||
3226 | |||
/**
 * Geometry shader EMIT: output one vertex per active lane.
 * Clamps the lane mask against the maximum output vertex count, gathers
 * the current output registers, hands them to the GS interface, then
 * bumps both the per-primitive and total vertex counters for the lanes
 * that actually emitted.
 */
static void
emit_vertex(
   const struct lp_build_tgsi_action * action,
   struct lp_build_tgsi_context * bld_base,
   struct lp_build_emit_data * emit_data)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder;

   if (bld->gs_iface->emit_vertex) {
      LLVMValueRef mask = mask_vec(bld_base);
      LLVMValueRef total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      /* Lanes at the declared vertex limit must not count this emit. */
      mask = clamp_mask_to_max_output_vertices(bld, mask,
                                               total_emitted_vertices_vec);
      gather_outputs(bld);
      bld->gs_iface->emit_vertex(bld->gs_iface, &bld->bld_base,
                                 bld->outputs,
                                 total_emitted_vertices_vec);
      increment_vec_ptr_by_mask(bld_base, bld->emitted_vertices_vec_ptr,
                                mask);
      increment_vec_ptr_by_mask(bld_base, bld->total_emitted_vertices_vec_ptr,
                                mask);
#if DUMP_GS_EMITS
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex masked ones = ",
                           mask);
      lp_build_print_value(bld->bld_base.base.gallivm,
                           " +++ emit vertex emitted = ",
                           total_emitted_vertices_vec);
#endif
   }
}
||
3260 | |||
3261 | |||
3262 | static void |
||
3263 | end_primitive_masked(struct lp_build_tgsi_context * bld_base, |
||
3264 | LLVMValueRef mask) |
||
3265 | { |
||
3266 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3267 | LLVMBuilderRef builder = bld->bld_base.base.gallivm->builder; |
||
3268 | |||
3269 | if (bld->gs_iface->end_primitive) { |
||
3270 | struct lp_build_context *uint_bld = &bld_base->uint_bld; |
||
3271 | LLVMValueRef emitted_vertices_vec = |
||
3272 | LLVMBuildLoad(builder, bld->emitted_vertices_vec_ptr, ""); |
||
3273 | LLVMValueRef emitted_prims_vec = |
||
3274 | LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, ""); |
||
3275 | |||
3276 | LLVMValueRef emitted_mask = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, |
||
3277 | emitted_vertices_vec, |
||
3278 | uint_bld->zero); |
||
3279 | /* We need to combine the current execution mask with the mask |
||
3280 | telling us which, if any, execution slots actually have |
||
3281 | unemitted primitives, this way we make sure that end_primitives |
||
3282 | executes only on the paths that have unflushed vertices */ |
||
3283 | mask = LLVMBuildAnd(builder, mask, emitted_mask, ""); |
||
3284 | |||
3285 | bld->gs_iface->end_primitive(bld->gs_iface, &bld->bld_base, |
||
3286 | emitted_vertices_vec, |
||
3287 | emitted_prims_vec); |
||
3288 | |||
3289 | #if DUMP_GS_EMITS |
||
3290 | lp_build_print_value(bld->bld_base.base.gallivm, |
||
3291 | " +++ end prim masked ones = ", |
||
3292 | mask); |
||
3293 | lp_build_print_value(bld->bld_base.base.gallivm, |
||
3294 | " +++ end prim emitted verts1 = ", |
||
3295 | emitted_vertices_vec); |
||
3296 | lp_build_print_value(bld->bld_base.base.gallivm, |
||
3297 | " +++ end prim emitted prims1 = ", |
||
3298 | LLVMBuildLoad(builder, |
||
3299 | bld->emitted_prims_vec_ptr, "")); |
||
3300 | #endif |
||
3301 | increment_vec_ptr_by_mask(bld_base, bld->emitted_prims_vec_ptr, |
||
3302 | mask); |
||
3303 | clear_uint_vec_ptr_from_mask(bld_base, bld->emitted_vertices_vec_ptr, |
||
3304 | mask); |
||
3305 | #if DUMP_GS_EMITS |
||
3306 | lp_build_print_value(bld->bld_base.base.gallivm, |
||
3307 | " +++ end prim emitted verts2 = ", |
||
3308 | LLVMBuildLoad(builder, |
||
3309 | bld->emitted_vertices_vec_ptr, "")); |
||
3310 | #endif |
||
3311 | } |
||
3312 | |||
3313 | } |
||
3314 | |||
3315 | static void |
||
3316 | end_primitive( |
||
3317 | const struct lp_build_tgsi_action * action, |
||
3318 | struct lp_build_tgsi_context * bld_base, |
||
3319 | struct lp_build_emit_data * emit_data) |
||
3320 | { |
||
3321 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3322 | |||
3323 | if (bld->gs_iface->end_primitive) { |
||
3324 | LLVMValueRef mask = mask_vec(bld_base); |
||
3325 | end_primitive_masked(bld_base, mask); |
||
3326 | } |
||
3327 | } |
||
3328 | |||
3329 | static void |
||
3330 | cal_emit( |
||
3331 | const struct lp_build_tgsi_action * action, |
||
3332 | struct lp_build_tgsi_context * bld_base, |
||
3333 | struct lp_build_emit_data * emit_data) |
||
3334 | { |
||
3335 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3336 | |||
3337 | lp_exec_mask_call(&bld->exec_mask, emit_data->inst->Label.Label, |
||
3338 | &bld_base->pc); |
||
3339 | } |
||
3340 | |||
3341 | static void |
||
3342 | ret_emit( |
||
3343 | const struct lp_build_tgsi_action * action, |
||
3344 | struct lp_build_tgsi_context * bld_base, |
||
3345 | struct lp_build_emit_data * emit_data) |
||
3346 | { |
||
3347 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3348 | |||
3349 | lp_exec_mask_ret(&bld->exec_mask, &bld_base->pc); |
||
3350 | } |
||
3351 | |||
3352 | static void |
||
3353 | brk_emit( |
||
3354 | const struct lp_build_tgsi_action * action, |
||
3355 | struct lp_build_tgsi_context * bld_base, |
||
3356 | struct lp_build_emit_data * emit_data) |
||
3357 | { |
||
3358 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3359 | |||
3360 | lp_exec_break(&bld->exec_mask, bld_base); |
||
3361 | } |
||
3362 | |||
3363 | static void |
||
3364 | breakc_emit( |
||
3365 | const struct lp_build_tgsi_action * action, |
||
3366 | struct lp_build_tgsi_context * bld_base, |
||
3367 | struct lp_build_emit_data * emit_data) |
||
3368 | { |
||
3369 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3370 | LLVMBuilderRef builder = bld_base->base.gallivm->builder; |
||
3371 | struct lp_build_context *uint_bld = &bld_base->uint_bld; |
||
3372 | LLVMValueRef unsigned_cond = |
||
3373 | LLVMBuildBitCast(builder, emit_data->args[0], uint_bld->vec_type, ""); |
||
3374 | LLVMValueRef cond = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, |
||
3375 | unsigned_cond, |
||
3376 | uint_bld->zero); |
||
3377 | |||
3378 | lp_exec_break_condition(&bld->exec_mask, cond); |
||
3379 | } |
||
3380 | |||
3381 | static void |
||
3382 | if_emit( |
||
3383 | const struct lp_build_tgsi_action * action, |
||
3384 | struct lp_build_tgsi_context * bld_base, |
||
3385 | struct lp_build_emit_data * emit_data) |
||
3386 | { |
||
3387 | LLVMValueRef tmp; |
||
3388 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3389 | |||
3390 | tmp = lp_build_cmp(&bld_base->base, PIPE_FUNC_NOTEQUAL, |
||
3391 | emit_data->args[0], bld->bld_base.base.zero); |
||
3392 | lp_exec_mask_cond_push(&bld->exec_mask, tmp); |
||
3393 | } |
||
3394 | |||
3395 | static void |
||
3396 | uif_emit( |
||
3397 | const struct lp_build_tgsi_action * action, |
||
3398 | struct lp_build_tgsi_context * bld_base, |
||
3399 | struct lp_build_emit_data * emit_data) |
||
3400 | { |
||
3401 | LLVMValueRef tmp; |
||
3402 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3403 | struct lp_build_context *uint_bld = &bld_base->uint_bld; |
||
3404 | |||
3405 | tmp = lp_build_cmp(uint_bld, PIPE_FUNC_NOTEQUAL, |
||
3406 | emit_data->args[0], uint_bld->zero); |
||
3407 | lp_exec_mask_cond_push(&bld->exec_mask, tmp); |
||
3408 | } |
||
3409 | |||
3410 | static void |
||
3411 | case_emit( |
||
3412 | const struct lp_build_tgsi_action * action, |
||
3413 | struct lp_build_tgsi_context * bld_base, |
||
3414 | struct lp_build_emit_data * emit_data) |
||
3415 | { |
||
3416 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3417 | |||
3418 | lp_exec_case(&bld->exec_mask, emit_data->args[0]); |
||
3419 | } |
||
3420 | |||
3421 | static void |
||
3422 | default_emit( |
||
3423 | const struct lp_build_tgsi_action * action, |
||
3424 | struct lp_build_tgsi_context * bld_base, |
||
3425 | struct lp_build_emit_data * emit_data) |
||
3426 | { |
||
3427 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3428 | |||
3429 | lp_exec_default(&bld->exec_mask, bld_base); |
||
3430 | } |
||
3431 | |||
3432 | static void |
||
3433 | switch_emit( |
||
3434 | const struct lp_build_tgsi_action * action, |
||
3435 | struct lp_build_tgsi_context * bld_base, |
||
3436 | struct lp_build_emit_data * emit_data) |
||
3437 | { |
||
3438 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3439 | |||
3440 | lp_exec_switch(&bld->exec_mask, emit_data->args[0]); |
||
3441 | } |
||
3442 | |||
3443 | static void |
||
3444 | endswitch_emit( |
||
3445 | const struct lp_build_tgsi_action * action, |
||
3446 | struct lp_build_tgsi_context * bld_base, |
||
3447 | struct lp_build_emit_data * emit_data) |
||
3448 | { |
||
3449 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3450 | |||
3451 | lp_exec_endswitch(&bld->exec_mask, bld_base); |
||
3452 | } |
||
3453 | |||
3454 | static void |
||
3455 | bgnloop_emit( |
||
3456 | const struct lp_build_tgsi_action * action, |
||
3457 | struct lp_build_tgsi_context * bld_base, |
||
3458 | struct lp_build_emit_data * emit_data) |
||
3459 | { |
||
3460 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3461 | |||
3462 | lp_exec_bgnloop(&bld->exec_mask); |
||
3463 | } |
||
3464 | |||
3465 | static void |
||
3466 | bgnsub_emit( |
||
3467 | const struct lp_build_tgsi_action * action, |
||
3468 | struct lp_build_tgsi_context * bld_base, |
||
3469 | struct lp_build_emit_data * emit_data) |
||
3470 | { |
||
3471 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3472 | |||
3473 | lp_exec_mask_bgnsub(&bld->exec_mask); |
||
3474 | } |
||
3475 | |||
3476 | static void |
||
3477 | else_emit( |
||
3478 | const struct lp_build_tgsi_action * action, |
||
3479 | struct lp_build_tgsi_context * bld_base, |
||
3480 | struct lp_build_emit_data * emit_data) |
||
3481 | { |
||
3482 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3483 | |||
3484 | lp_exec_mask_cond_invert(&bld->exec_mask); |
||
3485 | } |
||
3486 | |||
3487 | static void |
||
3488 | endif_emit( |
||
3489 | const struct lp_build_tgsi_action * action, |
||
3490 | struct lp_build_tgsi_context * bld_base, |
||
3491 | struct lp_build_emit_data * emit_data) |
||
3492 | { |
||
3493 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3494 | |||
3495 | lp_exec_mask_cond_pop(&bld->exec_mask); |
||
3496 | } |
||
3497 | |||
3498 | static void |
||
3499 | endloop_emit( |
||
3500 | const struct lp_build_tgsi_action * action, |
||
3501 | struct lp_build_tgsi_context * bld_base, |
||
3502 | struct lp_build_emit_data * emit_data) |
||
3503 | { |
||
3504 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3505 | |||
3506 | lp_exec_endloop(bld_base->base.gallivm, &bld->exec_mask); |
||
3507 | } |
||
3508 | |||
3509 | static void |
||
3510 | endsub_emit( |
||
3511 | const struct lp_build_tgsi_action * action, |
||
3512 | struct lp_build_tgsi_context * bld_base, |
||
3513 | struct lp_build_emit_data * emit_data) |
||
3514 | { |
||
3515 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3516 | |||
3517 | lp_exec_mask_endsub(&bld->exec_mask, &bld_base->pc); |
||
3518 | } |
||
3519 | |||
3520 | static void |
||
3521 | cont_emit( |
||
3522 | const struct lp_build_tgsi_action * action, |
||
3523 | struct lp_build_tgsi_context * bld_base, |
||
3524 | struct lp_build_emit_data * emit_data) |
||
3525 | { |
||
3526 | struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base); |
||
3527 | |||
3528 | lp_exec_continue(&bld->exec_mask); |
||
3529 | } |
||
3530 | |||
/**
 * Shader prologue: allocate and initialize per-invocation storage before
 * translating any TGSI instructions.
 *
 * - Allocates alloca-backed arrays for any register file that is accessed
 *   with indirect addressing (temporaries, outputs, immediates), sized as
 *   (file_max * 4 channels + 4) vectors.
 * - For indirectly addressed inputs (non-GS only), copies the caller-provided
 *   input values into an alloca array so they can be indexed dynamically.
 * - For geometry shaders, allocates and zeroes the per-channel vertex/prim
 *   emit counters used by EMIT/ENDPRIM bookkeeping.
 * - Optionally dumps constants/inputs when DEBUG_EXECUTION is set.
 */
static void emit_prologue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   struct gallivm_state * gallivm = bld_base->base.gallivm;

   /* Indirectly addressed temporaries must live in memory (alloca) so that
    * dynamic indices can be used; file_max is the highest register index,
    * hence (max * 4 channels + 4) vector slots. */
   if (bld->indirect_files & (1 << TGSI_FILE_TEMPORARY)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_TEMPORARY] * 4 + 4);
      bld->temps_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "temp_array");
   }

   /* Same for indirectly addressed outputs. */
   if (bld->indirect_files & (1 << TGSI_FILE_OUTPUT)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                            bld_base->info->file_max[TGSI_FILE_OUTPUT] * 4 + 4);
      bld->outputs_array = lp_build_array_alloca(gallivm,
                                                bld_base->base.vec_type, array_size,
                                                "output_array");
   }

   /* Same for indirectly addressed immediates. */
   if (bld->indirect_files & (1 << TGSI_FILE_IMMEDIATE)) {
      LLVMValueRef array_size =
         lp_build_const_int32(gallivm,
                         bld_base->info->file_max[TGSI_FILE_IMMEDIATE] * 4 + 4);
      bld->imms_array = lp_build_array_alloca(gallivm,
                                              bld_base->base.vec_type, array_size,
                                              "imms_array");
   }

   /* If we have indirect addressing in inputs we need to copy them into
    * our alloca array to be able to iterate over them.  Geometry shaders
    * fetch inputs through gs_iface instead, so they are excluded here. */
   if (bld->indirect_files & (1 << TGSI_FILE_INPUT) && !bld->gs_iface) {
      unsigned index, chan;
      LLVMTypeRef vec_type = bld_base->base.vec_type;
      LLVMValueRef array_size = lp_build_const_int32(gallivm,
            bld_base->info->file_max[TGSI_FILE_INPUT]*4 + 4);
      bld->inputs_array = lp_build_array_alloca(gallivm,
                                               vec_type, array_size,
                                               "input_array");

      assert(bld_base->info->num_inputs
                        <= bld_base->info->file_max[TGSI_FILE_INPUT] + 1);

      /* Store each declared input channel at slot index*4 + chan. */
      for (index = 0; index < bld_base->info->num_inputs; ++index) {
         for (chan = 0; chan < TGSI_NUM_CHANNELS; ++chan) {
            LLVMValueRef lindex =
               lp_build_const_int32(gallivm, index * 4 + chan);
            LLVMValueRef input_ptr =
               LLVMBuildGEP(gallivm->builder, bld->inputs_array,
                            &lindex, 1, "");
            LLVMValueRef value = bld->inputs[index][chan];
            /* Undeclared/unused channels may be NULL; skip them. */
            if (value)
               LLVMBuildStore(gallivm->builder, value, input_ptr);
         }
      }
   }

   /* Geometry shaders: allocate the per-channel counters that EMIT and
    * ENDPRIM update (vertices in the current primitive, primitives so
    * far, and total vertices), and zero-initialize them. */
   if (bld->gs_iface) {
      struct lp_build_context *uint_bld = &bld->bld_base.uint_bld;
      bld->emitted_prims_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_prims_ptr");
      bld->emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "emitted_vertices_ptr");
      bld->total_emitted_vertices_vec_ptr =
         lp_build_alloca(gallivm,
                         uint_bld->vec_type,
                         "total_emitted_vertices_ptr");

      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_prims_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->emitted_vertices_vec_ptr);
      LLVMBuildStore(gallivm->builder, uint_bld->zero,
                     bld->total_emitted_vertices_vec_ptr);
   }

   /* Debug aid: print the constant (and, for non-GS, input) files at the
    * start of every shader invocation. */
   if (DEBUG_EXECUTION) {
      lp_build_printf(gallivm, "\n");
      emit_dump_file(bld, TGSI_FILE_CONSTANT);
      if (!bld->gs_iface)
         emit_dump_file(bld, TGSI_FILE_INPUT);
   }
}
||
3621 | |||
/**
 * Shader epilogue: runs after the last TGSI instruction has been translated.
 *
 * For geometry shaders this performs an implicit final ENDPRIM (flushing any
 * vertices still in the cache) and then notifies the gs interface of the
 * final vertex/primitive counts.  For all other shader stages it copies the
 * alloca-backed output array (if indirect output addressing was used) back
 * into the caller-provided output slots via gather_outputs().
 */
static void emit_epilogue(struct lp_build_tgsi_context * bld_base)
{
   struct lp_build_tgsi_soa_context * bld = lp_soa_context(bld_base);
   LLVMBuilderRef builder = bld_base->base.gallivm->builder;

   if (DEBUG_EXECUTION) {
      /* for debugging */
      if (0) {
         emit_dump_file(bld, TGSI_FILE_TEMPORARY);
      }
      emit_dump_file(bld, TGSI_FILE_OUTPUT);
      lp_build_printf(bld_base->base.gallivm, "\n");
   }

   if (bld->gs_iface) {
      LLVMValueRef total_emitted_vertices_vec;
      LLVMValueRef emitted_prims_vec;
      /* implicit end_primitives, needed in case there are any unflushed
         vertices in the cache. Note must not call end_primitive here
         since the exec_mask is not valid at this point. */
      end_primitive_masked(bld_base, lp_build_mask_value(bld->mask));

      total_emitted_vertices_vec =
         LLVMBuildLoad(builder, bld->total_emitted_vertices_vec_ptr, "");
      emitted_prims_vec =
         LLVMBuildLoad(builder, bld->emitted_prims_vec_ptr, "");

      /* Hand the final counts to the gs interface so it can patch up the
       * emitted-vertex stream. */
      bld->gs_iface->gs_epilogue(bld->gs_iface,
                                 &bld->bld_base,
                                 total_emitted_vertices_vec,
                                 emitted_prims_vec);
   } else {
      /* If we have indirect addressing in outputs we need to copy our
       * alloca array to the output slots specified by the caller. */
      gather_outputs(bld);
   }
}
||
3659 | |||
3660 | void |
||
3661 | lp_build_tgsi_soa(struct gallivm_state *gallivm, |
||
3662 | const struct tgsi_token *tokens, |
||
3663 | struct lp_type type, |
||
3664 | struct lp_build_mask_context *mask, |
||
3665 | LLVMValueRef consts_ptr, |
||
3666 | LLVMValueRef const_sizes_ptr, |
||
3667 | const struct lp_bld_tgsi_system_values *system_values, |
||
3668 | const LLVMValueRef (*inputs)[TGSI_NUM_CHANNELS], |
||
3669 | LLVMValueRef (*outputs)[TGSI_NUM_CHANNELS], |
||
3670 | LLVMValueRef context_ptr, |
||
3671 | struct lp_build_sampler_soa *sampler, |
||
3672 | const struct tgsi_shader_info *info, |
||
3673 | const struct lp_build_tgsi_gs_iface *gs_iface) |
||
3674 | { |
||
3675 | struct lp_build_tgsi_soa_context bld; |
||
3676 | |||
3677 | struct lp_type res_type; |
||
3678 | |||
3679 | assert(type.length <= LP_MAX_VECTOR_LENGTH); |
||
3680 | memset(&res_type, 0, sizeof res_type); |
||
3681 | res_type.width = type.width; |
||
3682 | res_type.length = type.length; |
||
3683 | res_type.sign = 1; |
||
3684 | |||
3685 | /* Setup build context */ |
||
3686 | memset(&bld, 0, sizeof bld); |
||
3687 | lp_build_context_init(&bld.bld_base.base, gallivm, type); |
||
3688 | lp_build_context_init(&bld.bld_base.uint_bld, gallivm, lp_uint_type(type)); |
||
3689 | lp_build_context_init(&bld.bld_base.int_bld, gallivm, lp_int_type(type)); |
||
3690 | lp_build_context_init(&bld.elem_bld, gallivm, lp_elem_type(type)); |
||
3691 | bld.mask = mask; |
||
3692 | bld.inputs = inputs; |
||
3693 | bld.outputs = outputs; |
||
3694 | bld.consts_ptr = consts_ptr; |
||
3695 | bld.const_sizes_ptr = const_sizes_ptr; |
||
3696 | bld.sampler = sampler; |
||
3697 | bld.bld_base.info = info; |
||
3698 | bld.indirect_files = info->indirect_files; |
||
3699 | bld.context_ptr = context_ptr; |
||
3700 | |||
3701 | /* |
||
3702 | * If the number of temporaries is rather large then we just |
||
3703 | * allocate them as an array right from the start and treat |
||
3704 | * like indirect temporaries. |
||
3705 | */ |
||
3706 | if (info->file_max[TGSI_FILE_TEMPORARY] >= LP_MAX_INLINED_TEMPS) { |
||
3707 | bld.indirect_files |= (1 << TGSI_FILE_TEMPORARY); |
||
3708 | } |
||
3709 | /* |
||
3710 | * For performance reason immediates are always backed in a static |
||
3711 | * array, but if their number is too great, we have to use just |
||
3712 | * a dynamically allocated array. |
||
3713 | */ |
||
3714 | bld.use_immediates_array = |
||
3715 | (info->file_max[TGSI_FILE_IMMEDIATE] >= LP_MAX_INLINED_IMMEDIATES); |
||
3716 | if (bld.use_immediates_array) { |
||
3717 | bld.indirect_files |= (1 << TGSI_FILE_IMMEDIATE); |
||
3718 | } |
||
3719 | |||
3720 | |||
3721 | bld.bld_base.soa = TRUE; |
||
3722 | bld.bld_base.emit_debug = emit_debug; |
||
3723 | bld.bld_base.emit_fetch_funcs[TGSI_FILE_CONSTANT] = emit_fetch_constant; |
||
3724 | bld.bld_base.emit_fetch_funcs[TGSI_FILE_IMMEDIATE] = emit_fetch_immediate; |
||
3725 | bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_input; |
||
3726 | bld.bld_base.emit_fetch_funcs[TGSI_FILE_TEMPORARY] = emit_fetch_temporary; |
||
3727 | bld.bld_base.emit_fetch_funcs[TGSI_FILE_SYSTEM_VALUE] = emit_fetch_system_value; |
||
3728 | bld.bld_base.emit_store = emit_store; |
||
3729 | |||
3730 | bld.bld_base.emit_declaration = lp_emit_declaration_soa; |
||
3731 | bld.bld_base.emit_immediate = lp_emit_immediate_soa; |
||
3732 | |||
3733 | bld.bld_base.emit_prologue = emit_prologue; |
||
3734 | bld.bld_base.emit_epilogue = emit_epilogue; |
||
3735 | |||
3736 | /* Set opcode actions */ |
||
3737 | lp_set_default_actions_cpu(&bld.bld_base); |
||
3738 | |||
3739 | bld.bld_base.op_actions[TGSI_OPCODE_BGNLOOP].emit = bgnloop_emit; |
||
3740 | bld.bld_base.op_actions[TGSI_OPCODE_BGNSUB].emit = bgnsub_emit; |
||
3741 | bld.bld_base.op_actions[TGSI_OPCODE_BRK].emit = brk_emit; |
||
3742 | bld.bld_base.op_actions[TGSI_OPCODE_BREAKC].emit = breakc_emit; |
||
3743 | bld.bld_base.op_actions[TGSI_OPCODE_CAL].emit = cal_emit; |
||
3744 | bld.bld_base.op_actions[TGSI_OPCODE_CASE].emit = case_emit; |
||
3745 | bld.bld_base.op_actions[TGSI_OPCODE_CONT].emit = cont_emit; |
||
3746 | bld.bld_base.op_actions[TGSI_OPCODE_DDX].emit = ddx_emit; |
||
3747 | bld.bld_base.op_actions[TGSI_OPCODE_DDY].emit = ddy_emit; |
||
3748 | bld.bld_base.op_actions[TGSI_OPCODE_DEFAULT].emit = default_emit; |
||
3749 | bld.bld_base.op_actions[TGSI_OPCODE_ELSE].emit = else_emit; |
||
3750 | bld.bld_base.op_actions[TGSI_OPCODE_ENDIF].emit = endif_emit; |
||
3751 | bld.bld_base.op_actions[TGSI_OPCODE_ENDLOOP].emit = endloop_emit; |
||
3752 | bld.bld_base.op_actions[TGSI_OPCODE_ENDSUB].emit = endsub_emit; |
||
3753 | bld.bld_base.op_actions[TGSI_OPCODE_ENDSWITCH].emit = endswitch_emit; |
||
3754 | bld.bld_base.op_actions[TGSI_OPCODE_IF].emit = if_emit; |
||
3755 | bld.bld_base.op_actions[TGSI_OPCODE_UIF].emit = uif_emit; |
||
3756 | bld.bld_base.op_actions[TGSI_OPCODE_KILL_IF].emit = kill_if_emit; |
||
3757 | bld.bld_base.op_actions[TGSI_OPCODE_KILL].emit = kill_emit; |
||
3758 | bld.bld_base.op_actions[TGSI_OPCODE_RET].emit = ret_emit; |
||
3759 | bld.bld_base.op_actions[TGSI_OPCODE_SWITCH].emit = switch_emit; |
||
3760 | bld.bld_base.op_actions[TGSI_OPCODE_TEX].emit = tex_emit; |
||
3761 | bld.bld_base.op_actions[TGSI_OPCODE_TXB].emit = txb_emit; |
||
3762 | bld.bld_base.op_actions[TGSI_OPCODE_TXD].emit = txd_emit; |
||
3763 | bld.bld_base.op_actions[TGSI_OPCODE_TXL].emit = txl_emit; |
||
3764 | bld.bld_base.op_actions[TGSI_OPCODE_TXP].emit = txp_emit; |
||
3765 | bld.bld_base.op_actions[TGSI_OPCODE_TXQ].emit = txq_emit; |
||
3766 | bld.bld_base.op_actions[TGSI_OPCODE_TXF].emit = txf_emit; |
||
3767 | bld.bld_base.op_actions[TGSI_OPCODE_TEX2].emit = tex2_emit; |
||
3768 | bld.bld_base.op_actions[TGSI_OPCODE_TXB2].emit = txb2_emit; |
||
3769 | bld.bld_base.op_actions[TGSI_OPCODE_TXL2].emit = txl2_emit; |
||
3770 | bld.bld_base.op_actions[TGSI_OPCODE_TG4].emit = tg4_emit; |
||
3771 | /* DX10 sampling ops */ |
||
3772 | bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE].emit = sample_emit; |
||
3773 | bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_B].emit = sample_b_emit; |
||
3774 | bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C].emit = sample_c_emit; |
||
3775 | bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_C_LZ].emit = sample_c_lz_emit; |
||
3776 | bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_D].emit = sample_d_emit; |
||
3777 | bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_I].emit = sample_i_emit; |
||
3778 | bld.bld_base.op_actions[TGSI_OPCODE_SAMPLE_L].emit = sample_l_emit; |
||
3779 | bld.bld_base.op_actions[TGSI_OPCODE_SVIEWINFO].emit = sviewinfo_emit; |
||
3780 | |||
3781 | if (gs_iface) { |
||
3782 | /* There's no specific value for this because it should always |
||
3783 | * be set, but apps using ext_geometry_shader4 quite often |
||
3784 | * were forgetting so we're using MAX_VERTEX_VARYING from |
||
3785 | * that spec even though we could debug_assert if it's not |
||
3786 | * set, but that's a lot uglier. */ |
||
3787 | uint max_output_vertices; |
||
3788 | |||
3789 | /* inputs are always indirect with gs */ |
||
3790 | bld.indirect_files |= (1 << TGSI_FILE_INPUT); |
||
3791 | bld.gs_iface = gs_iface; |
||
3792 | bld.bld_base.emit_fetch_funcs[TGSI_FILE_INPUT] = emit_fetch_gs_input; |
||
3793 | bld.bld_base.op_actions[TGSI_OPCODE_EMIT].emit = emit_vertex; |
||
3794 | bld.bld_base.op_actions[TGSI_OPCODE_ENDPRIM].emit = end_primitive; |
||
3795 | |||
3796 | max_output_vertices = |
||
3797 | info->properties[TGSI_PROPERTY_GS_MAX_OUTPUT_VERTICES]; |
||
3798 | if (!max_output_vertices) |
||
3799 | max_output_vertices = 32; |
||
3800 | |||
3801 | bld.max_output_vertices_vec = |
||
3802 | lp_build_const_int_vec(gallivm, bld.bld_base.int_bld.type, |
||
3803 | max_output_vertices); |
||
3804 | } |
||
3805 | |||
3806 | lp_exec_mask_init(&bld.exec_mask, &bld.bld_base.int_bld); |
||
3807 | |||
3808 | bld.system_values = *system_values; |
||
3809 | |||
3810 | lp_build_tgsi_llvm(&bld.bld_base, tokens); |
||
3811 | |||
3812 | if (0) { |
||
3813 | LLVMBasicBlockRef block = LLVMGetInsertBlock(gallivm->builder); |
||
3814 | LLVMValueRef function = LLVMGetBasicBlockParent(block); |
||
3815 | debug_printf("11111111111111111111111111111 \n"); |
||
3816 | tgsi_dump(tokens, 0); |
||
3817 | lp_debug_dump_value(function); |
||
3818 | debug_printf("2222222222222222222222222222 \n"); |
||
3819 | } |
||
3820 | |||
3821 | if (0) { |
||
3822 | LLVMModuleRef module = LLVMGetGlobalParent( |
||
3823 | LLVMGetBasicBlockParent(LLVMGetInsertBlock(gallivm->builder))); |
||
3824 | LLVMDumpModule(module); |
||
3825 | |||
3826 | } |
||
3827 | lp_exec_mask_fini(&bld.exec_mask); |
||
3828 | }><>><>><>=>>>=>><>><>><>><>>><>>>=>>><>>>>>=>>=>>>=>>>>=>>>=>><>>=>>><>=>><>>><>>=>>>>>><>>>>><>>><>><>>>><>>>><>><>><>>><>>><>>><>>>><>><>>><>= |