Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5563 | serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2009 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | /** |
||
29 | * @file |
||
30 | * Helper functions for logical operations. |
||
31 | * |
||
32 | * @author Jose Fonseca |
||
33 | */ |
||
34 | |||
35 | |||
36 | #include "util/u_cpu_detect.h" |
||
37 | #include "util/u_memory.h" |
||
38 | #include "util/u_debug.h" |
||
39 | |||
40 | #include "lp_bld_type.h" |
||
41 | #include "lp_bld_const.h" |
||
42 | #include "lp_bld_init.h" |
||
43 | #include "lp_bld_intr.h" |
||
44 | #include "lp_bld_debug.h" |
||
45 | #include "lp_bld_logic.h" |
||
46 | |||
47 | |||
48 | /* |
||
49 | * XXX |
||
50 | * |
||
51 | * Selection with vector conditional like |
||
52 | * |
||
53 | * select <4 x i1> %C, %A, %B |
||
54 | * |
||
55 | * is valid IR (e.g. llvm/test/Assembler/vector-select.ll), but it is only |
||
56 | * supported on some backends (x86) starting with llvm 3.1. |
||
57 | * |
||
58 | * Expanding the boolean vector to full SIMD register width, as in |
||
59 | * |
||
60 | * sext <4 x i1> %C to <4 x i32> |
||
61 | * |
||
62 | * is valid and supported (e.g., llvm/test/CodeGen/X86/vec_compare.ll), but |
||
63 | * it causes assertion failures in LLVM 2.6. It appears to work correctly on |
||
64 | * LLVM 2.7. |
||
65 | */ |
||
66 | |||
67 | |||
68 | /** |
||
69 | * Build code to compare two values 'a' and 'b' of 'type' using the given func. |
||
70 | * \param func one of PIPE_FUNC_x |
||
71 | * The result values will be 0 for false or ~0 for true. |
||
72 | */ |
||
73 | LLVMValueRef |
||
74 | lp_build_compare(struct gallivm_state *gallivm, |
||
75 | const struct lp_type type, |
||
76 | unsigned func, |
||
77 | LLVMValueRef a, |
||
78 | LLVMValueRef b) |
||
79 | { |
||
80 | LLVMBuilderRef builder = gallivm->builder; |
||
81 | LLVMTypeRef int_vec_type = lp_build_int_vec_type(gallivm, type); |
||
82 | LLVMValueRef zeros = LLVMConstNull(int_vec_type); |
||
83 | LLVMValueRef ones = LLVMConstAllOnes(int_vec_type); |
||
84 | LLVMValueRef cond; |
||
85 | LLVMValueRef res; |
||
86 | |||
87 | assert(func >= PIPE_FUNC_NEVER); |
||
88 | assert(func <= PIPE_FUNC_ALWAYS); |
||
89 | assert(lp_check_value(type, a)); |
||
90 | assert(lp_check_value(type, b)); |
||
91 | |||
92 | if(func == PIPE_FUNC_NEVER) |
||
93 | return zeros; |
||
94 | if(func == PIPE_FUNC_ALWAYS) |
||
95 | return ones; |
||
96 | |||
97 | #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) |
||
98 | /* |
||
99 | * There are no unsigned integer comparison instructions in SSE. |
||
100 | */ |
||
101 | |||
102 | if (!type.floating && !type.sign && |
||
103 | type.width * type.length == 128 && |
||
104 | util_cpu_caps.has_sse2 && |
||
105 | (func == PIPE_FUNC_LESS || |
||
106 | func == PIPE_FUNC_LEQUAL || |
||
107 | func == PIPE_FUNC_GREATER || |
||
108 | func == PIPE_FUNC_GEQUAL) && |
||
109 | (gallivm_debug & GALLIVM_DEBUG_PERF)) { |
||
110 | debug_printf("%s: inefficient <%u x i%u> unsigned comparison\n", |
||
111 | __FUNCTION__, type.length, type.width); |
||
112 | } |
||
113 | #endif |
||
114 | |||
115 | #if HAVE_LLVM < 0x0207 |
||
116 | #if defined(PIPE_ARCH_X86) || defined(PIPE_ARCH_X86_64) |
||
117 | if(type.width * type.length == 128) { |
||
118 | if(type.floating && util_cpu_caps.has_sse) { |
||
119 | /* float[4] comparison */ |
||
120 | LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); |
||
121 | LLVMValueRef args[3]; |
||
122 | unsigned cc; |
||
123 | boolean swap; |
||
124 | |||
125 | swap = FALSE; |
||
126 | switch(func) { |
||
127 | case PIPE_FUNC_EQUAL: |
||
128 | cc = 0; |
||
129 | break; |
||
130 | case PIPE_FUNC_NOTEQUAL: |
||
131 | cc = 4; |
||
132 | break; |
||
133 | case PIPE_FUNC_LESS: |
||
134 | cc = 1; |
||
135 | break; |
||
136 | case PIPE_FUNC_LEQUAL: |
||
137 | cc = 2; |
||
138 | break; |
||
139 | case PIPE_FUNC_GREATER: |
||
140 | cc = 1; |
||
141 | swap = TRUE; |
||
142 | break; |
||
143 | case PIPE_FUNC_GEQUAL: |
||
144 | cc = 2; |
||
145 | swap = TRUE; |
||
146 | break; |
||
147 | default: |
||
148 | assert(0); |
||
149 | return lp_build_undef(gallivm, type); |
||
150 | } |
||
151 | |||
152 | if(swap) { |
||
153 | args[0] = b; |
||
154 | args[1] = a; |
||
155 | } |
||
156 | else { |
||
157 | args[0] = a; |
||
158 | args[1] = b; |
||
159 | } |
||
160 | |||
161 | args[2] = LLVMConstInt(LLVMInt8TypeInContext(gallivm->context), cc, 0); |
||
162 | res = lp_build_intrinsic(builder, |
||
163 | "llvm.x86.sse.cmp.ps", |
||
164 | vec_type, |
||
165 | args, 3); |
||
166 | res = LLVMBuildBitCast(builder, res, int_vec_type, ""); |
||
167 | return res; |
||
168 | } |
||
169 | else if(util_cpu_caps.has_sse2) { |
||
170 | /* int[4] comparison */ |
||
171 | static const struct { |
||
172 | unsigned swap:1; |
||
173 | unsigned eq:1; |
||
174 | unsigned gt:1; |
||
175 | unsigned not:1; |
||
176 | } table[] = { |
||
177 | {0, 0, 0, 1}, /* PIPE_FUNC_NEVER */ |
||
178 | {1, 0, 1, 0}, /* PIPE_FUNC_LESS */ |
||
179 | {0, 1, 0, 0}, /* PIPE_FUNC_EQUAL */ |
||
180 | {0, 0, 1, 1}, /* PIPE_FUNC_LEQUAL */ |
||
181 | {0, 0, 1, 0}, /* PIPE_FUNC_GREATER */ |
||
182 | {0, 1, 0, 1}, /* PIPE_FUNC_NOTEQUAL */ |
||
183 | {1, 0, 1, 1}, /* PIPE_FUNC_GEQUAL */ |
||
184 | {0, 0, 0, 0} /* PIPE_FUNC_ALWAYS */ |
||
185 | }; |
||
186 | const char *pcmpeq; |
||
187 | const char *pcmpgt; |
||
188 | LLVMValueRef args[2]; |
||
189 | LLVMValueRef res; |
||
190 | LLVMTypeRef vec_type = lp_build_vec_type(gallivm, type); |
||
191 | |||
192 | switch (type.width) { |
||
193 | case 8: |
||
194 | pcmpeq = "llvm.x86.sse2.pcmpeq.b"; |
||
195 | pcmpgt = "llvm.x86.sse2.pcmpgt.b"; |
||
196 | break; |
||
197 | case 16: |
||
198 | pcmpeq = "llvm.x86.sse2.pcmpeq.w"; |
||
199 | pcmpgt = "llvm.x86.sse2.pcmpgt.w"; |
||
200 | break; |
||
201 | case 32: |
||
202 | pcmpeq = "llvm.x86.sse2.pcmpeq.d"; |
||
203 | pcmpgt = "llvm.x86.sse2.pcmpgt.d"; |
||
204 | break; |
||
205 | default: |
||
206 | assert(0); |
||
207 | return lp_build_undef(gallivm, type); |
||
208 | } |
||
209 | |||
210 | /* There are no unsigned comparison instructions. So flip the sign bit |
||
211 | * so that the results match. |
||
212 | */ |
||
213 | if (table[func].gt && !type.sign) { |
||
214 | LLVMValueRef msb = lp_build_const_int_vec(gallivm, type, (unsigned long long)1 << (type.width - 1)); |
||
215 | a = LLVMBuildXor(builder, a, msb, ""); |
||
216 | b = LLVMBuildXor(builder, b, msb, ""); |
||
217 | } |
||
218 | |||
219 | if(table[func].swap) { |
||
220 | args[0] = b; |
||
221 | args[1] = a; |
||
222 | } |
||
223 | else { |
||
224 | args[0] = a; |
||
225 | args[1] = b; |
||
226 | } |
||
227 | |||
228 | if(table[func].eq) |
||
229 | res = lp_build_intrinsic(builder, pcmpeq, vec_type, args, 2); |
||
230 | else if (table[func].gt) |
||
231 | res = lp_build_intrinsic(builder, pcmpgt, vec_type, args, 2); |
||
232 | else |
||
233 | res = LLVMConstNull(vec_type); |
||
234 | |||
235 | if(table[func].not) |
||
236 | res = LLVMBuildNot(builder, res, ""); |
||
237 | |||
238 | return res; |
||
239 | } |
||
240 | } /* if (type.width * type.length == 128) */ |
||
241 | #endif |
||
242 | #endif /* HAVE_LLVM < 0x0207 */ |
||
243 | |||
244 | /* XXX: It is not clear if we should use the ordered or unordered operators */ |
||
245 | |||
246 | if(type.floating) { |
||
247 | LLVMRealPredicate op; |
||
248 | switch(func) { |
||
249 | case PIPE_FUNC_NEVER: |
||
250 | op = LLVMRealPredicateFalse; |
||
251 | break; |
||
252 | case PIPE_FUNC_ALWAYS: |
||
253 | op = LLVMRealPredicateTrue; |
||
254 | break; |
||
255 | case PIPE_FUNC_EQUAL: |
||
256 | op = LLVMRealUEQ; |
||
257 | break; |
||
258 | case PIPE_FUNC_NOTEQUAL: |
||
259 | op = LLVMRealUNE; |
||
260 | break; |
||
261 | case PIPE_FUNC_LESS: |
||
262 | op = LLVMRealULT; |
||
263 | break; |
||
264 | case PIPE_FUNC_LEQUAL: |
||
265 | op = LLVMRealULE; |
||
266 | break; |
||
267 | case PIPE_FUNC_GREATER: |
||
268 | op = LLVMRealUGT; |
||
269 | break; |
||
270 | case PIPE_FUNC_GEQUAL: |
||
271 | op = LLVMRealUGE; |
||
272 | break; |
||
273 | default: |
||
274 | assert(0); |
||
275 | return lp_build_undef(gallivm, type); |
||
276 | } |
||
277 | |||
278 | #if HAVE_LLVM >= 0x0207 |
||
279 | cond = LLVMBuildFCmp(builder, op, a, b, ""); |
||
280 | res = LLVMBuildSExt(builder, cond, int_vec_type, ""); |
||
281 | #else |
||
282 | if (type.length == 1) { |
||
283 | cond = LLVMBuildFCmp(builder, op, a, b, ""); |
||
284 | res = LLVMBuildSExt(builder, cond, int_vec_type, ""); |
||
285 | } |
||
286 | else { |
||
287 | unsigned i; |
||
288 | |||
289 | res = LLVMGetUndef(int_vec_type); |
||
290 | |||
291 | debug_printf("%s: warning: using slow element-wise float" |
||
292 | " vector comparison\n", __FUNCTION__); |
||
293 | for (i = 0; i < type.length; ++i) { |
||
294 | LLVMValueRef index = lp_build_const_int32(gallivm, i); |
||
295 | cond = LLVMBuildFCmp(builder, op, |
||
296 | LLVMBuildExtractElement(builder, a, index, ""), |
||
297 | LLVMBuildExtractElement(builder, b, index, ""), |
||
298 | ""); |
||
299 | cond = LLVMBuildSelect(builder, cond, |
||
300 | LLVMConstExtractElement(ones, index), |
||
301 | LLVMConstExtractElement(zeros, index), |
||
302 | ""); |
||
303 | res = LLVMBuildInsertElement(builder, res, cond, index, ""); |
||
304 | } |
||
305 | } |
||
306 | #endif |
||
307 | } |
||
308 | else { |
||
309 | LLVMIntPredicate op; |
||
310 | switch(func) { |
||
311 | case PIPE_FUNC_EQUAL: |
||
312 | op = LLVMIntEQ; |
||
313 | break; |
||
314 | case PIPE_FUNC_NOTEQUAL: |
||
315 | op = LLVMIntNE; |
||
316 | break; |
||
317 | case PIPE_FUNC_LESS: |
||
318 | op = type.sign ? LLVMIntSLT : LLVMIntULT; |
||
319 | break; |
||
320 | case PIPE_FUNC_LEQUAL: |
||
321 | op = type.sign ? LLVMIntSLE : LLVMIntULE; |
||
322 | break; |
||
323 | case PIPE_FUNC_GREATER: |
||
324 | op = type.sign ? LLVMIntSGT : LLVMIntUGT; |
||
325 | break; |
||
326 | case PIPE_FUNC_GEQUAL: |
||
327 | op = type.sign ? LLVMIntSGE : LLVMIntUGE; |
||
328 | break; |
||
329 | default: |
||
330 | assert(0); |
||
331 | return lp_build_undef(gallivm, type); |
||
332 | } |
||
333 | |||
334 | #if HAVE_LLVM >= 0x0207 |
||
335 | cond = LLVMBuildICmp(builder, op, a, b, ""); |
||
336 | res = LLVMBuildSExt(builder, cond, int_vec_type, ""); |
||
337 | #else |
||
338 | if (type.length == 1) { |
||
339 | cond = LLVMBuildICmp(builder, op, a, b, ""); |
||
340 | res = LLVMBuildSExt(builder, cond, int_vec_type, ""); |
||
341 | } |
||
342 | else { |
||
343 | unsigned i; |
||
344 | |||
345 | res = LLVMGetUndef(int_vec_type); |
||
346 | |||
347 | if (gallivm_debug & GALLIVM_DEBUG_PERF) { |
||
348 | debug_printf("%s: using slow element-wise int" |
||
349 | " vector comparison\n", __FUNCTION__); |
||
350 | } |
||
351 | |||
352 | for(i = 0; i < type.length; ++i) { |
||
353 | LLVMValueRef index = lp_build_const_int32(gallivm, i); |
||
354 | cond = LLVMBuildICmp(builder, op, |
||
355 | LLVMBuildExtractElement(builder, a, index, ""), |
||
356 | LLVMBuildExtractElement(builder, b, index, ""), |
||
357 | ""); |
||
358 | cond = LLVMBuildSelect(builder, cond, |
||
359 | LLVMConstExtractElement(ones, index), |
||
360 | LLVMConstExtractElement(zeros, index), |
||
361 | ""); |
||
362 | res = LLVMBuildInsertElement(builder, res, cond, index, ""); |
||
363 | } |
||
364 | } |
||
365 | #endif |
||
366 | } |
||
367 | |||
368 | return res; |
||
369 | } |
||
370 | |||
371 | |||
372 | |||
373 | /** |
||
374 | * Build code to compare two values 'a' and 'b' using the given func. |
||
375 | * \param func one of PIPE_FUNC_x |
||
376 | * The result values will be 0 for false or ~0 for true. |
||
377 | */ |
||
378 | LLVMValueRef |
||
379 | lp_build_cmp(struct lp_build_context *bld, |
||
380 | unsigned func, |
||
381 | LLVMValueRef a, |
||
382 | LLVMValueRef b) |
||
383 | { |
||
384 | return lp_build_compare(bld->gallivm, bld->type, func, a, b); |
||
385 | } |
||
386 | |||
387 | |||
388 | /** |
||
389 | * Return (mask & a) | (~mask & b); |
||
390 | */ |
||
391 | LLVMValueRef |
||
392 | lp_build_select_bitwise(struct lp_build_context *bld, |
||
393 | LLVMValueRef mask, |
||
394 | LLVMValueRef a, |
||
395 | LLVMValueRef b) |
||
396 | { |
||
397 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
398 | struct lp_type type = bld->type; |
||
399 | LLVMValueRef res; |
||
400 | |||
401 | assert(lp_check_value(type, a)); |
||
402 | assert(lp_check_value(type, b)); |
||
403 | |||
404 | if (a == b) { |
||
405 | return a; |
||
406 | } |
||
407 | |||
408 | if(type.floating) { |
||
409 | LLVMTypeRef int_vec_type = lp_build_int_vec_type(bld->gallivm, type); |
||
410 | a = LLVMBuildBitCast(builder, a, int_vec_type, ""); |
||
411 | b = LLVMBuildBitCast(builder, b, int_vec_type, ""); |
||
412 | } |
||
413 | |||
414 | a = LLVMBuildAnd(builder, a, mask, ""); |
||
415 | |||
416 | /* This often gets translated to PANDN, but sometimes the NOT is |
||
417 | * pre-computed and stored in another constant. The best strategy depends |
||
418 | * on available registers, so it is not a big deal -- hopefully LLVM does |
||
419 | * the right decision attending the rest of the program. |
||
420 | */ |
||
421 | b = LLVMBuildAnd(builder, b, LLVMBuildNot(builder, mask, ""), ""); |
||
422 | |||
423 | res = LLVMBuildOr(builder, a, b, ""); |
||
424 | |||
425 | if(type.floating) { |
||
426 | LLVMTypeRef vec_type = lp_build_vec_type(bld->gallivm, type); |
||
427 | res = LLVMBuildBitCast(builder, res, vec_type, ""); |
||
428 | } |
||
429 | |||
430 | return res; |
||
431 | } |
||
432 | |||
433 | |||
434 | /** |
||
435 | * Return mask ? a : b; |
||
436 | * |
||
437 | * mask is a bitwise mask, composed of 0 or ~0 for each element. Any other value |
||
438 | * will yield unpredictable results. |
||
439 | */ |
||
440 | LLVMValueRef |
||
441 | lp_build_select(struct lp_build_context *bld, |
||
442 | LLVMValueRef mask, |
||
443 | LLVMValueRef a, |
||
444 | LLVMValueRef b) |
||
445 | { |
||
446 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
447 | LLVMContextRef lc = bld->gallivm->context; |
||
448 | struct lp_type type = bld->type; |
||
449 | LLVMValueRef res; |
||
450 | |||
451 | assert(lp_check_value(type, a)); |
||
452 | assert(lp_check_value(type, b)); |
||
453 | |||
454 | if(a == b) |
||
455 | return a; |
||
456 | |||
457 | if (type.length == 1) { |
||
458 | mask = LLVMBuildTrunc(builder, mask, LLVMInt1TypeInContext(lc), ""); |
||
459 | res = LLVMBuildSelect(builder, mask, a, b, ""); |
||
460 | } |
||
461 | else if (0) { |
||
462 | /* Generate a vector select. |
||
463 | * |
||
464 | * XXX: Using vector selects would avoid emitting intrinsics, but they aren't |
||
465 | * properly supported yet. |
||
466 | * |
||
467 | * LLVM 3.1 supports it, but it yields buggy code (e.g. lp_blend_test). |
||
468 | * |
||
469 | * LLVM 3.0 includes experimental support provided the -promote-elements |
||
470 | * options is passed to LLVM's command line (e.g., via |
||
471 | * llvm::cl::ParseCommandLineOptions), but resulting code quality is much |
||
472 | * worse, probably because some optimization passes don't know how to |
||
473 | * handle vector selects. |
||
474 | * |
||
475 | * See also: |
||
476 | * - http://lists.cs.uiuc.edu/pipermail/llvmdev/2011-October/043659.html |
||
477 | */ |
||
478 | |||
479 | /* Convert the mask to a vector of booleans. |
||
480 | * XXX: There are two ways to do this. Decide what's best. |
||
481 | */ |
||
482 | if (1) { |
||
483 | LLVMTypeRef bool_vec_type = LLVMVectorType(LLVMInt1TypeInContext(lc), type.length); |
||
484 | mask = LLVMBuildTrunc(builder, mask, bool_vec_type, ""); |
||
485 | } else { |
||
486 | mask = LLVMBuildICmp(builder, LLVMIntNE, mask, LLVMConstNull(bld->int_vec_type), ""); |
||
487 | } |
||
488 | res = LLVMBuildSelect(builder, mask, a, b, ""); |
||
489 | } |
||
490 | else if (((util_cpu_caps.has_sse4_1 && |
||
491 | type.width * type.length == 128) || |
||
492 | (util_cpu_caps.has_avx && |
||
493 | type.width * type.length == 256 && type.width >= 32)) && |
||
494 | !LLVMIsConstant(a) && |
||
495 | !LLVMIsConstant(b) && |
||
496 | !LLVMIsConstant(mask)) { |
||
497 | const char *intrinsic; |
||
498 | LLVMTypeRef arg_type; |
||
499 | LLVMValueRef args[3]; |
||
500 | |||
501 | /* |
||
502 | * There's only float blend in AVX but can just cast i32/i64 |
||
503 | * to float. |
||
504 | */ |
||
505 | if (type.width * type.length == 256) { |
||
506 | if (type.width == 64) { |
||
507 | intrinsic = "llvm.x86.avx.blendv.pd.256"; |
||
508 | arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 4); |
||
509 | } |
||
510 | else { |
||
511 | intrinsic = "llvm.x86.avx.blendv.ps.256"; |
||
512 | arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 8); |
||
513 | } |
||
514 | } |
||
515 | else if (type.floating && |
||
516 | type.width == 64) { |
||
517 | intrinsic = "llvm.x86.sse41.blendvpd"; |
||
518 | arg_type = LLVMVectorType(LLVMDoubleTypeInContext(lc), 2); |
||
519 | } else if (type.floating && |
||
520 | type.width == 32) { |
||
521 | intrinsic = "llvm.x86.sse41.blendvps"; |
||
522 | arg_type = LLVMVectorType(LLVMFloatTypeInContext(lc), 4); |
||
523 | } else { |
||
524 | intrinsic = "llvm.x86.sse41.pblendvb"; |
||
525 | arg_type = LLVMVectorType(LLVMInt8TypeInContext(lc), 16); |
||
526 | } |
||
527 | |||
528 | if (arg_type != bld->int_vec_type) { |
||
529 | mask = LLVMBuildBitCast(builder, mask, arg_type, ""); |
||
530 | } |
||
531 | |||
532 | if (arg_type != bld->vec_type) { |
||
533 | a = LLVMBuildBitCast(builder, a, arg_type, ""); |
||
534 | b = LLVMBuildBitCast(builder, b, arg_type, ""); |
||
535 | } |
||
536 | |||
537 | args[0] = b; |
||
538 | args[1] = a; |
||
539 | args[2] = mask; |
||
540 | |||
541 | res = lp_build_intrinsic(builder, intrinsic, |
||
542 | arg_type, args, Elements(args)); |
||
543 | |||
544 | if (arg_type != bld->vec_type) { |
||
545 | res = LLVMBuildBitCast(builder, res, bld->vec_type, ""); |
||
546 | } |
||
547 | } |
||
548 | else { |
||
549 | res = lp_build_select_bitwise(bld, mask, a, b); |
||
550 | } |
||
551 | |||
552 | return res; |
||
553 | } |
||
554 | |||
555 | |||
556 | /** |
||
557 | * Return mask ? a : b; |
||
558 | * |
||
559 | * mask is a TGSI_WRITEMASK_xxx. |
||
560 | */ |
||
561 | LLVMValueRef |
||
562 | lp_build_select_aos(struct lp_build_context *bld, |
||
563 | unsigned mask, |
||
564 | LLVMValueRef a, |
||
565 | LLVMValueRef b, |
||
566 | unsigned num_channels) |
||
567 | { |
||
568 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
569 | const struct lp_type type = bld->type; |
||
570 | const unsigned n = type.length; |
||
571 | unsigned i, j; |
||
572 | |||
573 | assert((mask & ~0xf) == 0); |
||
574 | assert(lp_check_value(type, a)); |
||
575 | assert(lp_check_value(type, b)); |
||
576 | |||
577 | if(a == b) |
||
578 | return a; |
||
579 | if((mask & 0xf) == 0xf) |
||
580 | return a; |
||
581 | if((mask & 0xf) == 0x0) |
||
582 | return b; |
||
583 | if(a == bld->undef || b == bld->undef) |
||
584 | return bld->undef; |
||
585 | |||
586 | /* |
||
587 | * There are two major ways of accomplishing this: |
||
588 | * - with a shuffle |
||
589 | * - with a select |
||
590 | * |
||
591 | * The flip between these is empirical and might need to be adjusted. |
||
592 | */ |
||
593 | if (n <= 4) { |
||
594 | /* |
||
595 | * Shuffle. |
||
596 | */ |
||
597 | LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context); |
||
598 | LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH]; |
||
599 | |||
600 | for(j = 0; j < n; j += num_channels) |
||
601 | for(i = 0; i < num_channels; ++i) |
||
602 | shuffles[j + i] = LLVMConstInt(elem_type, |
||
603 | (mask & (1 << i) ? 0 : n) + j + i, |
||
604 | 0); |
||
605 | |||
606 | return LLVMBuildShuffleVector(builder, a, b, LLVMConstVector(shuffles, n), ""); |
||
607 | } |
||
608 | else { |
||
609 | LLVMValueRef mask_vec = lp_build_const_mask_aos(bld->gallivm, type, mask, num_channels); |
||
610 | return lp_build_select(bld, mask_vec, a, b); |
||
611 | } |
||
612 | } |
||
613 | |||
614 | |||
615 | /** |
||
616 | * Return (scalar-cast)val ? true : false; |
||
617 | */ |
||
618 | LLVMValueRef |
||
619 | lp_build_any_true_range(struct lp_build_context *bld, |
||
620 | unsigned real_length, |
||
621 | LLVMValueRef val) |
||
622 | { |
||
623 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
624 | LLVMTypeRef scalar_type; |
||
625 | LLVMTypeRef true_type; |
||
626 | |||
627 | assert(real_length <= bld->type.length); |
||
628 | |||
629 | true_type = LLVMIntTypeInContext(bld->gallivm->context, |
||
630 | bld->type.width * real_length); |
||
631 | scalar_type = LLVMIntTypeInContext(bld->gallivm->context, |
||
632 | bld->type.width * bld->type.length); |
||
633 | val = LLVMBuildBitCast(builder, val, scalar_type, ""); |
||
634 | /* |
||
635 | * We're using always native types so we can use intrinsics. |
||
636 | * However, if we don't do per-element calculations, we must ensure |
||
637 | * the excess elements aren't used since they may contain garbage. |
||
638 | */ |
||
639 | if (real_length < bld->type.length) { |
||
640 | val = LLVMBuildTrunc(builder, val, true_type, ""); |
||
641 | } |
||
642 | return LLVMBuildICmp(builder, LLVMIntNE, |
||
643 | val, LLVMConstNull(true_type), ""); |
||
644 | }>=>><>>>=>>>>><>>%u>=>4>4>4> |