Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2009-2010 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | /** |
||
29 | * @file |
||
30 | * Depth/stencil testing to LLVM IR translation. |
||
31 | * |
||
32 | * To be done accurately/efficiently the depth/stencil test must be done with |
||
33 | * the same type/format of the depth/stencil buffer, which implies massaging |
||
34 | * the incoming depths to fit into place. Using a more straightforward |
||
35 | * type/format for depth/stencil values internally and only convert when |
||
36 | * flushing would avoid this, but it would most likely result in depth fighting |
||
37 | * artifacts. |
||
38 | * |
||
39 | * Since we're using linear layout for everything, but we need to deal with |
||
40 | * 2x2 quads, we need to load/store multiple values and swizzle them into |
||
41 | * place (we could avoid this by doing depth/stencil testing in linear format, |
||
42 | * which would be easy for late depth/stencil test as we could do that after |
||
43 | * the fragment shader loop just as we do for color buffers, but more tricky |
||
44 | * for early depth test as we'd need both masks and interpolated depth in |
||
45 | * linear format). |
||
46 | * |
||
47 | * |
||
48 | * @author Jose Fonseca |
||
49 | * @author Brian Paul |
||
50 | */ |
||
51 | |||
52 | #include "pipe/p_state.h" |
||
53 | #include "util/u_format.h" |
||
54 | #include "util/u_cpu_detect.h" |
||
55 | |||
56 | #include "gallivm/lp_bld_type.h" |
||
57 | #include "gallivm/lp_bld_arit.h" |
||
58 | #include "gallivm/lp_bld_bitarit.h" |
||
59 | #include "gallivm/lp_bld_const.h" |
||
60 | #include "gallivm/lp_bld_conv.h" |
||
61 | #include "gallivm/lp_bld_logic.h" |
||
62 | #include "gallivm/lp_bld_flow.h" |
||
63 | #include "gallivm/lp_bld_intr.h" |
||
64 | #include "gallivm/lp_bld_debug.h" |
||
65 | #include "gallivm/lp_bld_swizzle.h" |
||
66 | #include "gallivm/lp_bld_pack.h" |
||
67 | |||
68 | #include "lp_bld_depth.h" |
||
69 | |||
70 | |||
/**
 * Selector for one of the three operation fields of pipe_stencil_state.
 * lp_build_stencil_op_single() maps each value to the matching field.
 */
enum stencil_op {
   S_FAIL_OP = 0,   /**< use pipe_stencil_state::fail_op */
   Z_FAIL_OP = 1,   /**< use pipe_stencil_state::zfail_op */
   Z_PASS_OP = 2    /**< use pipe_stencil_state::zpass_op */
};
||
77 | |||
78 | |||
79 | |||
80 | /** |
||
81 | * Do the stencil test comparison (compare FB stencil values against ref value). |
||
82 | * This will be used twice when generating two-sided stencil code. |
||
83 | * \param stencil the front/back stencil state |
||
84 | * \param stencilRef the stencil reference value, replicated as a vector |
||
85 | * \param stencilVals vector of stencil values from framebuffer |
||
86 | * \return vector mask of pass/fail values (~0 or 0) |
||
87 | */ |
||
88 | static LLVMValueRef |
||
89 | lp_build_stencil_test_single(struct lp_build_context *bld, |
||
90 | const struct pipe_stencil_state *stencil, |
||
91 | LLVMValueRef stencilRef, |
||
92 | LLVMValueRef stencilVals) |
||
93 | { |
||
94 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
95 | const unsigned stencilMax = 255; /* XXX fix */ |
||
96 | struct lp_type type = bld->type; |
||
97 | LLVMValueRef res; |
||
98 | |||
99 | /* |
||
100 | * SSE2 has intrinsics for signed comparisons, but not unsigned ones. Values |
||
101 | * are between 0..255 so ensure we generate the fastest comparisons for |
||
102 | * wider elements. |
||
103 | */ |
||
104 | if (type.width <= 8) { |
||
105 | assert(!type.sign); |
||
106 | } else { |
||
107 | assert(type.sign); |
||
108 | } |
||
109 | |||
110 | assert(stencil->enabled); |
||
111 | |||
112 | if (stencil->valuemask != stencilMax) { |
||
113 | /* compute stencilRef = stencilRef & valuemask */ |
||
114 | LLVMValueRef valuemask = lp_build_const_int_vec(bld->gallivm, type, stencil->valuemask); |
||
115 | stencilRef = LLVMBuildAnd(builder, stencilRef, valuemask, ""); |
||
116 | /* compute stencilVals = stencilVals & valuemask */ |
||
117 | stencilVals = LLVMBuildAnd(builder, stencilVals, valuemask, ""); |
||
118 | } |
||
119 | |||
120 | res = lp_build_cmp(bld, stencil->func, stencilRef, stencilVals); |
||
121 | |||
122 | return res; |
||
123 | } |
||
124 | |||
125 | |||
126 | /** |
||
127 | * Do the one or two-sided stencil test comparison. |
||
128 | * \sa lp_build_stencil_test_single |
||
129 | * \param front_facing an integer vector mask, indicating front (~0) or back |
||
130 | * (0) facing polygon. If NULL, assume front-facing. |
||
131 | */ |
||
132 | static LLVMValueRef |
||
133 | lp_build_stencil_test(struct lp_build_context *bld, |
||
134 | const struct pipe_stencil_state stencil[2], |
||
135 | LLVMValueRef stencilRefs[2], |
||
136 | LLVMValueRef stencilVals, |
||
137 | LLVMValueRef front_facing) |
||
138 | { |
||
139 | LLVMValueRef res; |
||
140 | |||
141 | assert(stencil[0].enabled); |
||
142 | |||
143 | /* do front face test */ |
||
144 | res = lp_build_stencil_test_single(bld, &stencil[0], |
||
145 | stencilRefs[0], stencilVals); |
||
146 | |||
147 | if (stencil[1].enabled && front_facing != NULL) { |
||
148 | /* do back face test */ |
||
149 | LLVMValueRef back_res; |
||
150 | |||
151 | back_res = lp_build_stencil_test_single(bld, &stencil[1], |
||
152 | stencilRefs[1], stencilVals); |
||
153 | |||
154 | res = lp_build_select(bld, front_facing, res, back_res); |
||
155 | } |
||
156 | |||
157 | return res; |
||
158 | } |
||
159 | |||
160 | |||
161 | /** |
||
162 | * Apply the stencil operator (add/sub/keep/etc) to the given vector |
||
163 | * of stencil values. |
||
164 | * \return new stencil values vector |
||
165 | */ |
||
166 | static LLVMValueRef |
||
167 | lp_build_stencil_op_single(struct lp_build_context *bld, |
||
168 | const struct pipe_stencil_state *stencil, |
||
169 | enum stencil_op op, |
||
170 | LLVMValueRef stencilRef, |
||
171 | LLVMValueRef stencilVals) |
||
172 | |||
173 | { |
||
174 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
175 | struct lp_type type = bld->type; |
||
176 | LLVMValueRef res; |
||
177 | LLVMValueRef max = lp_build_const_int_vec(bld->gallivm, type, 0xff); |
||
178 | unsigned stencil_op; |
||
179 | |||
180 | assert(type.sign); |
||
181 | |||
182 | switch (op) { |
||
183 | case S_FAIL_OP: |
||
184 | stencil_op = stencil->fail_op; |
||
185 | break; |
||
186 | case Z_FAIL_OP: |
||
187 | stencil_op = stencil->zfail_op; |
||
188 | break; |
||
189 | case Z_PASS_OP: |
||
190 | stencil_op = stencil->zpass_op; |
||
191 | break; |
||
192 | default: |
||
193 | assert(0 && "Invalid stencil_op mode"); |
||
194 | stencil_op = PIPE_STENCIL_OP_KEEP; |
||
195 | } |
||
196 | |||
197 | switch (stencil_op) { |
||
198 | case PIPE_STENCIL_OP_KEEP: |
||
199 | res = stencilVals; |
||
200 | /* we can return early for this case */ |
||
201 | return res; |
||
202 | case PIPE_STENCIL_OP_ZERO: |
||
203 | res = bld->zero; |
||
204 | break; |
||
205 | case PIPE_STENCIL_OP_REPLACE: |
||
206 | res = stencilRef; |
||
207 | break; |
||
208 | case PIPE_STENCIL_OP_INCR: |
||
209 | res = lp_build_add(bld, stencilVals, bld->one); |
||
210 | res = lp_build_min(bld, res, max); |
||
211 | break; |
||
212 | case PIPE_STENCIL_OP_DECR: |
||
213 | res = lp_build_sub(bld, stencilVals, bld->one); |
||
214 | res = lp_build_max(bld, res, bld->zero); |
||
215 | break; |
||
216 | case PIPE_STENCIL_OP_INCR_WRAP: |
||
217 | res = lp_build_add(bld, stencilVals, bld->one); |
||
218 | res = LLVMBuildAnd(builder, res, max, ""); |
||
219 | break; |
||
220 | case PIPE_STENCIL_OP_DECR_WRAP: |
||
221 | res = lp_build_sub(bld, stencilVals, bld->one); |
||
222 | res = LLVMBuildAnd(builder, res, max, ""); |
||
223 | break; |
||
224 | case PIPE_STENCIL_OP_INVERT: |
||
225 | res = LLVMBuildNot(builder, stencilVals, ""); |
||
226 | res = LLVMBuildAnd(builder, res, max, ""); |
||
227 | break; |
||
228 | default: |
||
229 | assert(0 && "bad stencil op mode"); |
||
230 | res = bld->undef; |
||
231 | } |
||
232 | |||
233 | return res; |
||
234 | } |
||
235 | |||
236 | |||
237 | /** |
||
238 | * Do the one or two-sided stencil test op/update. |
||
239 | */ |
||
240 | static LLVMValueRef |
||
241 | lp_build_stencil_op(struct lp_build_context *bld, |
||
242 | const struct pipe_stencil_state stencil[2], |
||
243 | enum stencil_op op, |
||
244 | LLVMValueRef stencilRefs[2], |
||
245 | LLVMValueRef stencilVals, |
||
246 | LLVMValueRef mask, |
||
247 | LLVMValueRef front_facing) |
||
248 | |||
249 | { |
||
250 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
251 | LLVMValueRef res; |
||
252 | |||
253 | assert(stencil[0].enabled); |
||
254 | |||
255 | /* do front face op */ |
||
256 | res = lp_build_stencil_op_single(bld, &stencil[0], op, |
||
257 | stencilRefs[0], stencilVals); |
||
258 | |||
259 | if (stencil[1].enabled && front_facing != NULL) { |
||
260 | /* do back face op */ |
||
261 | LLVMValueRef back_res; |
||
262 | |||
263 | back_res = lp_build_stencil_op_single(bld, &stencil[1], op, |
||
264 | stencilRefs[1], stencilVals); |
||
265 | |||
266 | res = lp_build_select(bld, front_facing, res, back_res); |
||
267 | } |
||
268 | |||
269 | if (stencil[0].writemask != 0xff || |
||
270 | (stencil[1].enabled && front_facing != NULL && stencil[1].writemask != 0xff)) { |
||
271 | /* mask &= stencil[0].writemask */ |
||
272 | LLVMValueRef writemask = lp_build_const_int_vec(bld->gallivm, bld->type, |
||
273 | stencil[0].writemask); |
||
274 | if (stencil[1].enabled && stencil[1].writemask != stencil[0].writemask && front_facing != NULL) { |
||
275 | LLVMValueRef back_writemask = lp_build_const_int_vec(bld->gallivm, bld->type, |
||
276 | stencil[1].writemask); |
||
277 | writemask = lp_build_select(bld, front_facing, writemask, back_writemask); |
||
278 | } |
||
279 | |||
280 | mask = LLVMBuildAnd(builder, mask, writemask, ""); |
||
281 | /* res = (res & mask) | (stencilVals & ~mask) */ |
||
282 | res = lp_build_select_bitwise(bld, mask, res, stencilVals); |
||
283 | } |
||
284 | else { |
||
285 | /* res = mask ? res : stencilVals */ |
||
286 | res = lp_build_select(bld, mask, res, stencilVals); |
||
287 | } |
||
288 | |||
289 | return res; |
||
290 | } |
||
291 | |||
292 | |||
293 | |||
294 | /** |
||
295 | * Return a type that matches the depth/stencil format. |
||
296 | */ |
||
297 | struct lp_type |
||
298 | lp_depth_type(const struct util_format_description *format_desc, |
||
299 | unsigned length) |
||
300 | { |
||
301 | struct lp_type type; |
||
302 | unsigned z_swizzle; |
||
303 | |||
304 | assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); |
||
305 | assert(format_desc->block.width == 1); |
||
306 | assert(format_desc->block.height == 1); |
||
307 | |||
308 | memset(&type, 0, sizeof type); |
||
309 | type.width = format_desc->block.bits; |
||
310 | |||
311 | z_swizzle = format_desc->swizzle[0]; |
||
312 | if (z_swizzle < 4) { |
||
313 | if (format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_FLOAT) { |
||
314 | type.floating = TRUE; |
||
315 | assert(z_swizzle == 0); |
||
316 | assert(format_desc->channel[z_swizzle].size == 32); |
||
317 | } |
||
318 | else if(format_desc->channel[z_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) { |
||
319 | assert(format_desc->block.bits <= 32); |
||
320 | assert(format_desc->channel[z_swizzle].normalized); |
||
321 | if (format_desc->channel[z_swizzle].size < format_desc->block.bits) { |
||
322 | /* Prefer signed integers when possible, as SSE has less support |
||
323 | * for unsigned comparison; |
||
324 | */ |
||
325 | type.sign = TRUE; |
||
326 | } |
||
327 | } |
||
328 | else |
||
329 | assert(0); |
||
330 | } |
||
331 | |||
332 | type.length = length; |
||
333 | |||
334 | return type; |
||
335 | } |
||
336 | |||
337 | |||
338 | /** |
||
339 | * Compute bitmask and bit shift to apply to the incoming fragment Z values |
||
340 | * and the Z buffer values needed before doing the Z comparison. |
||
341 | * |
||
342 | * Note that we leave the Z bits in the position that we find them |
||
343 | * in the Z buffer (typically 0xffffff00 or 0x00ffffff). That lets us |
||
344 | * get by with fewer bit twiddling steps. |
||
345 | */ |
||
346 | static boolean |
||
347 | get_z_shift_and_mask(const struct util_format_description *format_desc, |
||
348 | unsigned *shift, unsigned *width, unsigned *mask) |
||
349 | { |
||
350 | unsigned total_bits; |
||
351 | unsigned z_swizzle; |
||
352 | |||
353 | assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); |
||
354 | assert(format_desc->block.width == 1); |
||
355 | assert(format_desc->block.height == 1); |
||
356 | |||
357 | /* 64bit d/s format is special already extracted 32 bits */ |
||
358 | total_bits = format_desc->block.bits > 32 ? 32 : format_desc->block.bits; |
||
359 | |||
360 | z_swizzle = format_desc->swizzle[0]; |
||
361 | |||
362 | if (z_swizzle == UTIL_FORMAT_SWIZZLE_NONE) |
||
363 | return FALSE; |
||
364 | |||
365 | *width = format_desc->channel[z_swizzle].size; |
||
366 | *shift = format_desc->channel[z_swizzle].shift; |
||
367 | |||
368 | if (*width == total_bits) { |
||
369 | *mask = 0xffffffff; |
||
370 | } else { |
||
371 | *mask = ((1 << *width) - 1) << *shift; |
||
372 | } |
||
373 | |||
374 | return TRUE; |
||
375 | } |
||
376 | |||
377 | |||
378 | /** |
||
379 | * Compute bitmask and bit shift to apply to the framebuffer pixel values |
||
380 | * to put the stencil bits in the least significant position. |
||
381 | * (i.e. 0x000000ff) |
||
382 | */ |
||
383 | static boolean |
||
384 | get_s_shift_and_mask(const struct util_format_description *format_desc, |
||
385 | unsigned *shift, unsigned *mask) |
||
386 | { |
||
387 | unsigned s_swizzle; |
||
388 | unsigned sz; |
||
389 | |||
390 | s_swizzle = format_desc->swizzle[1]; |
||
391 | |||
392 | if (s_swizzle == UTIL_FORMAT_SWIZZLE_NONE) |
||
393 | return FALSE; |
||
394 | |||
395 | /* just special case 64bit d/s format */ |
||
396 | if (format_desc->block.bits > 32) { |
||
397 | /* XXX big-endian? */ |
||
398 | assert(format_desc->format == PIPE_FORMAT_Z32_FLOAT_S8X24_UINT); |
||
399 | *shift = 0; |
||
400 | *mask = 0xff; |
||
401 | return TRUE; |
||
402 | } |
||
403 | |||
404 | *shift = format_desc->channel[s_swizzle].shift; |
||
405 | sz = format_desc->channel[s_swizzle].size; |
||
406 | *mask = (1U << sz) - 1U; |
||
407 | |||
408 | return TRUE; |
||
409 | } |
||
410 | |||
411 | |||
412 | /** |
||
413 | * Perform the occlusion test and increase the counter. |
||
414 | * Test the depth mask. Add the number of channel which has none zero mask |
||
415 | * into the occlusion counter. e.g. maskvalue is {-1, -1, -1, -1}. |
||
416 | * The counter will add 4. |
||
417 | * TODO: could get that out of the fs loop. |
||
418 | * |
||
419 | * \param type holds element type of the mask vector. |
||
420 | * \param maskvalue is the depth test mask. |
||
421 | * \param counter is a pointer of the uint32 counter. |
||
422 | */ |
||
423 | void |
||
424 | lp_build_occlusion_count(struct gallivm_state *gallivm, |
||
425 | struct lp_type type, |
||
426 | LLVMValueRef maskvalue, |
||
427 | LLVMValueRef counter) |
||
428 | { |
||
429 | LLVMBuilderRef builder = gallivm->builder; |
||
430 | LLVMContextRef context = gallivm->context; |
||
431 | LLVMValueRef countmask = lp_build_const_int_vec(gallivm, type, 1); |
||
432 | LLVMValueRef count, newcount; |
||
433 | |||
434 | assert(type.length <= 16); |
||
435 | assert(type.floating); |
||
436 | |||
437 | if(util_cpu_caps.has_sse && type.length == 4) { |
||
438 | const char *movmskintr = "llvm.x86.sse.movmsk.ps"; |
||
439 | const char *popcntintr = "llvm.ctpop.i32"; |
||
440 | LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, |
||
441 | lp_build_vec_type(gallivm, type), ""); |
||
442 | bits = lp_build_intrinsic_unary(builder, movmskintr, |
||
443 | LLVMInt32TypeInContext(context), bits); |
||
444 | count = lp_build_intrinsic_unary(builder, popcntintr, |
||
445 | LLVMInt32TypeInContext(context), bits); |
||
446 | count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); |
||
447 | } |
||
448 | else if(util_cpu_caps.has_avx && type.length == 8) { |
||
449 | const char *movmskintr = "llvm.x86.avx.movmsk.ps.256"; |
||
450 | const char *popcntintr = "llvm.ctpop.i32"; |
||
451 | LLVMValueRef bits = LLVMBuildBitCast(builder, maskvalue, |
||
452 | lp_build_vec_type(gallivm, type), ""); |
||
453 | bits = lp_build_intrinsic_unary(builder, movmskintr, |
||
454 | LLVMInt32TypeInContext(context), bits); |
||
455 | count = lp_build_intrinsic_unary(builder, popcntintr, |
||
456 | LLVMInt32TypeInContext(context), bits); |
||
457 | count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); |
||
458 | } |
||
459 | else { |
||
460 | unsigned i; |
||
461 | LLVMValueRef countv = LLVMBuildAnd(builder, maskvalue, countmask, "countv"); |
||
462 | LLVMTypeRef counttype = LLVMIntTypeInContext(context, type.length * 8); |
||
463 | LLVMTypeRef i8vntype = LLVMVectorType(LLVMInt8TypeInContext(context), type.length * 4); |
||
464 | LLVMValueRef shufflev, countd; |
||
465 | LLVMValueRef shuffles[16]; |
||
466 | const char *popcntintr = NULL; |
||
467 | |||
468 | countv = LLVMBuildBitCast(builder, countv, i8vntype, ""); |
||
469 | |||
470 | for (i = 0; i < type.length; i++) { |
||
471 | shuffles[i] = lp_build_const_int32(gallivm, 4*i); |
||
472 | } |
||
473 | |||
474 | shufflev = LLVMConstVector(shuffles, type.length); |
||
475 | countd = LLVMBuildShuffleVector(builder, countv, LLVMGetUndef(i8vntype), shufflev, ""); |
||
476 | countd = LLVMBuildBitCast(builder, countd, counttype, "countd"); |
||
477 | |||
478 | /* |
||
479 | * XXX FIXME |
||
480 | * this is bad on cpus without popcount (on x86 supported by intel |
||
481 | * nehalem, amd barcelona, and up - not tied to sse42). |
||
482 | * Would be much faster to just sum the 4 elements of the vector with |
||
483 | * some horizontal add (shuffle/add/shuffle/add after the initial and). |
||
484 | */ |
||
485 | switch (type.length) { |
||
486 | case 4: |
||
487 | popcntintr = "llvm.ctpop.i32"; |
||
488 | break; |
||
489 | case 8: |
||
490 | popcntintr = "llvm.ctpop.i64"; |
||
491 | break; |
||
492 | case 16: |
||
493 | popcntintr = "llvm.ctpop.i128"; |
||
494 | break; |
||
495 | default: |
||
496 | assert(0); |
||
497 | } |
||
498 | count = lp_build_intrinsic_unary(builder, popcntintr, counttype, countd); |
||
499 | |||
500 | if (type.length > 8) { |
||
501 | count = LLVMBuildTrunc(builder, count, LLVMIntTypeInContext(context, 64), ""); |
||
502 | } |
||
503 | else if (type.length < 8) { |
||
504 | count = LLVMBuildZExt(builder, count, LLVMIntTypeInContext(context, 64), ""); |
||
505 | } |
||
506 | } |
||
507 | newcount = LLVMBuildLoad(builder, counter, "origcount"); |
||
508 | newcount = LLVMBuildAdd(builder, newcount, count, "newcount"); |
||
509 | LLVMBuildStore(builder, newcount, counter); |
||
510 | } |
||
511 | |||
512 | |||
513 | /** |
||
514 | * Load depth/stencil values. |
||
515 | * The stored values are linear, swizzle them. |
||
516 | * |
||
517 | * \param type the data type of the fragment depth/stencil values |
||
518 | * \param format_desc description of the depth/stencil surface |
||
519 | * \param is_1d whether this resource has only one dimension |
||
520 | * \param loop_counter the current loop iteration |
||
521 | * \param depth_ptr pointer to the depth/stencil values of this 4x4 block |
||
522 | * \param depth_stride stride of the depth/stencil buffer |
||
523 | * \param z_fb contains z values loaded from fb (may include padding) |
||
524 | * \param s_fb contains s values loaded from fb (may include padding) |
||
525 | */ |
||
526 | void |
||
527 | lp_build_depth_stencil_load_swizzled(struct gallivm_state *gallivm, |
||
528 | struct lp_type z_src_type, |
||
529 | const struct util_format_description *format_desc, |
||
530 | boolean is_1d, |
||
531 | LLVMValueRef depth_ptr, |
||
532 | LLVMValueRef depth_stride, |
||
533 | LLVMValueRef *z_fb, |
||
534 | LLVMValueRef *s_fb, |
||
535 | LLVMValueRef loop_counter) |
||
536 | { |
||
537 | LLVMBuilderRef builder = gallivm->builder; |
||
538 | LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; |
||
539 | LLVMValueRef zs_dst1, zs_dst2; |
||
540 | LLVMValueRef zs_dst_ptr; |
||
541 | LLVMValueRef depth_offset1, depth_offset2; |
||
542 | LLVMTypeRef load_ptr_type; |
||
543 | unsigned depth_bytes = format_desc->block.bits / 8; |
||
544 | struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); |
||
545 | struct lp_type zs_load_type = zs_type; |
||
546 | |||
547 | zs_load_type.length = zs_load_type.length / 2; |
||
548 | load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); |
||
549 | |||
550 | if (z_src_type.length == 4) { |
||
551 | unsigned i; |
||
552 | LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, |
||
553 | lp_build_const_int32(gallivm, 1), ""); |
||
554 | LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, |
||
555 | lp_build_const_int32(gallivm, 2), ""); |
||
556 | LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, |
||
557 | depth_stride, ""); |
||
558 | depth_offset1 = LLVMBuildMul(builder, looplsb, |
||
559 | lp_build_const_int32(gallivm, depth_bytes * 2), ""); |
||
560 | depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); |
||
561 | |||
562 | /* just concatenate the loaded 2x2 values into 4-wide vector */ |
||
563 | for (i = 0; i < 4; i++) { |
||
564 | shuffles[i] = lp_build_const_int32(gallivm, i); |
||
565 | } |
||
566 | } |
||
567 | else { |
||
568 | unsigned i; |
||
569 | LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, |
||
570 | lp_build_const_int32(gallivm, 1), ""); |
||
571 | assert(z_src_type.length == 8); |
||
572 | depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); |
||
573 | /* |
||
574 | * We load 2x4 values, and need to swizzle them (order |
||
575 | * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. |
||
576 | */ |
||
577 | for (i = 0; i < 8; i++) { |
||
578 | shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); |
||
579 | } |
||
580 | } |
||
581 | |||
582 | depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); |
||
583 | |||
584 | /* Load current z/stencil values from z/stencil buffer */ |
||
585 | zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); |
||
586 | zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); |
||
587 | zs_dst1 = LLVMBuildLoad(builder, zs_dst_ptr, ""); |
||
588 | if (is_1d) { |
||
589 | zs_dst2 = lp_build_undef(gallivm, zs_load_type); |
||
590 | } |
||
591 | else { |
||
592 | zs_dst_ptr = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); |
||
593 | zs_dst_ptr = LLVMBuildBitCast(builder, zs_dst_ptr, load_ptr_type, ""); |
||
594 | zs_dst2 = LLVMBuildLoad(builder, zs_dst_ptr, ""); |
||
595 | } |
||
596 | |||
597 | *z_fb = LLVMBuildShuffleVector(builder, zs_dst1, zs_dst2, |
||
598 | LLVMConstVector(shuffles, zs_type.length), ""); |
||
599 | *s_fb = *z_fb; |
||
600 | |||
601 | if (format_desc->block.bits < z_src_type.width) { |
||
602 | /* Extend destination ZS values (e.g., when reading from Z16_UNORM) */ |
||
603 | *z_fb = LLVMBuildZExt(builder, *z_fb, |
||
604 | lp_build_int_vec_type(gallivm, z_src_type), ""); |
||
605 | } |
||
606 | |||
607 | else if (format_desc->block.bits > 32) { |
||
608 | /* rely on llvm to handle too wide vector we have here nicely */ |
||
609 | unsigned i; |
||
610 | struct lp_type typex2 = zs_type; |
||
611 | struct lp_type s_type = zs_type; |
||
612 | LLVMValueRef shuffles1[LP_MAX_VECTOR_LENGTH / 4]; |
||
613 | LLVMValueRef shuffles2[LP_MAX_VECTOR_LENGTH / 4]; |
||
614 | LLVMValueRef tmp; |
||
615 | |||
616 | typex2.width = typex2.width / 2; |
||
617 | typex2.length = typex2.length * 2; |
||
618 | s_type.width = s_type.width / 2; |
||
619 | s_type.floating = 0; |
||
620 | |||
621 | tmp = LLVMBuildBitCast(builder, *z_fb, |
||
622 | lp_build_vec_type(gallivm, typex2), ""); |
||
623 | |||
624 | for (i = 0; i < zs_type.length; i++) { |
||
625 | shuffles1[i] = lp_build_const_int32(gallivm, i * 2); |
||
626 | shuffles2[i] = lp_build_const_int32(gallivm, i * 2 + 1); |
||
627 | } |
||
628 | *z_fb = LLVMBuildShuffleVector(builder, tmp, tmp, |
||
629 | LLVMConstVector(shuffles1, zs_type.length), ""); |
||
630 | *s_fb = LLVMBuildShuffleVector(builder, tmp, tmp, |
||
631 | LLVMConstVector(shuffles2, zs_type.length), ""); |
||
632 | *s_fb = LLVMBuildBitCast(builder, *s_fb, |
||
633 | lp_build_vec_type(gallivm, s_type), ""); |
||
634 | lp_build_name(*s_fb, "s_dst"); |
||
635 | } |
||
636 | |||
637 | lp_build_name(*z_fb, "z_dst"); |
||
638 | lp_build_name(*s_fb, "s_dst"); |
||
639 | lp_build_name(*z_fb, "z_dst"); |
||
640 | } |
||
641 | |||
642 | /** |
||
643 | * Store depth/stencil values. |
||
644 | * Incoming values are swizzled (typically n 2x2 quads), stored linear. |
||
645 | * If there's a mask it will do select/store otherwise just store. |
||
646 | * |
||
647 | * \param type the data type of the fragment depth/stencil values |
||
648 | * \param format_desc description of the depth/stencil surface |
||
649 | * \param is_1d whether this resource has only one dimension |
||
650 | * \param mask the alive/dead pixel mask for the quad (vector) |
||
651 | * \param z_fb z values read from fb (with padding) |
||
652 | * \param s_fb s values read from fb (with padding) |
||
653 | * \param loop_counter the current loop iteration |
||
654 | * \param depth_ptr pointer to the depth/stencil values of this 4x4 block |
||
655 | * \param depth_stride stride of the depth/stencil buffer |
||
656 | * \param z_value the depth values to store (with padding) |
||
657 | * \param s_value the stencil values to store (with padding) |
||
658 | */ |
||
659 | void |
||
660 | lp_build_depth_stencil_write_swizzled(struct gallivm_state *gallivm, |
||
661 | struct lp_type z_src_type, |
||
662 | const struct util_format_description *format_desc, |
||
663 | boolean is_1d, |
||
664 | struct lp_build_mask_context *mask, |
||
665 | LLVMValueRef z_fb, |
||
666 | LLVMValueRef s_fb, |
||
667 | LLVMValueRef loop_counter, |
||
668 | LLVMValueRef depth_ptr, |
||
669 | LLVMValueRef depth_stride, |
||
670 | LLVMValueRef z_value, |
||
671 | LLVMValueRef s_value) |
||
672 | { |
||
673 | struct lp_build_context z_bld; |
||
674 | LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 4]; |
||
675 | LLVMBuilderRef builder = gallivm->builder; |
||
676 | LLVMValueRef mask_value = NULL; |
||
677 | LLVMValueRef zs_dst1, zs_dst2; |
||
678 | LLVMValueRef zs_dst_ptr1, zs_dst_ptr2; |
||
679 | LLVMValueRef depth_offset1, depth_offset2; |
||
680 | LLVMTypeRef load_ptr_type; |
||
681 | unsigned depth_bytes = format_desc->block.bits / 8; |
||
682 | struct lp_type zs_type = lp_depth_type(format_desc, z_src_type.length); |
||
683 | struct lp_type z_type = zs_type; |
||
684 | struct lp_type zs_load_type = zs_type; |
||
685 | |||
686 | zs_load_type.length = zs_load_type.length / 2; |
||
687 | load_ptr_type = LLVMPointerType(lp_build_vec_type(gallivm, zs_load_type), 0); |
||
688 | |||
689 | z_type.width = z_src_type.width; |
||
690 | |||
691 | lp_build_context_init(&z_bld, gallivm, z_type); |
||
692 | |||
693 | /* |
||
694 | * This is far from ideal, at least for late depth write we should do this |
||
695 | * outside the fs loop to avoid all the swizzle stuff. |
||
696 | */ |
||
697 | if (z_src_type.length == 4) { |
||
698 | LLVMValueRef looplsb = LLVMBuildAnd(builder, loop_counter, |
||
699 | lp_build_const_int32(gallivm, 1), ""); |
||
700 | LLVMValueRef loopmsb = LLVMBuildAnd(builder, loop_counter, |
||
701 | lp_build_const_int32(gallivm, 2), ""); |
||
702 | LLVMValueRef offset2 = LLVMBuildMul(builder, loopmsb, |
||
703 | depth_stride, ""); |
||
704 | depth_offset1 = LLVMBuildMul(builder, looplsb, |
||
705 | lp_build_const_int32(gallivm, depth_bytes * 2), ""); |
||
706 | depth_offset1 = LLVMBuildAdd(builder, depth_offset1, offset2, ""); |
||
707 | } |
||
708 | else { |
||
709 | unsigned i; |
||
710 | LLVMValueRef loopx2 = LLVMBuildShl(builder, loop_counter, |
||
711 | lp_build_const_int32(gallivm, 1), ""); |
||
712 | assert(z_src_type.length == 8); |
||
713 | depth_offset1 = LLVMBuildMul(builder, loopx2, depth_stride, ""); |
||
714 | /* |
||
715 | * We load 2x4 values, and need to swizzle them (order |
||
716 | * 0,1,4,5,2,3,6,7) - not so hot with avx unfortunately. |
||
717 | */ |
||
718 | for (i = 0; i < 8; i++) { |
||
719 | shuffles[i] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); |
||
720 | } |
||
721 | } |
||
722 | |||
723 | depth_offset2 = LLVMBuildAdd(builder, depth_offset1, depth_stride, ""); |
||
724 | |||
725 | zs_dst_ptr1 = LLVMBuildGEP(builder, depth_ptr, &depth_offset1, 1, ""); |
||
726 | zs_dst_ptr1 = LLVMBuildBitCast(builder, zs_dst_ptr1, load_ptr_type, ""); |
||
727 | zs_dst_ptr2 = LLVMBuildGEP(builder, depth_ptr, &depth_offset2, 1, ""); |
||
728 | zs_dst_ptr2 = LLVMBuildBitCast(builder, zs_dst_ptr2, load_ptr_type, ""); |
||
729 | |||
730 | if (format_desc->block.bits > 32) { |
||
731 | s_value = LLVMBuildBitCast(builder, s_value, z_bld.vec_type, ""); |
||
732 | } |
||
733 | |||
734 | if (mask) { |
||
735 | mask_value = lp_build_mask_value(mask); |
||
736 | z_value = lp_build_select(&z_bld, mask_value, z_value, z_fb); |
||
737 | if (format_desc->block.bits > 32) { |
||
738 | s_fb = LLVMBuildBitCast(builder, s_fb, z_bld.vec_type, ""); |
||
739 | s_value = lp_build_select(&z_bld, mask_value, s_value, s_fb); |
||
740 | } |
||
741 | } |
||
742 | |||
743 | if (zs_type.width < z_src_type.width) { |
||
744 | /* Truncate ZS values (e.g., when writing to Z16_UNORM) */ |
||
745 | z_value = LLVMBuildTrunc(builder, z_value, |
||
746 | lp_build_int_vec_type(gallivm, zs_type), ""); |
||
747 | } |
||
748 | |||
749 | if (format_desc->block.bits <= 32) { |
||
750 | if (z_src_type.length == 4) { |
||
751 | zs_dst1 = lp_build_extract_range(gallivm, z_value, 0, 2); |
||
752 | zs_dst2 = lp_build_extract_range(gallivm, z_value, 2, 2); |
||
753 | } |
||
754 | else { |
||
755 | assert(z_src_type.length == 8); |
||
756 | zs_dst1 = LLVMBuildShuffleVector(builder, z_value, z_value, |
||
757 | LLVMConstVector(&shuffles[0], |
||
758 | zs_load_type.length), ""); |
||
759 | zs_dst2 = LLVMBuildShuffleVector(builder, z_value, z_value, |
||
760 | LLVMConstVector(&shuffles[4], |
||
761 | zs_load_type.length), ""); |
||
762 | } |
||
763 | } |
||
764 | else { |
||
765 | if (z_src_type.length == 4) { |
||
766 | zs_dst1 = lp_build_interleave2(gallivm, z_type, |
||
767 | z_value, s_value, 0); |
||
768 | zs_dst2 = lp_build_interleave2(gallivm, z_type, |
||
769 | z_value, s_value, 1); |
||
770 | } |
||
771 | else { |
||
772 | unsigned i; |
||
773 | LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH / 2]; |
||
774 | assert(z_src_type.length == 8); |
||
775 | for (i = 0; i < 8; i++) { |
||
776 | shuffles[i*2] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2); |
||
777 | shuffles[i*2+1] = lp_build_const_int32(gallivm, (i&1) + (i&2) * 2 + (i&4) / 2 + |
||
778 | z_src_type.length); |
||
779 | } |
||
780 | zs_dst1 = LLVMBuildShuffleVector(builder, z_value, s_value, |
||
781 | LLVMConstVector(&shuffles[0], |
||
782 | z_src_type.length), ""); |
||
783 | zs_dst2 = LLVMBuildShuffleVector(builder, z_value, s_value, |
||
784 | LLVMConstVector(&shuffles[8], |
||
785 | z_src_type.length), ""); |
||
786 | } |
||
787 | zs_dst1 = LLVMBuildBitCast(builder, zs_dst1, |
||
788 | lp_build_vec_type(gallivm, zs_load_type), ""); |
||
789 | zs_dst2 = LLVMBuildBitCast(builder, zs_dst2, |
||
790 | lp_build_vec_type(gallivm, zs_load_type), ""); |
||
791 | } |
||
792 | |||
793 | LLVMBuildStore(builder, zs_dst1, zs_dst_ptr1); |
||
794 | if (!is_1d) { |
||
795 | LLVMBuildStore(builder, zs_dst2, zs_dst_ptr2); |
||
796 | } |
||
797 | } |
||
798 | |||
799 | /** |
||
800 | * Generate code for performing depth and/or stencil tests. |
||
801 | * We operate on a vector of values (typically n 2x2 quads). |
||
802 | * |
||
| * \param gallivm the gallivm state; all IR is emitted through gallivm->builder |
||
803 | * \param depth the depth test state (enabled, func and writemask are read) |
||
804 | * \param stencil the front/back stencil state; stencil[0].enabled selects stencil testing |
||
805 | * \param z_src_type the data type of the incoming fragment depth values |
||
806 | * \param format_desc description of the depth/stencil surface |
||
807 | * \param mask the alive/dead pixel mask for the quad (vector); updated with the pass masks |
||
808 | * \param stencil_refs the front/back stencil ref values (scalar); overwritten in place with their broadcast vectors when stencil is enabled |
||
809 | * \param z_src the incoming fragment depth values (n 2x2 quad values), of type z_src_type |
||
810 | * \param z_fb the depth values in framebuffer, before the Z shift/mask is applied |
||
| * \param s_fb the stencil values in framebuffer, before the stencil shift/mask is applied |
||
811 | * \param face contains boolean value indicating front/back facing polygon (non-zero = front); may be NULL |
||
| * \param z_value returns the merged z/stencil value for formats of 32 bits or less, otherwise the new depth values |
||
| * \param s_value returns the same value as *z_value for formats of 32 bits or less, otherwise the new stencil values |
||
| * \param do_branch if set and stencil is disabled, lp_build_mask_check(mask) is called after the depth test |
||
812 | */ |
||
813 | void |
||
814 | lp_build_depth_stencil_test(struct gallivm_state *gallivm, |
||
815 | const struct pipe_depth_state *depth, |
||
816 | const struct pipe_stencil_state stencil[2], |
||
817 | struct lp_type z_src_type, |
||
818 | const struct util_format_description *format_desc, |
||
819 | struct lp_build_mask_context *mask, |
||
820 | LLVMValueRef stencil_refs[2], |
||
821 | LLVMValueRef z_src, |
||
822 | LLVMValueRef z_fb, |
||
823 | LLVMValueRef s_fb, |
||
824 | LLVMValueRef face, |
||
825 | LLVMValueRef *z_value, |
||
826 | LLVMValueRef *s_value, |
||
827 | boolean do_branch) |
||
828 | { |
||
829 | LLVMBuilderRef builder = gallivm->builder; |
||
830 | struct lp_type z_type; |
||
831 | struct lp_build_context z_bld; |
||
832 | struct lp_build_context s_bld; |
||
833 | struct lp_type s_type; |
||
834 | unsigned z_shift = 0, z_width = 0, z_mask = 0; |
||
835 | LLVMValueRef z_dst = NULL; |
||
836 | LLVMValueRef stencil_vals = NULL; |
||
837 | LLVMValueRef z_bitmask = NULL, stencil_shift = NULL; |
||
838 | LLVMValueRef z_pass = NULL, s_pass_mask = NULL; |
||
839 | LLVMValueRef current_mask = lp_build_mask_value(mask); |
||
840 | LLVMValueRef front_facing = NULL; |
||
841 | boolean have_z, have_s; |
||
842 | |||
843 | /* |
||
844 | * Depths are expected to be between 0 and 1, even if they are stored in |
||
845 | * floats. Setting these bits here is meant to keep the conversion below |
||
846 | * from unnecessarily clamping the incoming values. |
||
| * NOTE(review): the only float conversion visible below is |
||
| * lp_build_clamped_float_to_unsigned_norm(), not lp_build_conv() -- |
||
| * confirm these bits still matter. |
||
847 | */ |
||
848 | if(z_src_type.floating) { |
||
849 | z_src_type.sign = FALSE; |
||
850 | z_src_type.norm = TRUE; |
||
851 | } |
||
852 | else { |
||
853 | assert(!z_src_type.sign); |
||
854 | assert(z_src_type.norm); |
||
855 | } |
||
856 | |||
857 | /* Pick the type matching the depth-stencil format. */ |
||
858 | z_type = lp_depth_type(format_desc, z_src_type.length); |
||
859 | |||
860 | /* Pick the intermediate type for depth operations: same width as the source. */ |
||
861 | z_type.width = z_src_type.width; |
||
862 | assert(z_type.length == z_src_type.length); |
||
863 | |||
864 | /* FIXME: for non-float depth/stencil might generate better code |
||
865 | * if we'd always split it up to use 128bit operations. |
||
866 | * For stencil we'd almost certainly want to pack to 8xi16 values, |
||
867 | * for z just run twice. |
||
868 | */ |
||
869 | |||
870 | /* Sanity checking (asserts only, no code is generated here) */ |
||
871 | { |
||
872 | const unsigned z_swizzle = format_desc->swizzle[0]; |
||
873 | const unsigned s_swizzle = format_desc->swizzle[1]; |
||
874 | |||
875 | assert(z_swizzle != UTIL_FORMAT_SWIZZLE_NONE || |
||
876 | s_swizzle != UTIL_FORMAT_SWIZZLE_NONE); |
||
877 | |||
878 | assert(depth->enabled || stencil[0].enabled); |
||
879 | |||
880 | assert(format_desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS); |
||
881 | assert(format_desc->block.width == 1); |
||
882 | assert(format_desc->block.height == 1); |
||
883 | |||
884 | if (stencil[0].enabled) { |
||
885 | assert(s_swizzle < 4); |
||
886 | assert(format_desc->channel[s_swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED); |
||
887 | assert(format_desc->channel[s_swizzle].pure_integer); |
||
888 | assert(!format_desc->channel[s_swizzle].normalized); |
||
889 | assert(format_desc->channel[s_swizzle].size == 8); |
||
890 | } |
||
891 | |||
892 | if (depth->enabled) { |
||
893 | assert(z_swizzle < 4); |
||
894 | if (z_type.floating) { |
||
895 | assert(z_swizzle == 0); |
||
896 | assert(format_desc->channel[z_swizzle].type == |
||
897 | UTIL_FORMAT_TYPE_FLOAT); |
||
898 | assert(format_desc->channel[z_swizzle].size == 32); |
||
899 | } |
||
900 | else { |
||
901 | assert(format_desc->channel[z_swizzle].type == |
||
902 | UTIL_FORMAT_TYPE_UNSIGNED); |
||
903 | assert(format_desc->channel[z_swizzle].normalized); |
||
904 | assert(!z_type.fixed); |
||
905 | } |
||
906 | } |
||
907 | } |
||
908 | |||
909 | |||
910 | /* Setup build context for Z vals */ |
||
911 | lp_build_context_init(&z_bld, gallivm, z_type); |
||
912 | |||
913 | /* Setup build context for stencil vals (integer variant of the Z type) */ |
||
914 | s_type = lp_int_type(z_type); |
||
915 | lp_build_context_init(&s_bld, gallivm, s_type); |
||
916 | |||
917 | /* Compute and apply the Z/stencil bitmasks and shifts. |
||
918 | */ |
||
919 | { |
||
920 | unsigned s_shift, s_mask; |
||
921 | |||
922 | z_dst = z_fb; |
||
923 | stencil_vals = s_fb; |
||
924 | |||
925 | have_z = get_z_shift_and_mask(format_desc, &z_shift, &z_width, &z_mask); |
||
926 | have_s = get_s_shift_and_mask(format_desc, &s_shift, &s_mask); |
||
927 | |||
928 | if (have_z) { |
||
929 | if (z_mask != 0xffffffff) { |
||
930 | z_bitmask = lp_build_const_int_vec(gallivm, z_type, z_mask); |
||
931 | } |
||
932 | |||
933 | /* |
||
934 | * Align the framebuffer Z's LSB to the right. |
||
935 | */ |
||
936 | if (z_shift) { |
||
937 | LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); |
||
938 | z_dst = LLVMBuildLShr(builder, z_dst, shift, "z_dst"); |
||
939 | } else if (z_bitmask) { |
||
940 | z_dst = LLVMBuildAnd(builder, z_dst, z_bitmask, "z_dst"); |
||
941 | } else { |
||
942 | lp_build_name(z_dst, "z_dst"); |
||
943 | } |
||
944 | } |
||
945 | |||
946 | if (have_s) { |
||
947 | if (s_shift) { |
||
948 | LLVMValueRef shift = lp_build_const_int_vec(gallivm, s_type, s_shift); |
||
949 | stencil_vals = LLVMBuildLShr(builder, stencil_vals, shift, ""); |
||
950 | stencil_shift = shift; /* reused below to shift the stencil bits back into place */ |
||
951 | } |
||
952 | |||
953 | if (s_mask != 0xffffffff) { |
||
954 | LLVMValueRef mask = lp_build_const_int_vec(gallivm, s_type, s_mask); /* NB: shadows the 'mask' parameter inside this block */ |
||
955 | stencil_vals = LLVMBuildAnd(builder, stencil_vals, mask, ""); |
||
956 | } |
||
957 | |||
958 | lp_build_name(stencil_vals, "s_dst"); |
||
959 | } |
||
960 | } |
||
961 | |||
962 | if (stencil[0].enabled) { |
||
963 | |||
964 | if (face) { |
||
965 | LLVMValueRef zero = lp_build_const_int32(gallivm, 0); |
||
966 | |||
967 | /* front_facing = face != 0 ? ~0 : 0 (sign-extended to the full vector width, then viewed as a vector) */ |
||
968 | front_facing = LLVMBuildICmp(builder, LLVMIntNE, face, zero, ""); |
||
969 | front_facing = LLVMBuildSExt(builder, front_facing, |
||
970 | LLVMIntTypeInContext(gallivm->context, |
||
971 | s_bld.type.length*s_bld.type.width), |
||
972 | ""); |
||
973 | front_facing = LLVMBuildBitCast(builder, front_facing, |
||
974 | s_bld.int_vec_type, ""); |
||
975 | } |
||
976 | |||
977 | /* convert scalar stencil refs into vectors (this overwrites the caller's array entries) */ |
||
978 | stencil_refs[0] = lp_build_broadcast_scalar(&s_bld, stencil_refs[0]); |
||
979 | stencil_refs[1] = lp_build_broadcast_scalar(&s_bld, stencil_refs[1]); |
||
980 | |||
981 | s_pass_mask = lp_build_stencil_test(&s_bld, stencil, |
||
982 | stencil_refs, stencil_vals, |
||
983 | front_facing); |
||
984 | |||
985 | /* apply stencil-fail operator to live pixels that failed the stencil test */ |
||
986 | { |
||
987 | LLVMValueRef s_fail_mask = lp_build_andnot(&s_bld, current_mask, s_pass_mask); |
||
988 | stencil_vals = lp_build_stencil_op(&s_bld, stencil, S_FAIL_OP, |
||
989 | stencil_refs, stencil_vals, |
||
990 | s_fail_mask, front_facing); |
||
991 | } |
||
992 | } |
||
993 | |||
994 | if (depth->enabled) { |
||
995 | /* |
||
996 | * Convert fragment Z to the desired type, aligning the LSB to the right. |
||
997 | */ |
||
998 | |||
999 | assert(z_type.width == z_src_type.width); |
||
1000 | assert(z_type.length == z_src_type.length); |
||
1001 | assert(lp_check_value(z_src_type, z_src)); |
||
1002 | if (z_src_type.floating) { |
||
1003 | /* |
||
1004 | * Convert from floating point values |
||
| * (nothing to do when the depth format itself is floating point). |
||
1005 | */ |
||
1006 | |||
1007 | if (!z_type.floating) { |
||
1008 | z_src = lp_build_clamped_float_to_unsigned_norm(gallivm, |
||
1009 | z_src_type, |
||
1010 | z_width, |
||
1011 | z_src); |
||
1012 | } |
||
1013 | } else { |
||
1014 | /* |
||
1015 | * Convert from unsigned normalized values: drop the low bits that |
||
| * do not fit into the z_width bits of the depth format. |
||
1016 | */ |
||
1017 | |||
1018 | assert(!z_src_type.sign); |
||
1019 | assert(!z_src_type.fixed); |
||
1020 | assert(z_src_type.norm); |
||
1021 | assert(!z_type.floating); |
||
1022 | if (z_src_type.width > z_width) { |
||
1023 | LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_src_type, |
||
1024 | z_src_type.width - z_width); |
||
1025 | z_src = LLVMBuildLShr(builder, z_src, shift, ""); |
||
1026 | } |
||
1027 | } |
||
1028 | assert(lp_check_value(z_type, z_src)); |
||
1029 | |||
1030 | lp_build_name(z_src, "z_src"); |
||
1031 | |||
1032 | /* compare src Z to dst Z, returning 'pass' mask */ |
||
1033 | z_pass = lp_build_cmp(&z_bld, depth->func, z_src, z_dst); |
||
1034 | |||
1035 | /* mask off bits that failed stencil test */ |
||
1036 | if (s_pass_mask) { |
||
1037 | current_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, ""); |
||
1038 | } |
||
1039 | |||
1040 | if (!stencil[0].enabled) { |
||
1041 | /* We can potentially skip all remaining operations here, but only |
||
1042 | * if stencil is disabled: with stencil enabled we would still need to |
||
1043 | * update the stencil buffer values. Don't need to update Z buffer values. |
||
1044 | */ |
||
1045 | lp_build_mask_update(mask, z_pass); |
||
1046 | |||
1047 | if (do_branch) { |
||
1048 | lp_build_mask_check(mask); |
||
1049 | } |
||
1050 | } |
||
1051 | |||
1052 | if (depth->writemask) { |
||
1053 | LLVMValueRef z_pass_mask; |
||
1054 | |||
1055 | /* mask off bits that failed Z test */ |
||
1056 | z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, ""); |
||
1057 | |||
1058 | /* Mix the old and new Z buffer values. |
||
1059 | * z_dst[i] = z_pass_mask[i] ? z_src[i] : z_dst[i] |
||
1060 | */ |
||
1061 | z_dst = lp_build_select(&z_bld, z_pass_mask, z_src, z_dst); |
||
1062 | } |
||
1063 | |||
1064 | if (stencil[0].enabled) { |
||
1065 | /* update stencil buffer values according to z pass/fail result */ |
||
1066 | LLVMValueRef z_fail_mask, z_pass_mask; |
||
1067 | |||
1068 | /* apply Z-fail operator (pixels in current_mask that failed the Z test) */ |
||
1069 | z_fail_mask = lp_build_andnot(&s_bld, current_mask, z_pass); |
||
1070 | stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_FAIL_OP, |
||
1071 | stencil_refs, stencil_vals, |
||
1072 | z_fail_mask, front_facing); |
||
1073 | |||
1074 | /* apply Z-pass operator (pixels in current_mask that passed the Z test) */ |
||
1075 | z_pass_mask = LLVMBuildAnd(builder, current_mask, z_pass, ""); |
||
1076 | stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, |
||
1077 | stencil_refs, stencil_vals, |
||
1078 | z_pass_mask, front_facing); |
||
1079 | } |
||
1080 | } |
||
1081 | else { |
||
1082 | /* No depth test: apply Z-pass operator to stencil buffer values which |
||
1083 | * passed the stencil test. |
||
1084 | */ |
||
1085 | s_pass_mask = LLVMBuildAnd(builder, current_mask, s_pass_mask, ""); |
||
1086 | stencil_vals = lp_build_stencil_op(&s_bld, stencil, Z_PASS_OP, |
||
1087 | stencil_refs, stencil_vals, |
||
1088 | s_pass_mask, front_facing); |
||
1089 | } |
||
1090 | |||
1091 | /* Put Z and stencil bits in the right place (undo the right-alignment done above) */ |
||
1092 | if (have_z && z_shift) { |
||
1093 | LLVMValueRef shift = lp_build_const_int_vec(gallivm, z_type, z_shift); |
||
1094 | z_dst = LLVMBuildShl(builder, z_dst, shift, ""); |
||
1095 | } |
||
1096 | if (stencil_vals && stencil_shift) |
||
1097 | stencil_vals = LLVMBuildShl(builder, stencil_vals, |
||
1098 | stencil_shift, ""); |
||
1099 | |||
1100 | /* Finally, merge the z/stencil values: formats of 32 bits or less return one |
||
| * combined value in both outputs, wider formats return Z and stencil separately. |
||
| */ |
||
1101 | if (format_desc->block.bits <= 32) { |
||
1102 | if (have_z && have_s) |
||
1103 | *z_value = LLVMBuildOr(builder, z_dst, stencil_vals, ""); |
||
1104 | else if (have_z) |
||
1105 | *z_value = z_dst; |
||
1106 | else |
||
1107 | *z_value = stencil_vals; |
||
1108 | *s_value = *z_value; |
||
1109 | } |
||
1110 | else { |
||
1111 | *z_value = z_dst; |
||
1112 | *s_value = stencil_vals; |
||
1113 | } |
||
1114 | |||
1115 | if (s_pass_mask) |
||
1116 | lp_build_mask_update(mask, s_pass_mask); |
||
1117 | |||
1118 | if (depth->enabled && stencil[0].enabled) |
||
1119 | lp_build_mask_update(mask, z_pass); /* the depth-only case already updated the mask above */ |
||
1120 | }=>>>>=>>>>>>>>>=>><>><>><>>=>>=>
||
1121 |