Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
5564 | serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2009 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | /** |
||
29 | * @file |
||
30 | * Texture sampling -- SoA. |
||
31 | * |
||
32 | * @author Jose Fonseca |
||
33 | * @author Brian Paul |
||
34 | */ |
||
35 | |||
36 | #include "pipe/p_defines.h" |
||
37 | #include "pipe/p_state.h" |
||
38 | #include "pipe/p_shader_tokens.h" |
||
39 | #include "util/u_debug.h" |
||
40 | #include "util/u_dump.h" |
||
41 | #include "util/u_memory.h" |
||
42 | #include "util/u_math.h" |
||
43 | #include "util/u_format.h" |
||
44 | #include "util/u_cpu_detect.h" |
||
45 | #include "util/u_format_rgb9e5.h" |
||
46 | #include "lp_bld_debug.h" |
||
47 | #include "lp_bld_type.h" |
||
48 | #include "lp_bld_const.h" |
||
49 | #include "lp_bld_conv.h" |
||
50 | #include "lp_bld_arit.h" |
||
51 | #include "lp_bld_bitarit.h" |
||
52 | #include "lp_bld_logic.h" |
||
53 | #include "lp_bld_printf.h" |
||
54 | #include "lp_bld_swizzle.h" |
||
55 | #include "lp_bld_flow.h" |
||
56 | #include "lp_bld_gather.h" |
||
57 | #include "lp_bld_format.h" |
||
58 | #include "lp_bld_sample.h" |
||
59 | #include "lp_bld_sample_aos.h" |
||
60 | #include "lp_bld_struct.h" |
||
61 | #include "lp_bld_quad.h" |
||
62 | #include "lp_bld_pack.h" |
||
63 | |||
64 | |||
65 | /** |
||
66 | * Generate code to fetch a texel from a texture at int coords (x, y, z). |
||
67 | * The computation depends on whether the texture is 1D, 2D or 3D. |
||
68 | * The result, texel, will be float vectors: |
||
69 | * texel[0] = red values |
||
70 | * texel[1] = green values |
||
71 | * texel[2] = blue values |
||
72 | * texel[3] = alpha values |
||
73 | */ |
||
74 | static void |
||
75 | lp_build_sample_texel_soa(struct lp_build_sample_context *bld, |
||
76 | LLVMValueRef width, |
||
77 | LLVMValueRef height, |
||
78 | LLVMValueRef depth, |
||
79 | LLVMValueRef x, |
||
80 | LLVMValueRef y, |
||
81 | LLVMValueRef z, |
||
82 | LLVMValueRef y_stride, |
||
83 | LLVMValueRef z_stride, |
||
84 | LLVMValueRef data_ptr, |
||
85 | LLVMValueRef mipoffsets, |
||
86 | LLVMValueRef texel_out[4]) |
||
87 | { |
||
88 | const struct lp_static_sampler_state *static_state = bld->static_sampler_state; |
||
89 | const unsigned dims = bld->dims; |
||
90 | struct lp_build_context *int_coord_bld = &bld->int_coord_bld; |
||
91 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
92 | LLVMValueRef offset; |
||
93 | LLVMValueRef i, j; |
||
94 | LLVMValueRef use_border = NULL; |
||
95 | |||
96 | /* use_border = x < 0 || x >= width || y < 0 || y >= height */ |
||
97 | if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s, |
||
98 | static_state->min_img_filter, |
||
99 | static_state->mag_img_filter)) { |
||
100 | LLVMValueRef b1, b2; |
||
101 | b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); |
||
102 | b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); |
||
103 | use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); |
||
104 | } |
||
105 | |||
106 | if (dims >= 2 && |
||
107 | lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t, |
||
108 | static_state->min_img_filter, |
||
109 | static_state->mag_img_filter)) { |
||
110 | LLVMValueRef b1, b2; |
||
111 | b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); |
||
112 | b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); |
||
113 | if (use_border) { |
||
114 | use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); |
||
115 | use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); |
||
116 | } |
||
117 | else { |
||
118 | use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); |
||
119 | } |
||
120 | } |
||
121 | |||
122 | if (dims == 3 && |
||
123 | lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r, |
||
124 | static_state->min_img_filter, |
||
125 | static_state->mag_img_filter)) { |
||
126 | LLVMValueRef b1, b2; |
||
127 | b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); |
||
128 | b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); |
||
129 | if (use_border) { |
||
130 | use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1"); |
||
131 | use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2"); |
||
132 | } |
||
133 | else { |
||
134 | use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2"); |
||
135 | } |
||
136 | } |
||
137 | |||
138 | /* convert x,y,z coords to linear offset from start of texture, in bytes */ |
||
139 | lp_build_sample_offset(&bld->int_coord_bld, |
||
140 | bld->format_desc, |
||
141 | x, y, z, y_stride, z_stride, |
||
142 | &offset, &i, &j); |
||
143 | if (mipoffsets) { |
||
144 | offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets); |
||
145 | } |
||
146 | |||
147 | if (use_border) { |
||
148 | /* If we can sample the border color, it means that texcoords may |
||
149 | * lie outside the bounds of the texture image. We need to do |
||
150 | * something to prevent reading out of bounds and causing a segfault. |
||
151 | * |
||
152 | * Simply AND the texture coords with !use_border. This will cause |
||
153 | * coords which are out of bounds to become zero. Zero's guaranteed |
||
154 | * to be inside the texture image. |
||
155 | */ |
||
156 | offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border); |
||
157 | } |
||
158 | |||
159 | lp_build_fetch_rgba_soa(bld->gallivm, |
||
160 | bld->format_desc, |
||
161 | bld->texel_type, |
||
162 | data_ptr, offset, |
||
163 | i, j, |
||
164 | texel_out); |
||
165 | |||
166 | /* |
||
167 | * Note: if we find an app which frequently samples the texture border |
||
168 | * we might want to implement a true conditional here to avoid sampling |
||
169 | * the texture whenever possible (since that's quite a bit of code). |
||
170 | * Ex: |
||
171 | * if (use_border) { |
||
172 | * texel = border_color; |
||
173 | * } |
||
174 | * else { |
||
175 | * texel = sample_texture(coord); |
||
176 | * } |
||
177 | * As it is now, we always sample the texture, then selectively replace |
||
178 | * the texel color results with the border color. |
||
179 | */ |
||
180 | |||
181 | if (use_border) { |
||
182 | /* select texel color or border color depending on use_border. */ |
||
183 | const struct util_format_description *format_desc = bld->format_desc; |
||
184 | int chan; |
||
185 | struct lp_type border_type = bld->texel_type; |
||
186 | border_type.length = 4; |
||
187 | /* |
||
188 | * Only replace channels which are actually present. The others should |
||
189 | * get optimized away eventually by sampler_view swizzle anyway but it's |
||
190 | * easier too. |
||
191 | */ |
||
192 | for (chan = 0; chan < 4; chan++) { |
||
193 | unsigned chan_s; |
||
194 | /* reverse-map channel... */ |
||
195 | for (chan_s = 0; chan_s < 4; chan_s++) { |
||
196 | if (chan_s == format_desc->swizzle[chan]) { |
||
197 | break; |
||
198 | } |
||
199 | } |
||
200 | if (chan_s <= 3) { |
||
201 | /* use the already clamped color */ |
||
202 | LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan); |
||
203 | LLVMValueRef border_chan; |
||
204 | |||
205 | border_chan = lp_build_extract_broadcast(bld->gallivm, |
||
206 | border_type, |
||
207 | bld->texel_type, |
||
208 | bld->border_color_clamped, |
||
209 | idx); |
||
210 | texel_out[chan] = lp_build_select(&bld->texel_bld, use_border, |
||
211 | border_chan, texel_out[chan]); |
||
212 | } |
||
213 | } |
||
214 | } |
||
215 | } |
||
216 | |||
217 | |||
218 | /** |
||
219 | * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes. |
||
220 | */ |
||
221 | static LLVMValueRef |
||
222 | lp_build_coord_mirror(struct lp_build_sample_context *bld, |
||
223 | LLVMValueRef coord) |
||
224 | { |
||
225 | struct lp_build_context *coord_bld = &bld->coord_bld; |
||
226 | struct lp_build_context *int_coord_bld = &bld->int_coord_bld; |
||
227 | LLVMValueRef fract, flr, isOdd; |
||
228 | |||
229 | lp_build_ifloor_fract(coord_bld, coord, &flr, &fract); |
||
230 | |||
231 | /* isOdd = flr & 1 */ |
||
232 | isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, ""); |
||
233 | |||
234 | /* make coord positive or negative depending on isOdd */ |
||
235 | coord = lp_build_set_sign(coord_bld, fract, isOdd); |
||
236 | |||
237 | /* convert isOdd to float */ |
||
238 | isOdd = lp_build_int_to_float(coord_bld, isOdd); |
||
239 | |||
240 | /* add isOdd to coord */ |
||
241 | coord = lp_build_add(coord_bld, coord, isOdd); |
||
242 | |||
243 | return coord; |
||
244 | } |
||
245 | |||
246 | |||
247 | /** |
||
248 | * Helper to compute the first coord and the weight for |
||
249 | * linear wrap repeat npot textures |
||
250 | */ |
||
251 | void |
||
252 | lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld, |
||
253 | LLVMValueRef coord_f, |
||
254 | LLVMValueRef length_i, |
||
255 | LLVMValueRef length_f, |
||
256 | LLVMValueRef *coord0_i, |
||
257 | LLVMValueRef *weight_f) |
||
258 | { |
||
259 | struct lp_build_context *coord_bld = &bld->coord_bld; |
||
260 | struct lp_build_context *int_coord_bld = &bld->int_coord_bld; |
||
261 | LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); |
||
262 | LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i, |
||
263 | int_coord_bld->one); |
||
264 | LLVMValueRef mask; |
||
265 | /* wrap with normalized floats is just fract */ |
||
266 | coord_f = lp_build_fract(coord_bld, coord_f); |
||
267 | /* mul by size and subtract 0.5 */ |
||
268 | coord_f = lp_build_mul(coord_bld, coord_f, length_f); |
||
269 | coord_f = lp_build_sub(coord_bld, coord_f, half); |
||
270 | /* |
||
271 | * we avoided the 0.5/length division before the repeat wrap, |
||
272 | * now need to fix up edge cases with selects |
||
273 | */ |
||
274 | /* convert to int, compute lerp weight */ |
||
275 | lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f); |
||
276 | mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, |
||
277 | PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero); |
||
278 | *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i); |
||
279 | } |
||
280 | |||
281 | |||
282 | /** |
||
283 | * Build LLVM code for texture wrap mode for linear filtering. |
||
284 | * \param x0_out returns first integer texcoord |
||
285 | * \param x1_out returns second integer texcoord |
||
286 | * \param weight_out returns linear interpolation weight |
||
287 | */ |
||
288 | static void |
||
289 | lp_build_sample_wrap_linear(struct lp_build_sample_context *bld, |
||
290 | LLVMValueRef coord, |
||
291 | LLVMValueRef length, |
||
292 | LLVMValueRef length_f, |
||
293 | LLVMValueRef offset, |
||
294 | boolean is_pot, |
||
295 | unsigned wrap_mode, |
||
296 | LLVMValueRef *x0_out, |
||
297 | LLVMValueRef *x1_out, |
||
298 | LLVMValueRef *weight_out) |
||
299 | { |
||
300 | struct lp_build_context *coord_bld = &bld->coord_bld; |
||
301 | struct lp_build_context *int_coord_bld = &bld->int_coord_bld; |
||
302 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
303 | LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5); |
||
304 | LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); |
||
305 | LLVMValueRef coord0, coord1, weight; |
||
306 | |||
307 | switch(wrap_mode) { |
||
308 | case PIPE_TEX_WRAP_REPEAT: |
||
309 | if (is_pot) { |
||
310 | /* mul by size and subtract 0.5 */ |
||
311 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
312 | coord = lp_build_sub(coord_bld, coord, half); |
||
313 | if (offset) { |
||
314 | offset = lp_build_int_to_float(coord_bld, offset); |
||
315 | coord = lp_build_add(coord_bld, coord, offset); |
||
316 | } |
||
317 | /* convert to int, compute lerp weight */ |
||
318 | lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); |
||
319 | coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); |
||
320 | /* repeat wrap */ |
||
321 | coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, ""); |
||
322 | coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, ""); |
||
323 | } |
||
324 | else { |
||
325 | LLVMValueRef mask; |
||
326 | if (offset) { |
||
327 | offset = lp_build_int_to_float(coord_bld, offset); |
||
328 | offset = lp_build_div(coord_bld, offset, length_f); |
||
329 | coord = lp_build_add(coord_bld, coord, offset); |
||
330 | } |
||
331 | lp_build_coord_repeat_npot_linear(bld, coord, |
||
332 | length, length_f, |
||
333 | &coord0, &weight); |
||
334 | mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type, |
||
335 | PIPE_FUNC_NOTEQUAL, coord0, length_minus_one); |
||
336 | coord1 = LLVMBuildAnd(builder, |
||
337 | lp_build_add(int_coord_bld, coord0, int_coord_bld->one), |
||
338 | mask, ""); |
||
339 | } |
||
340 | break; |
||
341 | |||
342 | case PIPE_TEX_WRAP_CLAMP: |
||
343 | if (bld->static_sampler_state->normalized_coords) { |
||
344 | /* scale coord to length */ |
||
345 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
346 | } |
||
347 | if (offset) { |
||
348 | offset = lp_build_int_to_float(coord_bld, offset); |
||
349 | coord = lp_build_add(coord_bld, coord, offset); |
||
350 | } |
||
351 | |||
352 | /* clamp to [0, length] */ |
||
353 | coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f); |
||
354 | |||
355 | coord = lp_build_sub(coord_bld, coord, half); |
||
356 | |||
357 | /* convert to int, compute lerp weight */ |
||
358 | lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); |
||
359 | coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); |
||
360 | break; |
||
361 | |||
362 | case PIPE_TEX_WRAP_CLAMP_TO_EDGE: |
||
363 | { |
||
364 | struct lp_build_context abs_coord_bld = bld->coord_bld; |
||
365 | abs_coord_bld.type.sign = FALSE; |
||
366 | |||
367 | if (bld->static_sampler_state->normalized_coords) { |
||
368 | /* mul by tex size */ |
||
369 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
370 | } |
||
371 | if (offset) { |
||
372 | offset = lp_build_int_to_float(coord_bld, offset); |
||
373 | coord = lp_build_add(coord_bld, coord, offset); |
||
374 | } |
||
375 | |||
376 | /* clamp to length max */ |
||
377 | coord = lp_build_min(coord_bld, coord, length_f); |
||
378 | /* subtract 0.5 */ |
||
379 | coord = lp_build_sub(coord_bld, coord, half); |
||
380 | /* clamp to [0, length - 0.5] */ |
||
381 | coord = lp_build_max(coord_bld, coord, coord_bld->zero); |
||
382 | /* convert to int, compute lerp weight */ |
||
383 | lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); |
||
384 | coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); |
||
385 | /* coord1 = min(coord1, length-1) */ |
||
386 | coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); |
||
387 | break; |
||
388 | } |
||
389 | |||
390 | case PIPE_TEX_WRAP_CLAMP_TO_BORDER: |
||
391 | if (bld->static_sampler_state->normalized_coords) { |
||
392 | /* scale coord to length */ |
||
393 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
394 | } |
||
395 | if (offset) { |
||
396 | offset = lp_build_int_to_float(coord_bld, offset); |
||
397 | coord = lp_build_add(coord_bld, coord, offset); |
||
398 | } |
||
399 | /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */ |
||
400 | /* can skip clamp (though might not work for very large coord values */ |
||
401 | coord = lp_build_sub(coord_bld, coord, half); |
||
402 | /* convert to int, compute lerp weight */ |
||
403 | lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); |
||
404 | coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); |
||
405 | break; |
||
406 | |||
407 | case PIPE_TEX_WRAP_MIRROR_REPEAT: |
||
408 | /* compute mirror function */ |
||
409 | coord = lp_build_coord_mirror(bld, coord); |
||
410 | |||
411 | /* scale coord to length */ |
||
412 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
413 | coord = lp_build_sub(coord_bld, coord, half); |
||
414 | if (offset) { |
||
415 | offset = lp_build_int_to_float(coord_bld, offset); |
||
416 | coord = lp_build_add(coord_bld, coord, offset); |
||
417 | } |
||
418 | |||
419 | /* convert to int, compute lerp weight */ |
||
420 | lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); |
||
421 | coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); |
||
422 | |||
423 | /* coord0 = max(coord0, 0) */ |
||
424 | coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero); |
||
425 | /* coord1 = min(coord1, length-1) */ |
||
426 | coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); |
||
427 | break; |
||
428 | |||
429 | case PIPE_TEX_WRAP_MIRROR_CLAMP: |
||
430 | if (bld->static_sampler_state->normalized_coords) { |
||
431 | /* scale coord to length */ |
||
432 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
433 | } |
||
434 | if (offset) { |
||
435 | offset = lp_build_int_to_float(coord_bld, offset); |
||
436 | coord = lp_build_add(coord_bld, coord, offset); |
||
437 | } |
||
438 | coord = lp_build_abs(coord_bld, coord); |
||
439 | |||
440 | /* clamp to [0, length] */ |
||
441 | coord = lp_build_min(coord_bld, coord, length_f); |
||
442 | |||
443 | coord = lp_build_sub(coord_bld, coord, half); |
||
444 | |||
445 | /* convert to int, compute lerp weight */ |
||
446 | lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); |
||
447 | coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); |
||
448 | break; |
||
449 | |||
450 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: |
||
451 | { |
||
452 | struct lp_build_context abs_coord_bld = bld->coord_bld; |
||
453 | abs_coord_bld.type.sign = FALSE; |
||
454 | |||
455 | if (bld->static_sampler_state->normalized_coords) { |
||
456 | /* scale coord to length */ |
||
457 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
458 | } |
||
459 | if (offset) { |
||
460 | offset = lp_build_int_to_float(coord_bld, offset); |
||
461 | coord = lp_build_add(coord_bld, coord, offset); |
||
462 | } |
||
463 | coord = lp_build_abs(coord_bld, coord); |
||
464 | |||
465 | /* clamp to length max */ |
||
466 | coord = lp_build_min(coord_bld, coord, length_f); |
||
467 | /* subtract 0.5 */ |
||
468 | coord = lp_build_sub(coord_bld, coord, half); |
||
469 | /* clamp to [0, length - 0.5] */ |
||
470 | coord = lp_build_max(coord_bld, coord, coord_bld->zero); |
||
471 | |||
472 | /* convert to int, compute lerp weight */ |
||
473 | lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight); |
||
474 | coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); |
||
475 | /* coord1 = min(coord1, length-1) */ |
||
476 | coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one); |
||
477 | } |
||
478 | break; |
||
479 | |||
480 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: |
||
481 | { |
||
482 | if (bld->static_sampler_state->normalized_coords) { |
||
483 | /* scale coord to length */ |
||
484 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
485 | } |
||
486 | if (offset) { |
||
487 | offset = lp_build_int_to_float(coord_bld, offset); |
||
488 | coord = lp_build_add(coord_bld, coord, offset); |
||
489 | } |
||
490 | coord = lp_build_abs(coord_bld, coord); |
||
491 | |||
492 | /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */ |
||
493 | /* skip clamp - always positive, and other side |
||
494 | only potentially matters for very large coords */ |
||
495 | coord = lp_build_sub(coord_bld, coord, half); |
||
496 | |||
497 | /* convert to int, compute lerp weight */ |
||
498 | lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight); |
||
499 | coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one); |
||
500 | } |
||
501 | break; |
||
502 | |||
503 | default: |
||
504 | assert(0); |
||
505 | coord0 = NULL; |
||
506 | coord1 = NULL; |
||
507 | weight = NULL; |
||
508 | } |
||
509 | |||
510 | *x0_out = coord0; |
||
511 | *x1_out = coord1; |
||
512 | *weight_out = weight; |
||
513 | } |
||
514 | |||
515 | |||
516 | /** |
||
517 | * Build LLVM code for texture wrap mode for nearest filtering. |
||
518 | * \param coord the incoming texcoord (nominally in [0,1]) |
||
519 | * \param length the texture size along one dimension, as int vector |
||
520 | * \param length_f the texture size along one dimension, as float vector |
||
521 | * \param offset texel offset along one dimension (as int vector) |
||
522 | * \param is_pot if TRUE, length is a power of two |
||
523 | * \param wrap_mode one of PIPE_TEX_WRAP_x |
||
524 | */ |
||
525 | static LLVMValueRef |
||
526 | lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld, |
||
527 | LLVMValueRef coord, |
||
528 | LLVMValueRef length, |
||
529 | LLVMValueRef length_f, |
||
530 | LLVMValueRef offset, |
||
531 | boolean is_pot, |
||
532 | unsigned wrap_mode) |
||
533 | { |
||
534 | struct lp_build_context *coord_bld = &bld->coord_bld; |
||
535 | struct lp_build_context *int_coord_bld = &bld->int_coord_bld; |
||
536 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
537 | LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); |
||
538 | LLVMValueRef icoord; |
||
539 | |||
540 | switch(wrap_mode) { |
||
541 | case PIPE_TEX_WRAP_REPEAT: |
||
542 | if (is_pot) { |
||
543 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
544 | icoord = lp_build_ifloor(coord_bld, coord); |
||
545 | if (offset) { |
||
546 | icoord = lp_build_add(int_coord_bld, icoord, offset); |
||
547 | } |
||
548 | icoord = LLVMBuildAnd(builder, icoord, length_minus_one, ""); |
||
549 | } |
||
550 | else { |
||
551 | if (offset) { |
||
552 | offset = lp_build_int_to_float(coord_bld, offset); |
||
553 | offset = lp_build_div(coord_bld, offset, length_f); |
||
554 | coord = lp_build_add(coord_bld, coord, offset); |
||
555 | } |
||
556 | /* take fraction, unnormalize */ |
||
557 | coord = lp_build_fract_safe(coord_bld, coord); |
||
558 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
559 | icoord = lp_build_itrunc(coord_bld, coord); |
||
560 | } |
||
561 | break; |
||
562 | |||
563 | case PIPE_TEX_WRAP_CLAMP: |
||
564 | case PIPE_TEX_WRAP_CLAMP_TO_EDGE: |
||
565 | if (bld->static_sampler_state->normalized_coords) { |
||
566 | /* scale coord to length */ |
||
567 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
568 | } |
||
569 | |||
570 | /* floor */ |
||
571 | /* use itrunc instead since we clamp to 0 anyway */ |
||
572 | icoord = lp_build_itrunc(coord_bld, coord); |
||
573 | if (offset) { |
||
574 | icoord = lp_build_add(int_coord_bld, icoord, offset); |
||
575 | } |
||
576 | |||
577 | /* clamp to [0, length - 1]. */ |
||
578 | icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero, |
||
579 | length_minus_one); |
||
580 | break; |
||
581 | |||
582 | case PIPE_TEX_WRAP_CLAMP_TO_BORDER: |
||
583 | if (bld->static_sampler_state->normalized_coords) { |
||
584 | /* scale coord to length */ |
||
585 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
586 | } |
||
587 | /* no clamp necessary, border masking will handle this */ |
||
588 | icoord = lp_build_ifloor(coord_bld, coord); |
||
589 | if (offset) { |
||
590 | icoord = lp_build_add(int_coord_bld, icoord, offset); |
||
591 | } |
||
592 | break; |
||
593 | |||
594 | case PIPE_TEX_WRAP_MIRROR_REPEAT: |
||
595 | if (offset) { |
||
596 | offset = lp_build_int_to_float(coord_bld, offset); |
||
597 | offset = lp_build_div(coord_bld, offset, length_f); |
||
598 | coord = lp_build_add(coord_bld, coord, offset); |
||
599 | } |
||
600 | /* compute mirror function */ |
||
601 | coord = lp_build_coord_mirror(bld, coord); |
||
602 | |||
603 | /* scale coord to length */ |
||
604 | assert(bld->static_sampler_state->normalized_coords); |
||
605 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
606 | |||
607 | /* itrunc == ifloor here */ |
||
608 | icoord = lp_build_itrunc(coord_bld, coord); |
||
609 | |||
610 | /* clamp to [0, length - 1] */ |
||
611 | icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); |
||
612 | break; |
||
613 | |||
614 | case PIPE_TEX_WRAP_MIRROR_CLAMP: |
||
615 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: |
||
616 | if (bld->static_sampler_state->normalized_coords) { |
||
617 | /* scale coord to length */ |
||
618 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
619 | } |
||
620 | if (offset) { |
||
621 | offset = lp_build_int_to_float(coord_bld, offset); |
||
622 | coord = lp_build_add(coord_bld, coord, offset); |
||
623 | } |
||
624 | coord = lp_build_abs(coord_bld, coord); |
||
625 | |||
626 | /* itrunc == ifloor here */ |
||
627 | icoord = lp_build_itrunc(coord_bld, coord); |
||
628 | |||
629 | /* clamp to [0, length - 1] */ |
||
630 | icoord = lp_build_min(int_coord_bld, icoord, length_minus_one); |
||
631 | break; |
||
632 | |||
633 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: |
||
634 | if (bld->static_sampler_state->normalized_coords) { |
||
635 | /* scale coord to length */ |
||
636 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
637 | } |
||
638 | if (offset) { |
||
639 | offset = lp_build_int_to_float(coord_bld, offset); |
||
640 | coord = lp_build_add(coord_bld, coord, offset); |
||
641 | } |
||
642 | coord = lp_build_abs(coord_bld, coord); |
||
643 | |||
644 | /* itrunc == ifloor here */ |
||
645 | icoord = lp_build_itrunc(coord_bld, coord); |
||
646 | break; |
||
647 | |||
648 | default: |
||
649 | assert(0); |
||
650 | icoord = NULL; |
||
651 | } |
||
652 | |||
653 | return icoord; |
||
654 | } |
||
655 | |||
656 | |||
657 | /** |
||
658 | * Do shadow test/comparison. |
||
659 | * \param p shadow ref value |
||
660 | * \param texel the texel to compare against |
||
661 | */ |
||
662 | static LLVMValueRef |
||
663 | lp_build_sample_comparefunc(struct lp_build_sample_context *bld, |
||
664 | LLVMValueRef p, |
||
665 | LLVMValueRef texel) |
||
666 | { |
||
667 | struct lp_build_context *texel_bld = &bld->texel_bld; |
||
668 | LLVMValueRef res; |
||
669 | |||
670 | if (0) { |
||
671 | //lp_build_print_value(bld->gallivm, "shadow cmp coord", p); |
||
672 | lp_build_print_value(bld->gallivm, "shadow cmp texel", texel); |
||
673 | } |
||
674 | |||
675 | /* result = (p FUNC texel) ? 1 : 0 */ |
||
676 | /* |
||
677 | * honor d3d10 floating point rules here, which state that comparisons |
||
678 | * are ordered except NOT_EQUAL which is unordered. |
||
679 | */ |
||
680 | if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) { |
||
681 | res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func, |
||
682 | p, texel); |
||
683 | } |
||
684 | else { |
||
685 | res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func, |
||
686 | p, texel); |
||
687 | } |
||
688 | return res; |
||
689 | } |
||
690 | |||
691 | |||
692 | /** |
||
693 | * Generate code to sample a mipmap level with nearest filtering. |
||
694 | * If sampling a cube texture, r = cube face in [0,5]. |
||
695 | */ |
||
696 | static void |
||
697 | lp_build_sample_image_nearest(struct lp_build_sample_context *bld, |
||
698 | LLVMValueRef size, |
||
699 | LLVMValueRef row_stride_vec, |
||
700 | LLVMValueRef img_stride_vec, |
||
701 | LLVMValueRef data_ptr, |
||
702 | LLVMValueRef mipoffsets, |
||
703 | LLVMValueRef *coords, |
||
704 | const LLVMValueRef *offsets, |
||
705 | LLVMValueRef colors_out[4]) |
||
706 | { |
||
707 | const unsigned dims = bld->dims; |
||
708 | LLVMValueRef width_vec; |
||
709 | LLVMValueRef height_vec; |
||
710 | LLVMValueRef depth_vec; |
||
711 | LLVMValueRef flt_size; |
||
712 | LLVMValueRef flt_width_vec; |
||
713 | LLVMValueRef flt_height_vec; |
||
714 | LLVMValueRef flt_depth_vec; |
||
715 | LLVMValueRef x, y = NULL, z = NULL; |
||
716 | |||
717 | lp_build_extract_image_sizes(bld, |
||
718 | &bld->int_size_bld, |
||
719 | bld->int_coord_type, |
||
720 | size, |
||
721 | &width_vec, &height_vec, &depth_vec); |
||
722 | |||
723 | flt_size = lp_build_int_to_float(&bld->float_size_bld, size); |
||
724 | |||
725 | lp_build_extract_image_sizes(bld, |
||
726 | &bld->float_size_bld, |
||
727 | bld->coord_type, |
||
728 | flt_size, |
||
729 | &flt_width_vec, &flt_height_vec, &flt_depth_vec); |
||
730 | |||
731 | /* |
||
732 | * Compute integer texcoords. |
||
733 | */ |
||
734 | x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec, |
||
735 | flt_width_vec, offsets[0], |
||
736 | bld->static_texture_state->pot_width, |
||
737 | bld->static_sampler_state->wrap_s); |
||
738 | lp_build_name(x, "tex.x.wrapped"); |
||
739 | |||
740 | if (dims >= 2) { |
||
741 | y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec, |
||
742 | flt_height_vec, offsets[1], |
||
743 | bld->static_texture_state->pot_height, |
||
744 | bld->static_sampler_state->wrap_t); |
||
745 | lp_build_name(y, "tex.y.wrapped"); |
||
746 | |||
747 | if (dims == 3) { |
||
748 | z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec, |
||
749 | flt_depth_vec, offsets[2], |
||
750 | bld->static_texture_state->pot_depth, |
||
751 | bld->static_sampler_state->wrap_r); |
||
752 | lp_build_name(z, "tex.z.wrapped"); |
||
753 | } |
||
754 | } |
||
755 | if (has_layer_coord(bld->static_texture_state->target)) { |
||
756 | if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { |
||
757 | /* add cube layer to face */ |
||
758 | z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); |
||
759 | } |
||
760 | else { |
||
761 | z = coords[2]; |
||
762 | } |
||
763 | lp_build_name(z, "tex.z.layer"); |
||
764 | } |
||
765 | |||
766 | /* |
||
767 | * Get texture colors. |
||
768 | */ |
||
769 | lp_build_sample_texel_soa(bld, |
||
770 | width_vec, height_vec, depth_vec, |
||
771 | x, y, z, |
||
772 | row_stride_vec, img_stride_vec, |
||
773 | data_ptr, mipoffsets, colors_out); |
||
774 | |||
775 | if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) { |
||
776 | LLVMValueRef cmpval; |
||
777 | cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]); |
||
778 | /* this is really just a AND 1.0, cmpval but llvm is clever enough */ |
||
779 | colors_out[0] = lp_build_select(&bld->texel_bld, cmpval, |
||
780 | bld->texel_bld.one, bld->texel_bld.zero); |
||
781 | colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; |
||
782 | } |
||
783 | |||
784 | } |
||
785 | |||
786 | |||
787 | /** |
||
788 | * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly. |
||
789 | */ |
||
790 | static LLVMValueRef |
||
791 | lp_build_masklerp(struct lp_build_context *bld, |
||
792 | LLVMValueRef weight, |
||
793 | LLVMValueRef mask0, |
||
794 | LLVMValueRef mask1) |
||
795 | { |
||
796 | struct gallivm_state *gallivm = bld->gallivm; |
||
797 | LLVMBuilderRef builder = gallivm->builder; |
||
798 | LLVMValueRef weight2; |
||
799 | |||
800 | weight2 = lp_build_sub(bld, bld->one, weight); |
||
801 | weight = LLVMBuildBitCast(builder, weight, |
||
802 | lp_build_int_vec_type(gallivm, bld->type), ""); |
||
803 | weight2 = LLVMBuildBitCast(builder, weight2, |
||
804 | lp_build_int_vec_type(gallivm, bld->type), ""); |
||
805 | weight = LLVMBuildAnd(builder, weight, mask1, ""); |
||
806 | weight2 = LLVMBuildAnd(builder, weight2, mask0, ""); |
||
807 | weight = LLVMBuildBitCast(builder, weight, bld->vec_type, ""); |
||
808 | weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, ""); |
||
809 | return lp_build_add(bld, weight, weight2); |
||
810 | } |
||
811 | |||
812 | /** |
||
813 | * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly. |
||
814 | */ |
||
815 | static LLVMValueRef |
||
816 | lp_build_masklerp2d(struct lp_build_context *bld, |
||
817 | LLVMValueRef weight0, |
||
818 | LLVMValueRef weight1, |
||
819 | LLVMValueRef mask00, |
||
820 | LLVMValueRef mask01, |
||
821 | LLVMValueRef mask10, |
||
822 | LLVMValueRef mask11) |
||
823 | { |
||
824 | LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01); |
||
825 | LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11); |
||
826 | return lp_build_lerp(bld, weight1, val0, val1, 0); |
||
827 | } |
||
828 | |||
829 | /* |
||
830 | * Compile-time switch (non-zero = enabled) for the accurate cube corner filtering path in lp_build_sample_image_linear(). |
||
831 | * That path is a fair amount of extra code for behaviour OpenGL only recommends but does not require. |
||
832 | */ |
||
833 | #define ACCURATE_CUBE_CORNERS 1 |
||
834 | |||
835 | /** |
||
836 | * Generate code to sample a mipmap level with linear filtering. |
||
837 | * If sampling a cube texture, r = cube face in [0,5]. |
||
838 | * If linear_mask is present, only pixels having their mask set |
||
839 | * will receive linear filtering, the rest will use nearest. |
||
840 | */ |
||
841 | static void |
||
842 | lp_build_sample_image_linear(struct lp_build_sample_context *bld, |
||
843 | boolean is_gather, |
||
844 | LLVMValueRef size, |
||
845 | LLVMValueRef linear_mask, |
||
846 | LLVMValueRef row_stride_vec, |
||
847 | LLVMValueRef img_stride_vec, |
||
848 | LLVMValueRef data_ptr, |
||
849 | LLVMValueRef mipoffsets, |
||
850 | LLVMValueRef *coords, |
||
851 | const LLVMValueRef *offsets, |
||
852 | LLVMValueRef colors_out[4]) |
||
853 | { |
||
854 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
855 | struct lp_build_context *ivec_bld = &bld->int_coord_bld; |
||
856 | struct lp_build_context *coord_bld = &bld->coord_bld; |
||
857 | struct lp_build_context *texel_bld = &bld->texel_bld; |
||
858 | const unsigned dims = bld->dims; |
||
859 | LLVMValueRef width_vec; |
||
860 | LLVMValueRef height_vec; |
||
861 | LLVMValueRef depth_vec; |
||
862 | LLVMValueRef flt_size; |
||
863 | LLVMValueRef flt_width_vec; |
||
864 | LLVMValueRef flt_height_vec; |
||
865 | LLVMValueRef flt_depth_vec; |
||
866 | LLVMValueRef fall_off[4], have_corners; |
||
867 | LLVMValueRef z1 = NULL; |
||
868 | LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL; |
||
869 | LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL; |
||
870 | LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL; |
||
871 | LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL; |
||
872 | LLVMValueRef xs[4], ys[4], zs[4]; |
||
873 | LLVMValueRef neighbors[2][2][4]; |
||
874 | int chan, texel_index; |
||
875 | boolean seamless_cube_filter, accurate_cube_corners; |
||
876 | |||
877 | seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE || |
||
878 | bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && |
||
879 | bld->static_sampler_state->seamless_cube_map; |
||
880 | /* |
||
881 | * XXX I don't know how this is really supposed to work with gather. From GL |
||
882 | * spec wording (not gather specific) it sounds like the 4th missing texel |
||
883 | * should be an average of the other 3, hence for gather could return this. |
||
884 | * This is however NOT how the code here works, which just fixes up the |
||
885 | * weights used for filtering instead. And of course for gather there is |
||
886 | * no filter to tweak... |
||
887 | */ |
||
888 | accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter && |
||
889 | !is_gather; |
||
890 | |||
891 | lp_build_extract_image_sizes(bld, |
||
892 | &bld->int_size_bld, |
||
893 | bld->int_coord_type, |
||
894 | size, |
||
895 | &width_vec, &height_vec, &depth_vec); |
||
896 | |||
897 | flt_size = lp_build_int_to_float(&bld->float_size_bld, size); |
||
898 | |||
899 | lp_build_extract_image_sizes(bld, |
||
900 | &bld->float_size_bld, |
||
901 | bld->coord_type, |
||
902 | flt_size, |
||
903 | &flt_width_vec, &flt_height_vec, &flt_depth_vec); |
||
904 | |||
905 | /* |
||
906 | * Compute integer texcoords. |
||
907 | */ |
||
908 | |||
909 | if (!seamless_cube_filter) { |
||
910 | lp_build_sample_wrap_linear(bld, coords[0], width_vec, |
||
911 | flt_width_vec, offsets[0], |
||
912 | bld->static_texture_state->pot_width, |
||
913 | bld->static_sampler_state->wrap_s, |
||
914 | &x00, &x01, &s_fpart); |
||
915 | lp_build_name(x00, "tex.x0.wrapped"); |
||
916 | lp_build_name(x01, "tex.x1.wrapped"); |
||
917 | x10 = x00; |
||
918 | x11 = x01; |
||
919 | |||
920 | if (dims >= 2) { |
||
921 | lp_build_sample_wrap_linear(bld, coords[1], height_vec, |
||
922 | flt_height_vec, offsets[1], |
||
923 | bld->static_texture_state->pot_height, |
||
924 | bld->static_sampler_state->wrap_t, |
||
925 | &y00, &y10, &t_fpart); |
||
926 | lp_build_name(y00, "tex.y0.wrapped"); |
||
927 | lp_build_name(y10, "tex.y1.wrapped"); |
||
928 | y01 = y00; |
||
929 | y11 = y10; |
||
930 | |||
931 | if (dims == 3) { |
||
932 | lp_build_sample_wrap_linear(bld, coords[2], depth_vec, |
||
933 | flt_depth_vec, offsets[2], |
||
934 | bld->static_texture_state->pot_depth, |
||
935 | bld->static_sampler_state->wrap_r, |
||
936 | &z00, &z1, &r_fpart); |
||
937 | z01 = z10 = z11 = z00; |
||
938 | lp_build_name(z00, "tex.z0.wrapped"); |
||
939 | lp_build_name(z1, "tex.z1.wrapped"); |
||
940 | } |
||
941 | } |
||
942 | if (has_layer_coord(bld->static_texture_state->target)) { |
||
943 | if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { |
||
944 | /* add cube layer to face */ |
||
945 | z00 = z01 = z10 = z11 = z1 = |
||
946 | lp_build_add(&bld->int_coord_bld, coords[2], coords[3]); |
||
947 | } |
||
948 | else { |
||
949 | z00 = z01 = z10 = z11 = z1 = coords[2]; /* cube face or layer */ |
||
950 | } |
||
951 | lp_build_name(z00, "tex.z0.layer"); |
||
952 | lp_build_name(z1, "tex.z1.layer"); |
||
953 | } |
||
954 | } |
||
955 | else { |
||
956 | struct lp_build_if_state edge_if; |
||
957 | LLVMTypeRef int1t; |
||
958 | LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2]; |
||
959 | LLVMValueRef coord, have_edge, have_corner; |
||
960 | LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y; |
||
961 | LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp; |
||
962 | LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped; |
||
963 | LLVMValueRef face = coords[2]; |
||
964 | LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f); |
||
965 | LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one); |
||
966 | /* XXX drop height calcs. Could (should) do this without seamless filtering too */ |
||
967 | height_vec = width_vec; |
||
968 | flt_height_vec = flt_width_vec; |
||
969 | |||
970 | /* XXX the overflow logic is actually sort of duplicated with trilinear, |
||
971 | * since an overflow in one mip should also have a corresponding overflow |
||
972 | * in another. |
||
973 | */ |
||
974 | /* should always have normalized coords, and offsets are undefined */ |
||
975 | assert(bld->static_sampler_state->normalized_coords); |
||
976 | coord = lp_build_mul(coord_bld, coords[0], flt_width_vec); |
||
977 | /* instead of clamp, build mask if overflowed */ |
||
978 | coord = lp_build_sub(coord_bld, coord, half); |
||
979 | /* convert to int, compute lerp weight */ |
||
980 | /* not ideal with AVX (and no AVX2) */ |
||
981 | lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart); |
||
982 | x1 = lp_build_add(ivec_bld, x0, ivec_bld->one); |
||
983 | coord = lp_build_mul(coord_bld, coords[1], flt_height_vec); |
||
984 | coord = lp_build_sub(coord_bld, coord, half); |
||
985 | lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart); |
||
986 | y1 = lp_build_add(ivec_bld, y0, ivec_bld->one); |
||
987 | |||
988 | fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero); |
||
989 | fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one); |
||
990 | fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero); |
||
991 | fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one); |
||
992 | |||
993 | fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]); |
||
994 | fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]); |
||
995 | have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y); |
||
996 | have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge); |
||
997 | |||
998 | /* needed for accurate corner filtering branch later, rely on 0 init */ |
||
999 | int1t = LLVMInt1TypeInContext(bld->gallivm->context); |
||
1000 | have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner"); |
||
1001 | |||
1002 | for (texel_index = 0; texel_index < 4; texel_index++) { |
||
1003 | xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs"); |
||
1004 | ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys"); |
||
1005 | zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs"); |
||
1006 | } |
||
1007 | |||
1008 | lp_build_if(&edge_if, bld->gallivm, have_edge); |
||
1009 | |||
1010 | have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y); |
||
1011 | have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner); |
||
1012 | LLVMBuildStore(builder, have_corner, have_corners); |
||
1013 | |||
1014 | /* |
||
1015 | * Need to feed clamped values here for cheap corner handling, |
||
1016 | * but only for y coord (as when falling off both edges we only |
||
1017 | * fall off the x one) - this should be sufficient. |
||
1018 | */ |
||
1019 | y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero); |
||
1020 | y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one); |
||
1021 | |||
1022 | /* |
||
1023 | * Get all possible new coords. |
||
1024 | */ |
||
1025 | lp_build_cube_new_coords(ivec_bld, face, |
||
1026 | x0, x1, y0_clamped, y1_clamped, |
||
1027 | length_minus_one, |
||
1028 | new_faces, new_xcoords, new_ycoords); |
||
1029 | |||
1030 | /* handle fall off x-, x+ direction */ |
||
1031 | /* determine new coords, face (not both fall_off vars can be true at same time) */ |
||
1032 | x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0); |
||
1033 | y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped); |
||
1034 | x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0); |
||
1035 | y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped); |
||
1036 | x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1); |
||
1037 | y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped); |
||
1038 | x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1); |
||
1039 | y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped); |
||
1040 | |||
1041 | z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face); |
||
1042 | z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face); |
||
1043 | |||
1044 | /* handle fall off y-, y+ direction */ |
||
1045 | /* |
||
1046 | * Cheap corner logic: just hack up things so a texel doesn't fall |
||
1047 | * off both sides (which means filter weights will be wrong but we'll only |
||
1048 | * use valid texels in the filter). |
||
1049 | * This means however (y) coords must additionally be clamped (see above). |
||
1050 | * This corner handling should be fully OpenGL (but not d3d10) compliant. |
||
1051 | */ |
||
1052 | fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]); |
||
1053 | fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]); |
||
1054 | fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]); |
||
1055 | fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]); |
||
1056 | |||
1057 | x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00); |
||
1058 | y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00); |
||
1059 | x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01); |
||
1060 | y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01); |
||
1061 | x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10); |
||
1062 | y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10); |
||
1063 | x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11); |
||
1064 | y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11); |
||
1065 | |||
1066 | z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00); |
||
1067 | z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01); |
||
1068 | z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10); |
||
1069 | z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11); |
||
1070 | |||
1071 | if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { |
||
1072 | /* now can add cube layer to face (per sample) */ |
||
1073 | z00 = lp_build_add(ivec_bld, z00, coords[3]); |
||
1074 | z01 = lp_build_add(ivec_bld, z01, coords[3]); |
||
1075 | z10 = lp_build_add(ivec_bld, z10, coords[3]); |
||
1076 | z11 = lp_build_add(ivec_bld, z11, coords[3]); |
||
1077 | } |
||
1078 | |||
1079 | LLVMBuildStore(builder, x00, xs[0]); |
||
1080 | LLVMBuildStore(builder, x01, xs[1]); |
||
1081 | LLVMBuildStore(builder, x10, xs[2]); |
||
1082 | LLVMBuildStore(builder, x11, xs[3]); |
||
1083 | LLVMBuildStore(builder, y00, ys[0]); |
||
1084 | LLVMBuildStore(builder, y01, ys[1]); |
||
1085 | LLVMBuildStore(builder, y10, ys[2]); |
||
1086 | LLVMBuildStore(builder, y11, ys[3]); |
||
1087 | LLVMBuildStore(builder, z00, zs[0]); |
||
1088 | LLVMBuildStore(builder, z01, zs[1]); |
||
1089 | LLVMBuildStore(builder, z10, zs[2]); |
||
1090 | LLVMBuildStore(builder, z11, zs[3]); |
||
1091 | |||
1092 | lp_build_else(&edge_if); |
||
1093 | |||
1094 | LLVMBuildStore(builder, x0, xs[0]); |
||
1095 | LLVMBuildStore(builder, x1, xs[1]); |
||
1096 | LLVMBuildStore(builder, x0, xs[2]); |
||
1097 | LLVMBuildStore(builder, x1, xs[3]); |
||
1098 | LLVMBuildStore(builder, y0, ys[0]); |
||
1099 | LLVMBuildStore(builder, y0, ys[1]); |
||
1100 | LLVMBuildStore(builder, y1, ys[2]); |
||
1101 | LLVMBuildStore(builder, y1, ys[3]); |
||
1102 | if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { |
||
1103 | LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]); |
||
1104 | LLVMBuildStore(builder, cube_layer, zs[0]); |
||
1105 | LLVMBuildStore(builder, cube_layer, zs[1]); |
||
1106 | LLVMBuildStore(builder, cube_layer, zs[2]); |
||
1107 | LLVMBuildStore(builder, cube_layer, zs[3]); |
||
1108 | } |
||
1109 | else { |
||
1110 | LLVMBuildStore(builder, face, zs[0]); |
||
1111 | LLVMBuildStore(builder, face, zs[1]); |
||
1112 | LLVMBuildStore(builder, face, zs[2]); |
||
1113 | LLVMBuildStore(builder, face, zs[3]); |
||
1114 | } |
||
1115 | |||
1116 | lp_build_endif(&edge_if); |
||
1117 | |||
1118 | x00 = LLVMBuildLoad(builder, xs[0], ""); |
||
1119 | x01 = LLVMBuildLoad(builder, xs[1], ""); |
||
1120 | x10 = LLVMBuildLoad(builder, xs[2], ""); |
||
1121 | x11 = LLVMBuildLoad(builder, xs[3], ""); |
||
1122 | y00 = LLVMBuildLoad(builder, ys[0], ""); |
||
1123 | y01 = LLVMBuildLoad(builder, ys[1], ""); |
||
1124 | y10 = LLVMBuildLoad(builder, ys[2], ""); |
||
1125 | y11 = LLVMBuildLoad(builder, ys[3], ""); |
||
1126 | z00 = LLVMBuildLoad(builder, zs[0], ""); |
||
1127 | z01 = LLVMBuildLoad(builder, zs[1], ""); |
||
1128 | z10 = LLVMBuildLoad(builder, zs[2], ""); |
||
1129 | z11 = LLVMBuildLoad(builder, zs[3], ""); |
||
1130 | } |
||
1131 | |||
1132 | if (linear_mask) { |
||
1133 | /* |
||
1134 | * Whack filter weights into place. Whatever texel had more weight is |
||
1135 | * the one which should have been selected by nearest filtering hence |
||
1136 | * just use 100% weight for it. |
||
1137 | */ |
||
1138 | struct lp_build_context *c_bld = &bld->coord_bld; |
||
1139 | LLVMValueRef w1_mask, w1_weight; |
||
1140 | LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f); |
||
1141 | |||
1142 | w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half); |
||
1143 | /* this select is really just a "and" */ |
||
1144 | w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero); |
||
1145 | s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight); |
||
1146 | if (dims >= 2) { |
||
1147 | w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half); |
||
1148 | w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero); |
||
1149 | t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight); |
||
1150 | if (dims == 3) { |
||
1151 | w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half); |
||
1152 | w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero); |
||
1153 | r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight); |
||
1154 | } |
||
1155 | } |
||
1156 | } |
||
1157 | |||
1158 | /* |
||
1159 | * Get texture colors. |
||
1160 | */ |
||
1161 | /* get x0/x1 texels */ |
||
1162 | lp_build_sample_texel_soa(bld, |
||
1163 | width_vec, height_vec, depth_vec, |
||
1164 | x00, y00, z00, |
||
1165 | row_stride_vec, img_stride_vec, |
||
1166 | data_ptr, mipoffsets, neighbors[0][0]); |
||
1167 | lp_build_sample_texel_soa(bld, |
||
1168 | width_vec, height_vec, depth_vec, |
||
1169 | x01, y01, z01, |
||
1170 | row_stride_vec, img_stride_vec, |
||
1171 | data_ptr, mipoffsets, neighbors[0][1]); |
||
1172 | |||
1173 | if (dims == 1) { |
||
1174 | assert(!is_gather); |
||
1175 | if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { |
||
1176 | /* Interpolate two samples from 1D image to produce one color */ |
||
1177 | for (chan = 0; chan < 4; chan++) { |
||
1178 | colors_out[chan] = lp_build_lerp(texel_bld, s_fpart, |
||
1179 | neighbors[0][0][chan], |
||
1180 | neighbors[0][1][chan], |
||
1181 | 0); |
||
1182 | } |
||
1183 | } |
||
1184 | else { |
||
1185 | LLVMValueRef cmpval0, cmpval1; |
||
1186 | cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); |
||
1187 | cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); |
||
1188 | /* simplified lerp, AND mask with weight and add */ |
||
1189 | colors_out[0] = lp_build_masklerp(texel_bld, s_fpart, |
||
1190 | cmpval0, cmpval1); |
||
1191 | colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; |
||
1192 | } |
||
1193 | } |
||
1194 | else { |
||
1195 | /* 2D/3D texture */ |
||
1196 | struct lp_build_if_state corner_if; |
||
1197 | LLVMValueRef colors0[4], colorss[4]; |
||
1198 | |||
1199 | /* get x0/x1 texels at y1 */ |
||
1200 | lp_build_sample_texel_soa(bld, |
||
1201 | width_vec, height_vec, depth_vec, |
||
1202 | x10, y10, z10, |
||
1203 | row_stride_vec, img_stride_vec, |
||
1204 | data_ptr, mipoffsets, neighbors[1][0]); |
||
1205 | lp_build_sample_texel_soa(bld, |
||
1206 | width_vec, height_vec, depth_vec, |
||
1207 | x11, y11, z11, |
||
1208 | row_stride_vec, img_stride_vec, |
||
1209 | data_ptr, mipoffsets, neighbors[1][1]); |
||
1210 | |||
1211 | /* |
||
1212 | * To avoid having to duplicate linear_mask / fetch code use |
||
1213 | * another branch (with corner condition though edge would work |
||
1214 | * as well) here. |
||
1215 | */ |
||
1216 | if (accurate_cube_corners) { |
||
1217 | LLVMValueRef w00, w01, w10, w11, wx0, wy0; |
||
1218 | LLVMValueRef c_weight, c00, c01, c10, c11; |
||
1219 | LLVMValueRef have_corner, one_third, tmp; |
||
1220 | |||
1221 | colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); |
||
1222 | colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); |
||
1223 | colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); |
||
1224 | colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs"); |
||
1225 | |||
1226 | have_corner = LLVMBuildLoad(builder, have_corners, ""); |
||
1227 | |||
1228 | lp_build_if(&corner_if, bld->gallivm, have_corner); |
||
1229 | |||
1230 | /* |
||
1231 | * we can't use standard 2d lerp as we need per-element weight |
||
1232 | * in case of corners, so just calculate bilinear result as |
||
1233 | * w00*s00 + w01*s01 + w10*s10 + w11*s11. |
||
1234 | * (This is actually less work than using 2d lerp, 7 vs. 9 instructions, |
||
1235 | * however calculating the weights needs another 6, so actually probably |
||
1236 | * not slower than 2d lerp only for 4 channels as weights only need |
||
1237 | * to be calculated once - of course fixing the weights has additional cost.) |
||
1238 | */ |
||
1239 | wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart); |
||
1240 | wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart); |
||
1241 | w00 = lp_build_mul(coord_bld, wx0, wy0); |
||
1242 | w01 = lp_build_mul(coord_bld, s_fpart, wy0); |
||
1243 | w10 = lp_build_mul(coord_bld, wx0, t_fpart); |
||
1244 | w11 = lp_build_mul(coord_bld, s_fpart, t_fpart); |
||
1245 | |||
1246 | /* find corner weight */ |
||
1247 | c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]); |
||
1248 | c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero); |
||
1249 | c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]); |
||
1250 | c_weight = lp_build_select(coord_bld, c01, w01, c_weight); |
||
1251 | c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]); |
||
1252 | c_weight = lp_build_select(coord_bld, c10, w10, c_weight); |
||
1253 | c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]); |
||
1254 | c_weight = lp_build_select(coord_bld, c11, w11, c_weight); |
||
1255 | |||
1256 | /* |
||
1257 | * add 1/3 of the corner weight to each of the 3 other samples |
||
1258 | * and null out corner weight |
||
1259 | */ |
||
1260 | one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f); |
||
1261 | c_weight = lp_build_mul(coord_bld, c_weight, one_third); |
||
1262 | w00 = lp_build_add(coord_bld, w00, c_weight); |
||
1263 | c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, ""); |
||
1264 | w00 = lp_build_andnot(coord_bld, w00, c00); |
||
1265 | w01 = lp_build_add(coord_bld, w01, c_weight); |
||
1266 | c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, ""); |
||
1267 | w01 = lp_build_andnot(coord_bld, w01, c01); |
||
1268 | w10 = lp_build_add(coord_bld, w10, c_weight); |
||
1269 | c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, ""); |
||
1270 | w10 = lp_build_andnot(coord_bld, w10, c10); |
||
1271 | w11 = lp_build_add(coord_bld, w11, c_weight); |
||
1272 | c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, ""); |
||
1273 | w11 = lp_build_andnot(coord_bld, w11, c11); |
||
1274 | |||
1275 | if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { |
||
1276 | for (chan = 0; chan < 4; chan++) { |
||
1277 | colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]); |
||
1278 | tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]); |
||
1279 | colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); |
||
1280 | tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]); |
||
1281 | colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); |
||
1282 | tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]); |
||
1283 | colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]); |
||
1284 | } |
||
1285 | } |
||
1286 | else { |
||
1287 | LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; |
||
1288 | cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); |
||
1289 | cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); |
||
1290 | cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]); |
||
1291 | cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]); |
||
1292 | /* inputs to interpolation are just masks so just add masked weights together */ |
||
1293 | cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, ""); |
||
1294 | cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, ""); |
||
1295 | cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, ""); |
||
1296 | cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, ""); |
||
1297 | colors0[0] = lp_build_and(coord_bld, w00, cmpval00); |
||
1298 | tmp = lp_build_and(coord_bld, w01, cmpval01); |
||
1299 | colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); |
||
1300 | tmp = lp_build_and(coord_bld, w10, cmpval10); |
||
1301 | colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); |
||
1302 | tmp = lp_build_and(coord_bld, w11, cmpval11); |
||
1303 | colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]); |
||
1304 | colors0[1] = colors0[2] = colors0[3] = colors0[0]; |
||
1305 | } |
||
1306 | |||
1307 | LLVMBuildStore(builder, colors0[0], colorss[0]); |
||
1308 | LLVMBuildStore(builder, colors0[1], colorss[1]); |
||
1309 | LLVMBuildStore(builder, colors0[2], colorss[2]); |
||
1310 | LLVMBuildStore(builder, colors0[3], colorss[3]); |
||
1311 | |||
1312 | lp_build_else(&corner_if); |
||
1313 | } |
||
1314 | |||
1315 | if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { |
||
1316 | if (is_gather) { |
||
1317 | /* |
||
1318 | * Just assign the red channel (no component selection yet). |
||
1319 | * This is a bit hackish, we usually do the swizzle at the |
||
1320 | * end of sampling (much less values to swizzle), but this |
||
1321 | * obviously cannot work when using gather. |
||
1322 | */ |
||
1323 | unsigned chan_swiz = bld->static_texture_state->swizzle_r; |
||
1324 | colors0[0] = lp_build_swizzle_soa_channel(texel_bld, |
||
1325 | neighbors[1][0], |
||
1326 | chan_swiz); |
||
1327 | colors0[1] = lp_build_swizzle_soa_channel(texel_bld, |
||
1328 | neighbors[1][1], |
||
1329 | chan_swiz); |
||
1330 | colors0[2] = lp_build_swizzle_soa_channel(texel_bld, |
||
1331 | neighbors[0][1], |
||
1332 | chan_swiz); |
||
1333 | colors0[3] = lp_build_swizzle_soa_channel(texel_bld, |
||
1334 | neighbors[0][0], |
||
1335 | chan_swiz); |
||
1336 | } |
||
1337 | else { |
||
1338 | /* Bilinear interpolate the four samples from the 2D image / 3D slice */ |
||
1339 | for (chan = 0; chan < 4; chan++) { |
||
1340 | colors0[chan] = lp_build_lerp_2d(texel_bld, |
||
1341 | s_fpart, t_fpart, |
||
1342 | neighbors[0][0][chan], |
||
1343 | neighbors[0][1][chan], |
||
1344 | neighbors[1][0][chan], |
||
1345 | neighbors[1][1][chan], |
||
1346 | 0); |
||
1347 | } |
||
1348 | } |
||
1349 | } |
||
1350 | else { |
||
1351 | LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; |
||
1352 | cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); |
||
1353 | cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); |
||
1354 | cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]); |
||
1355 | cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]); |
||
1356 | |||
1357 | if (is_gather) { |
||
1358 | /* more hacks for swizzling, should be X, ONE or ZERO... */ |
||
1359 | unsigned chan_swiz = bld->static_texture_state->swizzle_r; |
||
1360 | if (chan_swiz <= PIPE_SWIZZLE_ALPHA) { |
||
1361 | colors0[0] = lp_build_select(texel_bld, cmpval10, |
||
1362 | texel_bld->one, texel_bld->zero); |
||
1363 | colors0[1] = lp_build_select(texel_bld, cmpval11, |
||
1364 | texel_bld->one, texel_bld->zero); |
||
1365 | colors0[2] = lp_build_select(texel_bld, cmpval01, |
||
1366 | texel_bld->one, texel_bld->zero); |
||
1367 | colors0[3] = lp_build_select(texel_bld, cmpval00, |
||
1368 | texel_bld->one, texel_bld->zero); |
||
1369 | } |
||
1370 | else if (chan_swiz == PIPE_SWIZZLE_ZERO) { |
||
1371 | colors0[0] = colors0[1] = colors0[2] = colors0[3] = |
||
1372 | texel_bld->zero; |
||
1373 | } |
||
1374 | else { |
||
1375 | colors0[0] = colors0[1] = colors0[2] = colors0[3] = |
||
1376 | texel_bld->one; |
||
1377 | } |
||
1378 | } |
||
1379 | else { |
||
1380 | colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart, |
||
1381 | cmpval00, cmpval01, cmpval10, cmpval11); |
||
1382 | colors0[1] = colors0[2] = colors0[3] = colors0[0]; |
||
1383 | } |
||
1384 | } |
||
1385 | |||
1386 | if (accurate_cube_corners) { |
||
1387 | LLVMBuildStore(builder, colors0[0], colorss[0]); |
||
1388 | LLVMBuildStore(builder, colors0[1], colorss[1]); |
||
1389 | LLVMBuildStore(builder, colors0[2], colorss[2]); |
||
1390 | LLVMBuildStore(builder, colors0[3], colorss[3]); |
||
1391 | |||
1392 | lp_build_endif(&corner_if); |
||
1393 | |||
1394 | colors0[0] = LLVMBuildLoad(builder, colorss[0], ""); |
||
1395 | colors0[1] = LLVMBuildLoad(builder, colorss[1], ""); |
||
1396 | colors0[2] = LLVMBuildLoad(builder, colorss[2], ""); |
||
1397 | colors0[3] = LLVMBuildLoad(builder, colorss[3], ""); |
||
1398 | } |
||
1399 | |||
1400 | if (dims == 3) { |
||
1401 | LLVMValueRef neighbors1[2][2][4]; |
||
1402 | LLVMValueRef colors1[4]; |
||
1403 | |||
1404 | assert(!is_gather); |
||
1405 | |||
1406 | /* get x0/x1/y0/y1 texels at z1 */ |
||
1407 | lp_build_sample_texel_soa(bld, |
||
1408 | width_vec, height_vec, depth_vec, |
||
1409 | x00, y00, z1, |
||
1410 | row_stride_vec, img_stride_vec, |
||
1411 | data_ptr, mipoffsets, neighbors1[0][0]); |
||
1412 | lp_build_sample_texel_soa(bld, |
||
1413 | width_vec, height_vec, depth_vec, |
||
1414 | x01, y01, z1, |
||
1415 | row_stride_vec, img_stride_vec, |
||
1416 | data_ptr, mipoffsets, neighbors1[0][1]); |
||
1417 | lp_build_sample_texel_soa(bld, |
||
1418 | width_vec, height_vec, depth_vec, |
||
1419 | x10, y10, z1, |
||
1420 | row_stride_vec, img_stride_vec, |
||
1421 | data_ptr, mipoffsets, neighbors1[1][0]); |
||
1422 | lp_build_sample_texel_soa(bld, |
||
1423 | width_vec, height_vec, depth_vec, |
||
1424 | x11, y11, z1, |
||
1425 | row_stride_vec, img_stride_vec, |
||
1426 | data_ptr, mipoffsets, neighbors1[1][1]); |
||
1427 | |||
1428 | if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) { |
||
1429 | /* Bilinear interpolate the four samples from the second Z slice */ |
||
1430 | for (chan = 0; chan < 4; chan++) { |
||
1431 | colors1[chan] = lp_build_lerp_2d(texel_bld, |
||
1432 | s_fpart, t_fpart, |
||
1433 | neighbors1[0][0][chan], |
||
1434 | neighbors1[0][1][chan], |
||
1435 | neighbors1[1][0][chan], |
||
1436 | neighbors1[1][1][chan], |
||
1437 | 0); |
||
1438 | } |
||
1439 | /* Linearly interpolate the two samples from the two 3D slices */ |
||
1440 | for (chan = 0; chan < 4; chan++) { |
||
1441 | colors_out[chan] = lp_build_lerp(texel_bld, |
||
1442 | r_fpart, |
||
1443 | colors0[chan], colors1[chan], |
||
1444 | 0); |
||
1445 | } |
||
1446 | } |
||
1447 | else { |
||
1448 | LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11; |
||
1449 | cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]); |
||
1450 | cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]); |
||
1451 | cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]); |
||
1452 | cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]); |
||
1453 | colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart, |
||
1454 | cmpval00, cmpval01, cmpval10, cmpval11); |
||
1455 | /* Linearly interpolate the two samples from the two 3D slices */ |
||
1456 | colors_out[0] = lp_build_lerp(texel_bld, |
||
1457 | r_fpart, |
||
1458 | colors0[0], colors1[0], |
||
1459 | 0); |
||
1460 | colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0]; |
||
1461 | } |
||
1462 | } |
||
1463 | else { |
||
1464 | /* 2D tex */ |
||
1465 | for (chan = 0; chan < 4; chan++) { |
||
1466 | colors_out[chan] = colors0[chan]; |
||
1467 | } |
||
1468 | } |
||
1469 | } |
||
1470 | } |
||
1471 | |||
1472 | |||
1473 | /** |
||
1474 | * Sample the texture/mipmap using given image filter and mip filter. |
||
1475 | * ilevel0 and ilevel1 indicate the two mipmap levels to sample |
||
1476 | * from (vectors or scalars). |
||
1477 | * If we're using nearest miplevel sampling the '1' values will be null/unused. |
||
1478 | */ |
||
1479 | static void |
||
1480 | lp_build_sample_mipmap(struct lp_build_sample_context *bld, |
||
1481 | unsigned img_filter, |
||
1482 | unsigned mip_filter, |
||
1483 | boolean is_gather, |
||
1484 | LLVMValueRef *coords, |
||
1485 | const LLVMValueRef *offsets, |
||
1486 | LLVMValueRef ilevel0, |
||
1487 | LLVMValueRef ilevel1, |
||
1488 | LLVMValueRef lod_fpart, |
||
1489 | LLVMValueRef *colors_out) |
||
1490 | { |
||
1491 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
1492 | LLVMValueRef size0 = NULL; |
||
1493 | LLVMValueRef size1 = NULL; |
||
1494 | LLVMValueRef row_stride0_vec = NULL; |
||
1495 | LLVMValueRef row_stride1_vec = NULL; |
||
1496 | LLVMValueRef img_stride0_vec = NULL; |
||
1497 | LLVMValueRef img_stride1_vec = NULL; |
||
1498 | LLVMValueRef data_ptr0 = NULL; |
||
1499 | LLVMValueRef data_ptr1 = NULL; |
||
1500 | LLVMValueRef mipoff0 = NULL; |
||
1501 | LLVMValueRef mipoff1 = NULL; |
||
1502 | LLVMValueRef colors0[4], colors1[4]; |
||
1503 | unsigned chan; |
||
1504 | |||
1505 | /* sample the first mipmap level */ |
||
1506 | lp_build_mipmap_level_sizes(bld, ilevel0, |
||
1507 | &size0, |
||
1508 | &row_stride0_vec, &img_stride0_vec); |
||
1509 | if (bld->num_mips == 1) { |
||
1510 | data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); |
||
1511 | } |
||
1512 | else { |
||
1513 | /* This path should work for num_lods 1 too but slightly less efficient */ |
||
1514 | data_ptr0 = bld->base_ptr; |
||
1515 | mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); |
||
1516 | } |
||
1517 | if (img_filter == PIPE_TEX_FILTER_NEAREST) { |
||
1518 | lp_build_sample_image_nearest(bld, size0, |
||
1519 | row_stride0_vec, img_stride0_vec, |
||
1520 | data_ptr0, mipoff0, coords, offsets, |
||
1521 | colors0); |
||
1522 | } |
||
1523 | else { |
||
1524 | assert(img_filter == PIPE_TEX_FILTER_LINEAR); |
||
1525 | lp_build_sample_image_linear(bld, is_gather, size0, NULL, |
||
1526 | row_stride0_vec, img_stride0_vec, |
||
1527 | data_ptr0, mipoff0, coords, offsets, |
||
1528 | colors0); |
||
1529 | } |
||
1530 | |||
1531 | /* Store the first level's colors in the output variables */ |
||
1532 | for (chan = 0; chan < 4; chan++) { |
||
1533 | LLVMBuildStore(builder, colors0[chan], colors_out[chan]); |
||
1534 | } |
||
1535 | |||
1536 | if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { |
||
1537 | struct lp_build_if_state if_ctx; |
||
1538 | LLVMValueRef need_lerp; |
||
1539 | |||
1540 | /* need_lerp = lod_fpart > 0 */ |
||
1541 | if (bld->num_lods == 1) { |
||
1542 | need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT, |
||
1543 | lod_fpart, bld->lodf_bld.zero, |
||
1544 | "need_lerp"); |
||
1545 | } |
||
1546 | else { |
||
1547 | /* |
||
1548 | * We'll do mip filtering if any of the quads (or individual |
||
1549 | * pixel in case of per-pixel lod) need it. |
||
1550 | * It might be better to split the vectors here and only fetch/filter |
||
1551 | * quads which need it (if there's one lod per quad). |
||
1552 | */ |
||
1553 | need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type, |
||
1554 | PIPE_FUNC_GREATER, |
||
1555 | lod_fpart, bld->lodf_bld.zero); |
||
1556 | need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp); |
||
1557 | } |
||
1558 | |||
1559 | lp_build_if(&if_ctx, bld->gallivm, need_lerp); |
||
1560 | { |
||
1561 | /* |
||
1562 | * We unfortunately need to clamp lod_fpart here since we can get |
||
1563 | * negative values which would screw up filtering if not all |
||
1564 | * lod_fpart values have same sign. |
||
1565 | */ |
||
1566 | lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart, |
||
1567 | bld->lodf_bld.zero); |
||
1568 | /* sample the second mipmap level */ |
||
1569 | lp_build_mipmap_level_sizes(bld, ilevel1, |
||
1570 | &size1, |
||
1571 | &row_stride1_vec, &img_stride1_vec); |
||
1572 | if (bld->num_mips == 1) { |
||
1573 | data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); |
||
1574 | } |
||
1575 | else { |
||
1576 | data_ptr1 = bld->base_ptr; |
||
1577 | mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); |
||
1578 | } |
||
1579 | if (img_filter == PIPE_TEX_FILTER_NEAREST) { |
||
1580 | lp_build_sample_image_nearest(bld, size1, |
||
1581 | row_stride1_vec, img_stride1_vec, |
||
1582 | data_ptr1, mipoff1, coords, offsets, |
||
1583 | colors1); |
||
1584 | } |
||
1585 | else { |
||
1586 | lp_build_sample_image_linear(bld, FALSE, size1, NULL, |
||
1587 | row_stride1_vec, img_stride1_vec, |
||
1588 | data_ptr1, mipoff1, coords, offsets, |
||
1589 | colors1); |
||
1590 | } |
||
1591 | |||
1592 | /* interpolate samples from the two mipmap levels */ |
||
1593 | |||
1594 | if (bld->num_lods != bld->coord_type.length) |
||
1595 | lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, |
||
1596 | bld->lodf_bld.type, |
||
1597 | bld->texel_bld.type, |
||
1598 | lod_fpart); |
||
1599 | |||
1600 | for (chan = 0; chan < 4; chan++) { |
||
1601 | colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, |
||
1602 | colors0[chan], colors1[chan], |
||
1603 | 0); |
||
1604 | LLVMBuildStore(builder, colors0[chan], colors_out[chan]); |
||
1605 | } |
||
1606 | } |
||
1607 | lp_build_endif(&if_ctx); |
||
1608 | } |
||
1609 | } |
||
1610 | |||
1611 | |||
1612 | /** |
||
1613 | * Sample the texture/mipmap using given mip filter, and using |
||
1614 | * both nearest and linear filtering at the same time depending |
||
1615 | * on linear_mask. |
||
1616 | * lod can be per quad but linear_mask is always per pixel. |
||
1617 | * ilevel0 and ilevel1 indicate the two mipmap levels to sample |
||
1618 | * from (vectors or scalars). |
||
1619 | * If we're using nearest miplevel sampling the '1' values will be null/unused. |
||
1620 | */ |
||
1621 | static void |
||
1622 | lp_build_sample_mipmap_both(struct lp_build_sample_context *bld, |
||
1623 | LLVMValueRef linear_mask, |
||
1624 | unsigned mip_filter, |
||
1625 | LLVMValueRef *coords, |
||
1626 | const LLVMValueRef *offsets, |
||
1627 | LLVMValueRef ilevel0, |
||
1628 | LLVMValueRef ilevel1, |
||
1629 | LLVMValueRef lod_fpart, |
||
1630 | LLVMValueRef lod_positive, |
||
1631 | LLVMValueRef *colors_out) |
||
1632 | { |
||
1633 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
1634 | LLVMValueRef size0 = NULL; |
||
1635 | LLVMValueRef size1 = NULL; |
||
1636 | LLVMValueRef row_stride0_vec = NULL; |
||
1637 | LLVMValueRef row_stride1_vec = NULL; |
||
1638 | LLVMValueRef img_stride0_vec = NULL; |
||
1639 | LLVMValueRef img_stride1_vec = NULL; |
||
1640 | LLVMValueRef data_ptr0 = NULL; |
||
1641 | LLVMValueRef data_ptr1 = NULL; |
||
1642 | LLVMValueRef mipoff0 = NULL; |
||
1643 | LLVMValueRef mipoff1 = NULL; |
||
1644 | LLVMValueRef colors0[4], colors1[4]; |
||
1645 | unsigned chan; |
||
1646 | |||
1647 | /* sample the first mipmap level */ |
||
1648 | lp_build_mipmap_level_sizes(bld, ilevel0, |
||
1649 | &size0, |
||
1650 | &row_stride0_vec, &img_stride0_vec); |
||
1651 | if (bld->num_mips == 1) { |
||
1652 | data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0); |
||
1653 | } |
||
1654 | else { |
||
1655 | /* This path should work for num_lods 1 too but slightly less efficient */ |
||
1656 | data_ptr0 = bld->base_ptr; |
||
1657 | mipoff0 = lp_build_get_mip_offsets(bld, ilevel0); |
||
1658 | } |
||
1659 | |||
1660 | lp_build_sample_image_linear(bld, FALSE, size0, linear_mask, |
||
1661 | row_stride0_vec, img_stride0_vec, |
||
1662 | data_ptr0, mipoff0, coords, offsets, |
||
1663 | colors0); |
||
1664 | |||
1665 | /* Store the first level's colors in the output variables */ |
||
1666 | for (chan = 0; chan < 4; chan++) { |
||
1667 | LLVMBuildStore(builder, colors0[chan], colors_out[chan]); |
||
1668 | } |
||
1669 | |||
1670 | if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { |
||
1671 | struct lp_build_if_state if_ctx; |
||
1672 | LLVMValueRef need_lerp; |
||
1673 | |||
1674 | /* |
||
1675 | * We'll do mip filtering if any of the quads (or individual |
||
1676 | * pixel in case of per-pixel lod) need it. |
||
1677 | * Note using lod_positive here not lod_fpart since it may be the same |
||
1678 | * condition as that used in the outer "if" in the caller hence llvm |
||
1679 | * should be able to merge the branches in this case. |
||
1680 | */ |
||
1681 | need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive); |
||
1682 | |||
1683 | lp_build_if(&if_ctx, bld->gallivm, need_lerp); |
||
1684 | { |
||
1685 | /* |
||
1686 | * We unfortunately need to clamp lod_fpart here since we can get |
||
1687 | * negative values which would screw up filtering if not all |
||
1688 | * lod_fpart values have same sign. |
||
1689 | */ |
||
1690 | lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart, |
||
1691 | bld->lodf_bld.zero); |
||
1692 | /* sample the second mipmap level */ |
||
1693 | lp_build_mipmap_level_sizes(bld, ilevel1, |
||
1694 | &size1, |
||
1695 | &row_stride1_vec, &img_stride1_vec); |
||
1696 | if (bld->num_mips == 1) { |
||
1697 | data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1); |
||
1698 | } |
||
1699 | else { |
||
1700 | data_ptr1 = bld->base_ptr; |
||
1701 | mipoff1 = lp_build_get_mip_offsets(bld, ilevel1); |
||
1702 | } |
||
1703 | |||
1704 | lp_build_sample_image_linear(bld, FALSE, size1, linear_mask, |
||
1705 | row_stride1_vec, img_stride1_vec, |
||
1706 | data_ptr1, mipoff1, coords, offsets, |
||
1707 | colors1); |
||
1708 | |||
1709 | /* interpolate samples from the two mipmap levels */ |
||
1710 | |||
1711 | if (bld->num_lods != bld->coord_type.length) |
||
1712 | lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, |
||
1713 | bld->lodf_bld.type, |
||
1714 | bld->texel_bld.type, |
||
1715 | lod_fpart); |
||
1716 | |||
1717 | for (chan = 0; chan < 4; chan++) { |
||
1718 | colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart, |
||
1719 | colors0[chan], colors1[chan], |
||
1720 | 0); |
||
1721 | LLVMBuildStore(builder, colors0[chan], colors_out[chan]); |
||
1722 | } |
||
1723 | } |
||
1724 | lp_build_endif(&if_ctx); |
||
1725 | } |
||
1726 | } |
||
1727 | |||
1728 | |||
1729 | /** |
||
1730 | * Build (per-coord) layer value. |
||
1731 | * Either clamp layer to valid values or fill in optional out_of_bounds |
||
1732 | * value and just return value unclamped. |
||
1733 | */ |
||
1734 | static LLVMValueRef |
||
1735 | lp_build_layer_coord(struct lp_build_sample_context *bld, |
||
1736 | unsigned texture_unit, |
||
1737 | boolean is_cube_array, |
||
1738 | LLVMValueRef layer, |
||
1739 | LLVMValueRef *out_of_bounds) |
||
1740 | { |
||
1741 | LLVMValueRef num_layers; |
||
1742 | struct lp_build_context *int_coord_bld = &bld->int_coord_bld; |
||
1743 | |||
1744 | num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm, |
||
1745 | bld->context_ptr, texture_unit); |
||
1746 | |||
1747 | if (out_of_bounds) { |
||
1748 | LLVMValueRef out1, out; |
||
1749 | assert(!is_cube_array); |
||
1750 | num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers); |
||
1751 | out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero); |
||
1752 | out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers); |
||
1753 | *out_of_bounds = lp_build_or(int_coord_bld, out, out1); |
||
1754 | return layer; |
||
1755 | } |
||
1756 | else { |
||
1757 | LLVMValueRef maxlayer; |
||
1758 | LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) : |
||
1759 | bld->int_bld.one; |
||
1760 | maxlayer = lp_build_sub(&bld->int_bld, num_layers, s); |
||
1761 | maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer); |
||
1762 | return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer); |
||
1763 | } |
||
1764 | } |
||
1765 | |||
1766 | |||
1767 | /** |
||
1768 | * Calculate cube face, lod, mip levels. |
||
1769 | */ |
||
1770 | static void |
||
1771 | lp_build_sample_common(struct lp_build_sample_context *bld, |
||
1772 | unsigned texture_index, |
||
1773 | unsigned sampler_index, |
||
1774 | LLVMValueRef *coords, |
||
1775 | const struct lp_derivatives *derivs, /* optional */ |
||
1776 | LLVMValueRef lod_bias, /* optional */ |
||
1777 | LLVMValueRef explicit_lod, /* optional */ |
||
1778 | LLVMValueRef *lod_pos_or_zero, |
||
1779 | LLVMValueRef *lod_fpart, |
||
1780 | LLVMValueRef *ilevel0, |
||
1781 | LLVMValueRef *ilevel1) |
||
1782 | { |
||
1783 | const unsigned mip_filter = bld->static_sampler_state->min_mip_filter; |
||
1784 | const unsigned min_filter = bld->static_sampler_state->min_img_filter; |
||
1785 | const unsigned mag_filter = bld->static_sampler_state->mag_img_filter; |
||
1786 | const unsigned target = bld->static_texture_state->target; |
||
1787 | LLVMValueRef first_level, cube_rho = NULL; |
||
1788 | LLVMValueRef lod_ipart = NULL; |
||
1789 | struct lp_derivatives cube_derivs; |
||
1790 | |||
1791 | /* |
||
1792 | printf("%s mip %d min %d mag %d\n", __FUNCTION__, |
||
1793 | mip_filter, min_filter, mag_filter); |
||
1794 | */ |
||
1795 | |||
1796 | /* |
||
1797 | * Choose cube face, recompute texcoords for the chosen face and |
||
1798 | * compute rho here too (as it requires transform of derivatives). |
||
1799 | */ |
||
1800 | if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) { |
||
1801 | boolean need_derivs; |
||
1802 | need_derivs = ((min_filter != mag_filter || |
||
1803 | mip_filter != PIPE_TEX_MIPFILTER_NONE) && |
||
1804 | !bld->static_sampler_state->min_max_lod_equal && |
||
1805 | !explicit_lod); |
||
1806 | lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs); |
||
1807 | derivs = &cube_derivs; |
||
1808 | if (target == PIPE_TEXTURE_CUBE_ARRAY) { |
||
1809 | /* calculate cube layer coord now */ |
||
1810 | LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]); |
||
1811 | LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6); |
||
1812 | layer = lp_build_mul(&bld->int_coord_bld, layer, six); |
||
1813 | coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL); |
||
1814 | /* because of seamless filtering can't add it to face (coords[2]) here. */ |
||
1815 | } |
||
1816 | } |
||
1817 | else if (target == PIPE_TEXTURE_1D_ARRAY || |
||
1818 | target == PIPE_TEXTURE_2D_ARRAY) { |
||
1819 | coords[2] = lp_build_iround(&bld->coord_bld, coords[2]); |
||
1820 | coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL); |
||
1821 | } |
||
1822 | |||
1823 | if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) { |
||
1824 | /* |
||
1825 | * Clamp p coords to [0,1] for fixed function depth texture format here. |
||
1826 | * Technically this is not entirely correct for unorm depth as the ref value |
||
1827 | * should be converted to the depth format (quantization!) and comparison |
||
1828 | * then done in texture format. This would actually help performance (since |
||
1829 | * only need to do it once and could save the per-sample conversion of texels |
||
1830 | * to floats instead), but it would need more messy code (would need to push |
||
1831 | * at least some bits down to actual fetch so conversion could be skipped, |
||
1832 | * and would have ugly interaction with border color, would need to convert |
||
1833 | * border color to that format too or do some other tricks to make it work). |
||
1834 | */ |
||
1835 | const struct util_format_description *format_desc = bld->format_desc; |
||
1836 | unsigned chan_type; |
||
1837 | /* not entirely sure we couldn't end up with non-valid swizzle here */ |
||
1838 | chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ? |
||
1839 | format_desc->channel[format_desc->swizzle[0]].type : |
||
1840 | UTIL_FORMAT_TYPE_FLOAT; |
||
1841 | if (chan_type != UTIL_FORMAT_TYPE_FLOAT) { |
||
1842 | coords[4] = lp_build_clamp(&bld->coord_bld, coords[4], |
||
1843 | bld->coord_bld.zero, bld->coord_bld.one); |
||
1844 | } |
||
1845 | } |
||
1846 | |||
1847 | /* |
||
1848 | * Compute the level of detail (float). |
||
1849 | */ |
||
1850 | if (min_filter != mag_filter || |
||
1851 | mip_filter != PIPE_TEX_MIPFILTER_NONE) { |
||
1852 | /* Need to compute lod either to choose mipmap levels or to |
||
1853 | * distinguish between minification/magnification with one mipmap level. |
||
1854 | */ |
||
1855 | lp_build_lod_selector(bld, texture_index, sampler_index, |
||
1856 | coords[0], coords[1], coords[2], cube_rho, |
||
1857 | derivs, lod_bias, explicit_lod, |
||
1858 | mip_filter, |
||
1859 | &lod_ipart, lod_fpart, lod_pos_or_zero); |
||
1860 | } else { |
||
1861 | lod_ipart = bld->lodi_bld.zero; |
||
1862 | *lod_pos_or_zero = bld->lodi_bld.zero; |
||
1863 | } |
||
1864 | |||
1865 | if (bld->num_lods != bld->num_mips) { |
||
1866 | /* only makes sense if there's just a single mip level */ |
||
1867 | assert(bld->num_mips == 1); |
||
1868 | lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1); |
||
1869 | } |
||
1870 | |||
1871 | /* |
||
1872 | * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1 |
||
1873 | */ |
||
1874 | switch (mip_filter) { |
||
1875 | default: |
||
1876 | assert(0 && "bad mip_filter value in lp_build_sample_soa()"); |
||
1877 | /* fall-through */ |
||
1878 | case PIPE_TEX_MIPFILTER_NONE: |
||
1879 | /* always use mip level 0 */ |
||
1880 | first_level = bld->dynamic_state->first_level(bld->dynamic_state, |
||
1881 | bld->gallivm, bld->context_ptr, |
||
1882 | texture_index); |
||
1883 | first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level); |
||
1884 | *ilevel0 = first_level; |
||
1885 | break; |
||
1886 | case PIPE_TEX_MIPFILTER_NEAREST: |
||
1887 | assert(lod_ipart); |
||
1888 | lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL); |
||
1889 | break; |
||
1890 | case PIPE_TEX_MIPFILTER_LINEAR: |
||
1891 | assert(lod_ipart); |
||
1892 | assert(*lod_fpart); |
||
1893 | lp_build_linear_mip_levels(bld, texture_index, |
||
1894 | lod_ipart, lod_fpart, |
||
1895 | ilevel0, ilevel1); |
||
1896 | break; |
||
1897 | } |
||
1898 | } |
||
1899 | |||
1900 | static void |
||
1901 | lp_build_clamp_border_color(struct lp_build_sample_context *bld, |
||
1902 | unsigned sampler_unit) |
||
1903 | { |
||
1904 | struct gallivm_state *gallivm = bld->gallivm; |
||
1905 | LLVMBuilderRef builder = gallivm->builder; |
||
1906 | LLVMValueRef border_color_ptr = |
||
1907 | bld->dynamic_state->border_color(bld->dynamic_state, gallivm, |
||
1908 | bld->context_ptr, sampler_unit); |
||
1909 | LLVMValueRef border_color; |
||
1910 | const struct util_format_description *format_desc = bld->format_desc; |
||
1911 | struct lp_type vec4_type = bld->texel_type; |
||
1912 | struct lp_build_context vec4_bld; |
||
1913 | LLVMValueRef min_clamp = NULL; |
||
1914 | LLVMValueRef max_clamp = NULL; |
||
1915 | |||
1916 | /* |
||
1917 | * For normalized format need to clamp border color (technically |
||
1918 | * probably should also quantize the data). Really sucks doing this |
||
1919 | * here but can't avoid at least for now since this is part of |
||
1920 | * sampler state and texture format is part of sampler_view state. |
||
1921 | * GL expects also expects clamping for uint/sint formats too so |
||
1922 | * do that as well (d3d10 can't end up here with uint/sint since it |
||
1923 | * only supports them with ld). |
||
1924 | */ |
||
1925 | vec4_type.length = 4; |
||
1926 | lp_build_context_init(&vec4_bld, gallivm, vec4_type); |
||
1927 | |||
1928 | /* |
||
1929 | * Vectorized clamping of border color. Loading is a bit of a hack since |
||
1930 | * we just cast the pointer to float array to pointer to vec4 |
||
1931 | * (int or float). |
||
1932 | */ |
||
1933 | border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr, |
||
1934 | lp_build_const_int32(gallivm, 0)); |
||
1935 | border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr, |
||
1936 | LLVMPointerType(vec4_bld.vec_type, 0), ""); |
||
1937 | border_color = LLVMBuildLoad(builder, border_color_ptr, ""); |
||
1938 | /* we don't have aligned type in the dynamic state unfortunately */ |
||
1939 | lp_set_load_alignment(border_color, 4); |
||
1940 | |||
1941 | /* |
||
1942 | * Instead of having some incredibly complex logic which will try to figure out |
||
1943 | * clamping necessary for each channel, simply use the first channel, and treat |
||
1944 | * mixed signed/unsigned normalized formats specially. |
||
1945 | * (Mixed non-normalized, which wouldn't work at all here, do not exist for a |
||
1946 | * good reason.) |
||
1947 | */ |
||
1948 | if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) { |
||
1949 | int chan; |
||
1950 | /* d/s needs special handling because both present means just sampling depth */ |
||
1951 | if (util_format_is_depth_and_stencil(format_desc->format)) { |
||
1952 | chan = format_desc->swizzle[0]; |
||
1953 | } |
||
1954 | else { |
||
1955 | chan = util_format_get_first_non_void_channel(format_desc->format); |
||
1956 | } |
||
1957 | if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) { |
||
1958 | unsigned chan_type = format_desc->channel[chan].type; |
||
1959 | unsigned chan_norm = format_desc->channel[chan].normalized; |
||
1960 | unsigned chan_pure = format_desc->channel[chan].pure_integer; |
||
1961 | if (chan_type == UTIL_FORMAT_TYPE_SIGNED) { |
||
1962 | if (chan_norm) { |
||
1963 | min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F); |
||
1964 | max_clamp = vec4_bld.one; |
||
1965 | } |
||
1966 | else if (chan_pure) { |
||
1967 | /* |
||
1968 | * Border color was stored as int, hence need min/max clamp |
||
1969 | * only if chan has less than 32 bits.. |
||
1970 | */ |
||
1971 | unsigned chan_size = format_desc->channel[chan].size; |
||
1972 | if (chan_size < 32) { |
||
1973 | min_clamp = lp_build_const_int_vec(gallivm, vec4_type, |
||
1974 | |||
1975 | max_clamp = lp_build_const_int_vec(gallivm, vec4_type, |
||
1976 | (1 << (chan_size - 1)) - 1); |
||
1977 | } |
||
1978 | } |
||
1979 | /* TODO: no idea about non-pure, non-normalized! */ |
||
1980 | } |
||
1981 | else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) { |
||
1982 | if (chan_norm) { |
||
1983 | min_clamp = vec4_bld.zero; |
||
1984 | max_clamp = vec4_bld.one; |
||
1985 | } |
||
1986 | /* |
||
1987 | * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24 |
||
1988 | * we use Z32_FLOAT_S8X24 to imply sampling depth component |
||
1989 | * and ignoring stencil, which will blow up here if we try to |
||
1990 | * do a uint clamp in a float texel build... |
||
1991 | * And even if we had that format, mesa st also thinks using z24s8 |
||
1992 | * means depth sampling ignoring stencil. |
||
1993 | */ |
||
1994 | else if (chan_pure) { |
||
1995 | /* |
||
1996 | * Border color was stored as uint, hence never need min |
||
1997 | * clamp, and only need max clamp if chan has less than 32 bits. |
||
1998 | */ |
||
1999 | unsigned chan_size = format_desc->channel[chan].size; |
||
2000 | if (chan_size < 32) { |
||
2001 | max_clamp = lp_build_const_int_vec(gallivm, vec4_type, |
||
2002 | (1 << chan_size) - 1); |
||
2003 | } |
||
2004 | /* TODO: no idea about non-pure, non-normalized! */ |
||
2005 | } |
||
2006 | } |
||
2007 | else if (chan_type == UTIL_FORMAT_TYPE_FIXED) { |
||
2008 | /* TODO: I have no idea what clamp this would need if any! */ |
||
2009 | } |
||
2010 | } |
||
2011 | /* mixed plain formats (or different pure size) */ |
||
2012 | switch (format_desc->format) { |
||
2013 | case PIPE_FORMAT_B10G10R10A2_UINT: |
||
2014 | case PIPE_FORMAT_R10G10B10A2_UINT: |
||
2015 | { |
||
2016 | unsigned max10 = (1 << 10) - 1; |
||
2017 | max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10, |
||
2018 | max10, (1 << 2) - 1, NULL); |
||
2019 | } |
||
2020 | break; |
||
2021 | case PIPE_FORMAT_R10SG10SB10SA2U_NORM: |
||
2022 | min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F, |
||
2023 | -1.0F, 0.0F, NULL); |
||
2024 | max_clamp = vec4_bld.one; |
||
2025 | break; |
||
2026 | case PIPE_FORMAT_R8SG8SB8UX8U_NORM: |
||
2027 | case PIPE_FORMAT_R5SG5SB6U_NORM: |
||
2028 | min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F, |
||
2029 | 0.0F, 0.0F, NULL); |
||
2030 | max_clamp = vec4_bld.one; |
||
2031 | break; |
||
2032 | default: |
||
2033 | break; |
||
2034 | } |
||
2035 | } |
||
2036 | else { |
||
2037 | /* cannot figure this out from format description */ |
||
2038 | if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) { |
||
2039 | /* s3tc formats are always unorm */ |
||
2040 | min_clamp = vec4_bld.zero; |
||
2041 | max_clamp = vec4_bld.one; |
||
2042 | } |
||
2043 | else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC || |
||
2044 | format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) { |
||
2045 | switch (format_desc->format) { |
||
2046 | case PIPE_FORMAT_RGTC1_UNORM: |
||
2047 | case PIPE_FORMAT_RGTC2_UNORM: |
||
2048 | case PIPE_FORMAT_LATC1_UNORM: |
||
2049 | case PIPE_FORMAT_LATC2_UNORM: |
||
2050 | case PIPE_FORMAT_ETC1_RGB8: |
||
2051 | min_clamp = vec4_bld.zero; |
||
2052 | max_clamp = vec4_bld.one; |
||
2053 | break; |
||
2054 | case PIPE_FORMAT_RGTC1_SNORM: |
||
2055 | case PIPE_FORMAT_RGTC2_SNORM: |
||
2056 | case PIPE_FORMAT_LATC1_SNORM: |
||
2057 | case PIPE_FORMAT_LATC2_SNORM: |
||
2058 | min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F); |
||
2059 | max_clamp = vec4_bld.one; |
||
2060 | break; |
||
2061 | default: |
||
2062 | assert(0); |
||
2063 | break; |
||
2064 | } |
||
2065 | } |
||
2066 | /* |
||
2067 | * all others from subsampled/other group, though we don't care |
||
2068 | * about yuv (and should not have any from zs here) |
||
2069 | */ |
||
2070 | else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){ |
||
2071 | switch (format_desc->format) { |
||
2072 | case PIPE_FORMAT_R8G8_B8G8_UNORM: |
||
2073 | case PIPE_FORMAT_G8R8_G8B8_UNORM: |
||
2074 | case PIPE_FORMAT_G8R8_B8R8_UNORM: |
||
2075 | case PIPE_FORMAT_R8G8_R8B8_UNORM: |
||
2076 | case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */ |
||
2077 | min_clamp = vec4_bld.zero; |
||
2078 | max_clamp = vec4_bld.one; |
||
2079 | break; |
||
2080 | case PIPE_FORMAT_R8G8Bx_SNORM: |
||
2081 | min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F); |
||
2082 | max_clamp = vec4_bld.one; |
||
2083 | break; |
||
2084 | /* |
||
2085 | * Note smallfloat formats usually don't need clamping |
||
2086 | * (they still have infinite range) however this is not |
||
2087 | * true for r11g11b10 and r9g9b9e5, which can't represent |
||
2088 | * negative numbers (and additionally r9g9b9e5 can't represent |
||
2089 | * very large numbers). d3d10 seems happy without clamping in |
||
2090 | * this case, but gl spec is pretty clear: "for floating |
||
2091 | * point and integer formats, border values are clamped to |
||
2092 | * the representable range of the format" so do that here. |
||
2093 | */ |
||
2094 | case PIPE_FORMAT_R11G11B10_FLOAT: |
||
2095 | min_clamp = vec4_bld.zero; |
||
2096 | break; |
||
2097 | case PIPE_FORMAT_R9G9B9E5_FLOAT: |
||
2098 | min_clamp = vec4_bld.zero; |
||
2099 | max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5); |
||
2100 | break; |
||
2101 | default: |
||
2102 | assert(0); |
||
2103 | break; |
||
2104 | } |
||
2105 | } |
||
2106 | } |
||
2107 | |||
2108 | if (min_clamp) { |
||
2109 | border_color = lp_build_max(&vec4_bld, border_color, min_clamp); |
||
2110 | } |
||
2111 | if (max_clamp) { |
||
2112 | border_color = lp_build_min(&vec4_bld, border_color, max_clamp); |
||
2113 | } |
||
2114 | |||
2115 | bld->border_color_clamped = border_color; |
||
2116 | } |
||
2117 | |||
2118 | |||
2119 | /** |
||
2120 | * General texture sampling codegen. |
||
2121 | * This function handles texture sampling for all texture targets (1D, |
||
2122 | * 2D, 3D, cube) and all filtering modes. |
||
2123 | */ |
||
2124 | static void |
||
2125 | lp_build_sample_general(struct lp_build_sample_context *bld, |
||
2126 | unsigned sampler_unit, |
||
2127 | boolean is_gather, |
||
2128 | LLVMValueRef *coords, |
||
2129 | const LLVMValueRef *offsets, |
||
2130 | LLVMValueRef lod_positive, |
||
2131 | LLVMValueRef lod_fpart, |
||
2132 | LLVMValueRef ilevel0, |
||
2133 | LLVMValueRef ilevel1, |
||
2134 | LLVMValueRef *colors_out) |
||
2135 | { |
||
2136 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
2137 | const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state; |
||
2138 | const unsigned mip_filter = sampler_state->min_mip_filter; |
||
2139 | const unsigned min_filter = sampler_state->min_img_filter; |
||
2140 | const unsigned mag_filter = sampler_state->mag_img_filter; |
||
2141 | LLVMValueRef texels[4]; |
||
2142 | unsigned chan; |
||
2143 | |||
2144 | /* if we need border color, (potentially) clamp it now */ |
||
2145 | if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s, |
||
2146 | min_filter, |
||
2147 | mag_filter) || |
||
2148 | (bld->dims > 1 && |
||
2149 | lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t, |
||
2150 | min_filter, |
||
2151 | mag_filter)) || |
||
2152 | (bld->dims > 2 && |
||
2153 | lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r, |
||
2154 | min_filter, |
||
2155 | mag_filter))) { |
||
2156 | lp_build_clamp_border_color(bld, sampler_unit); |
||
2157 | } |
||
2158 | |||
2159 | |||
2160 | /* |
||
2161 | * Get/interpolate texture colors. |
||
2162 | */ |
||
2163 | |||
2164 | for (chan = 0; chan < 4; ++chan) { |
||
2165 | texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, ""); |
||
2166 | lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]); |
||
2167 | } |
||
2168 | |||
2169 | if (min_filter == mag_filter) { |
||
2170 | /* no need to distinguish between minification and magnification */ |
||
2171 | lp_build_sample_mipmap(bld, min_filter, mip_filter, |
||
2172 | is_gather, |
||
2173 | coords, offsets, |
||
2174 | ilevel0, ilevel1, lod_fpart, |
||
2175 | texels); |
||
2176 | } |
||
2177 | else { |
||
2178 | /* |
||
2179 | * Could also get rid of the if-logic and always use mipmap_both, both |
||
2180 | * for the single lod and multi-lod case if nothing really uses this. |
||
2181 | */ |
||
2182 | if (bld->num_lods == 1) { |
||
2183 | /* Emit conditional to choose min image filter or mag image filter |
||
2184 | * depending on the lod being > 0 or <= 0, respectively. |
||
2185 | */ |
||
2186 | struct lp_build_if_state if_ctx; |
||
2187 | |||
2188 | lod_positive = LLVMBuildTrunc(builder, lod_positive, |
||
2189 | LLVMInt1TypeInContext(bld->gallivm->context), ""); |
||
2190 | |||
2191 | lp_build_if(&if_ctx, bld->gallivm, lod_positive); |
||
2192 | { |
||
2193 | /* Use the minification filter */ |
||
2194 | lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE, |
||
2195 | coords, offsets, |
||
2196 | ilevel0, ilevel1, lod_fpart, |
||
2197 | texels); |
||
2198 | } |
||
2199 | lp_build_else(&if_ctx); |
||
2200 | { |
||
2201 | /* Use the magnification filter */ |
||
2202 | lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE, |
||
2203 | FALSE, |
||
2204 | coords, offsets, |
||
2205 | ilevel0, NULL, NULL, |
||
2206 | texels); |
||
2207 | } |
||
2208 | lp_build_endif(&if_ctx); |
||
2209 | } |
||
2210 | else { |
||
2211 | LLVMValueRef need_linear, linear_mask; |
||
2212 | unsigned mip_filter_for_nearest; |
||
2213 | struct lp_build_if_state if_ctx; |
||
2214 | |||
2215 | if (min_filter == PIPE_TEX_FILTER_LINEAR) { |
||
2216 | linear_mask = lod_positive; |
||
2217 | mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE; |
||
2218 | } |
||
2219 | else { |
||
2220 | linear_mask = lp_build_not(&bld->lodi_bld, lod_positive); |
||
2221 | mip_filter_for_nearest = mip_filter; |
||
2222 | } |
||
2223 | need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, |
||
2224 | linear_mask); |
||
2225 | |||
2226 | if (bld->num_lods != bld->coord_type.length) { |
||
2227 | linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm, |
||
2228 | bld->lodi_type, |
||
2229 | bld->int_coord_type, |
||
2230 | linear_mask); |
||
2231 | } |
||
2232 | |||
2233 | lp_build_if(&if_ctx, bld->gallivm, need_linear); |
||
2234 | { |
||
2235 | /* |
||
2236 | * Do sampling with both filters simultaneously. This means using |
||
2237 | * a linear filter and doing some tricks (with weights) for the pixels |
||
2238 | * which need nearest filter. |
||
2239 | * Note that it's probably rare some pixels need nearest and some |
||
2240 | * linear filter but the fixups required for the nearest pixels |
||
2241 | * aren't all that complicated so just always run a combined path |
||
2242 | * if at least some pixels require linear. |
||
2243 | */ |
||
2244 | lp_build_sample_mipmap_both(bld, linear_mask, mip_filter, |
||
2245 | coords, offsets, |
||
2246 | ilevel0, ilevel1, |
||
2247 | lod_fpart, lod_positive, |
||
2248 | texels); |
||
2249 | } |
||
2250 | lp_build_else(&if_ctx); |
||
2251 | { |
||
2252 | /* |
||
2253 | * All pixels require just nearest filtering, which is way |
||
2254 | * cheaper than linear, hence do a separate path for that. |
||
2255 | */ |
||
2256 | lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST, FALSE, |
||
2257 | mip_filter_for_nearest, |
||
2258 | coords, offsets, |
||
2259 | ilevel0, ilevel1, lod_fpart, |
||
2260 | texels); |
||
2261 | } |
||
2262 | lp_build_endif(&if_ctx); |
||
2263 | } |
||
2264 | } |
||
2265 | |||
2266 | for (chan = 0; chan < 4; ++chan) { |
||
2267 | colors_out[chan] = LLVMBuildLoad(builder, texels[chan], ""); |
||
2268 | lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]); |
||
2269 | } |
||
2270 | } |
||
2271 | |||
2272 | |||
2273 | /** |
||
2274 | * Texel fetch function. |
||
2275 | * In contrast to general sampling there is no filtering, no coord minification, |
||
2276 | * lod (if any) is always explicit uint, coords are uints (in terms of texel units) |
||
2277 | * directly to be applied to the selected mip level (after adding texel offsets). |
||
2278 | * This function handles texel fetch for all targets where texel fetch is supported |
||
2279 | * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too). |
||
2280 | */ |
||
2281 | static void |
||
2282 | lp_build_fetch_texel(struct lp_build_sample_context *bld, |
||
2283 | unsigned texture_unit, |
||
2284 | const LLVMValueRef *coords, |
||
2285 | LLVMValueRef explicit_lod, |
||
2286 | const LLVMValueRef *offsets, |
||
2287 | LLVMValueRef *colors_out) |
||
2288 | { |
||
2289 | struct lp_build_context *perquadi_bld = &bld->lodi_bld; |
||
2290 | struct lp_build_context *int_coord_bld = &bld->int_coord_bld; |
||
2291 | unsigned dims = bld->dims, chan; |
||
2292 | unsigned target = bld->static_texture_state->target; |
||
2293 | boolean out_of_bound_ret_zero = TRUE; |
||
2294 | LLVMValueRef size, ilevel; |
||
2295 | LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL; |
||
2296 | LLVMValueRef x = coords[0], y = coords[1], z = coords[2]; |
||
2297 | LLVMValueRef width, height, depth, i, j; |
||
2298 | LLVMValueRef offset, out_of_bounds, out1; |
||
2299 | |||
2300 | out_of_bounds = int_coord_bld->zero; |
||
2301 | |||
2302 | if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) { |
||
2303 | if (bld->num_mips != int_coord_bld->type.length) { |
||
2304 | ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type, |
||
2305 | perquadi_bld->type, explicit_lod, 0); |
||
2306 | } |
||
2307 | else { |
||
2308 | ilevel = explicit_lod; |
||
2309 | } |
||
2310 | lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel, |
||
2311 | out_of_bound_ret_zero ? &out_of_bounds : NULL); |
||
2312 | } |
||
2313 | else { |
||
2314 | assert(bld->num_mips == 1); |
||
2315 | if (bld->static_texture_state->target != PIPE_BUFFER) { |
||
2316 | ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm, |
||
2317 | bld->context_ptr, texture_unit); |
||
2318 | } |
||
2319 | else { |
||
2320 | ilevel = lp_build_const_int32(bld->gallivm, 0); |
||
2321 | } |
||
2322 | } |
||
2323 | lp_build_mipmap_level_sizes(bld, ilevel, |
||
2324 | &size, |
||
2325 | &row_stride_vec, &img_stride_vec); |
||
2326 | lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type, |
||
2327 | size, &width, &height, &depth); |
||
2328 | |||
2329 | if (target == PIPE_TEXTURE_1D_ARRAY || |
||
2330 | target == PIPE_TEXTURE_2D_ARRAY) { |
||
2331 | if (out_of_bound_ret_zero) { |
||
2332 | z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1); |
||
2333 | out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); |
||
2334 | } |
||
2335 | else { |
||
2336 | z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL); |
||
2337 | } |
||
2338 | } |
||
2339 | |||
2340 | /* This is a lot like border sampling */ |
||
2341 | if (offsets[0]) { |
||
2342 | /* |
||
2343 | * coords are really unsigned, offsets are signed, but I don't think |
||
2344 | * exceeding 31 bits is possible |
||
2345 | */ |
||
2346 | x = lp_build_add(int_coord_bld, x, offsets[0]); |
||
2347 | } |
||
2348 | out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero); |
||
2349 | out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); |
||
2350 | out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width); |
||
2351 | out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); |
||
2352 | |||
2353 | if (dims >= 2) { |
||
2354 | if (offsets[1]) { |
||
2355 | y = lp_build_add(int_coord_bld, y, offsets[1]); |
||
2356 | } |
||
2357 | out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero); |
||
2358 | out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); |
||
2359 | out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height); |
||
2360 | out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); |
||
2361 | |||
2362 | if (dims >= 3) { |
||
2363 | if (offsets[2]) { |
||
2364 | z = lp_build_add(int_coord_bld, z, offsets[2]); |
||
2365 | } |
||
2366 | out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero); |
||
2367 | out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); |
||
2368 | out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth); |
||
2369 | out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1); |
||
2370 | } |
||
2371 | } |
||
2372 | |||
2373 | lp_build_sample_offset(int_coord_bld, |
||
2374 | bld->format_desc, |
||
2375 | x, y, z, row_stride_vec, img_stride_vec, |
||
2376 | &offset, &i, &j); |
||
2377 | |||
2378 | if (bld->static_texture_state->target != PIPE_BUFFER) { |
||
2379 | offset = lp_build_add(int_coord_bld, offset, |
||
2380 | lp_build_get_mip_offsets(bld, ilevel)); |
||
2381 | } |
||
2382 | |||
2383 | offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds); |
||
2384 | |||
2385 | lp_build_fetch_rgba_soa(bld->gallivm, |
||
2386 | bld->format_desc, |
||
2387 | bld->texel_type, |
||
2388 | bld->base_ptr, offset, |
||
2389 | i, j, |
||
2390 | colors_out); |
||
2391 | |||
2392 | if (out_of_bound_ret_zero) { |
||
2393 | /* |
||
2394 | * Only needed for ARB_robust_buffer_access_behavior and d3d10. |
||
2395 | * Could use min/max above instead of out-of-bounds comparisons |
||
2396 | * if we don't care about the result returned for out-of-bounds. |
||
2397 | */ |
||
2398 | for (chan = 0; chan < 4; chan++) { |
||
2399 | colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds, |
||
2400 | bld->texel_bld.zero, colors_out[chan]); |
||
2401 | } |
||
2402 | } |
||
2403 | } |
||
2404 | |||
2405 | |||
2406 | /** |
||
2407 | * Just set texels to white instead of actually sampling the texture. |
||
2408 | * For debugging. |
||
2409 | */ |
||
2410 | void |
||
2411 | lp_build_sample_nop(struct gallivm_state *gallivm, |
||
2412 | struct lp_type type, |
||
2413 | const LLVMValueRef *coords, |
||
2414 | LLVMValueRef texel_out[4]) |
||
2415 | { |
||
2416 | LLVMValueRef one = lp_build_one(gallivm, type); |
||
2417 | unsigned chan; |
||
2418 | |||
2419 | for (chan = 0; chan < 4; chan++) { |
||
2420 | texel_out[chan] = one; |
||
2421 | } |
||
2422 | } |
||
2423 | |||
2424 | |||
2425 | /** |
||
2426 | * Build the actual texture sampling code. |
||
2427 | * 'texel' will return a vector of four LLVMValueRefs corresponding to |
||
2428 | * R, G, B, A. |
||
2429 | * \param type vector float type to use for coords, etc. |
||
2430 | * \param sample_key |
||
2431 | * \param derivs partial derivatives of (s,t,r,q) with respect to x and y |
||
2432 | */ |
||
2433 | static void |
||
2434 | lp_build_sample_soa_code(struct gallivm_state *gallivm, |
||
2435 | const struct lp_static_texture_state *static_texture_state, |
||
2436 | const struct lp_static_sampler_state *static_sampler_state, |
||
2437 | struct lp_sampler_dynamic_state *dynamic_state, |
||
2438 | struct lp_type type, |
||
2439 | unsigned sample_key, |
||
2440 | unsigned texture_index, |
||
2441 | unsigned sampler_index, |
||
2442 | LLVMValueRef context_ptr, |
||
2443 | const LLVMValueRef *coords, |
||
2444 | const LLVMValueRef *offsets, |
||
2445 | const struct lp_derivatives *derivs, /* optional */ |
||
2446 | LLVMValueRef lod, /* optional */ |
||
2447 | LLVMValueRef texel_out[4]) |
||
2448 | { |
||
2449 | unsigned target = static_texture_state->target; |
||
2450 | unsigned dims = texture_dims(target); |
||
2451 | unsigned num_quads = type.length / 4; |
||
2452 | unsigned mip_filter, min_img_filter, mag_img_filter, i; |
||
2453 | struct lp_build_sample_context bld; |
||
2454 | struct lp_static_sampler_state derived_sampler_state = *static_sampler_state; |
||
2455 | LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context); |
||
2456 | LLVMBuilderRef builder = gallivm->builder; |
||
2457 | LLVMValueRef tex_width, newcoords[5]; |
||
2458 | enum lp_sampler_lod_property lod_property; |
||
2459 | enum lp_sampler_lod_control lod_control; |
||
2460 | enum lp_sampler_op_type op_type; |
||
2461 | LLVMValueRef lod_bias = NULL; |
||
2462 | LLVMValueRef explicit_lod = NULL; |
||
2463 | boolean op_is_tex; |
||
2464 | |||
2465 | if (0) { |
||
2466 | enum pipe_format fmt = static_texture_state->format; |
||
2467 | debug_printf("Sample from %s\n", util_format_name(fmt)); |
||
2468 | } |
||
2469 | |||
2470 | lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >> |
||
2471 | LP_SAMPLER_LOD_PROPERTY_SHIFT; |
||
2472 | lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> |
||
2473 | LP_SAMPLER_LOD_CONTROL_SHIFT; |
||
2474 | op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >> |
||
2475 | LP_SAMPLER_OP_TYPE_SHIFT; |
||
2476 | |||
2477 | op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE; |
||
2478 | |||
2479 | if (lod_control == LP_SAMPLER_LOD_BIAS) { |
||
2480 | lod_bias = lod; |
||
2481 | assert(lod); |
||
2482 | assert(derivs == NULL); |
||
2483 | } |
||
2484 | else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) { |
||
2485 | explicit_lod = lod; |
||
2486 | assert(lod); |
||
2487 | assert(derivs == NULL); |
||
2488 | } |
||
2489 | else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { |
||
2490 | assert(derivs); |
||
2491 | assert(lod == NULL); |
||
2492 | } |
||
2493 | else { |
||
2494 | assert(derivs == NULL); |
||
2495 | assert(lod == NULL); |
||
2496 | } |
||
2497 | |||
2498 | if (static_texture_state->format == PIPE_FORMAT_NONE) { |
||
2499 | /* |
||
2500 | * If there's nothing bound, format is NONE, and we must return |
||
2501 | * all zero as mandated by d3d10 in this case. |
||
2502 | */ |
||
2503 | unsigned chan; |
||
2504 | LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F); |
||
2505 | for (chan = 0; chan < 4; chan++) { |
||
2506 | texel_out[chan] = zero; |
||
2507 | } |
||
2508 | return; |
||
2509 | } |
||
2510 | |||
2511 | assert(type.floating); |
||
2512 | |||
2513 | /* Setup our build context */ |
||
2514 | memset(&bld, 0, sizeof bld); |
||
2515 | bld.gallivm = gallivm; |
||
2516 | bld.context_ptr = context_ptr; |
||
2517 | bld.static_sampler_state = &derived_sampler_state; |
||
2518 | bld.static_texture_state = static_texture_state; |
||
2519 | bld.dynamic_state = dynamic_state; |
||
2520 | bld.format_desc = util_format_description(static_texture_state->format); |
||
2521 | bld.dims = dims; |
||
2522 | |||
2523 | bld.vector_width = lp_type_width(type); |
||
2524 | |||
2525 | bld.float_type = lp_type_float(32); |
||
2526 | bld.int_type = lp_type_int(32); |
||
2527 | bld.coord_type = type; |
||
2528 | bld.int_coord_type = lp_int_type(type); |
||
2529 | bld.float_size_in_type = lp_type_float(32); |
||
2530 | bld.float_size_in_type.length = dims > 1 ? 4 : 1; |
||
2531 | bld.int_size_in_type = lp_int_type(bld.float_size_in_type); |
||
2532 | bld.texel_type = type; |
||
2533 | |||
2534 | /* always using the first channel hopefully should be safe, |
||
2535 | * if not things WILL break in other places anyway. |
||
2536 | */ |
||
2537 | if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB && |
||
2538 | bld.format_desc->channel[0].pure_integer) { |
||
2539 | if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) { |
||
2540 | bld.texel_type = lp_type_int_vec(type.width, type.width * type.length); |
||
2541 | } |
||
2542 | else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) { |
||
2543 | bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length); |
||
2544 | } |
||
2545 | } |
||
2546 | else if (util_format_has_stencil(bld.format_desc) && |
||
2547 | !util_format_has_depth(bld.format_desc)) { |
||
2548 | /* for stencil only formats, sample stencil (uint) */ |
||
2549 | bld.texel_type = lp_type_int_vec(type.width, type.width * type.length); |
||
2550 | } |
||
2551 | |||
2552 | if (!static_texture_state->level_zero_only) { |
||
2553 | derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter; |
||
2554 | } else { |
||
2555 | derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; |
||
2556 | } |
||
2557 | if (op_type == LP_SAMPLER_OP_GATHER) { |
||
2558 | /* |
||
2559 | * gather4 is exactly like GL_LINEAR filtering but in the end skipping |
||
2560 | * the actual filtering. Using mostly the same paths, so cube face |
||
2561 | * selection, coord wrapping etc. all naturally uses the same code. |
||
2562 | */ |
||
2563 | derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE; |
||
2564 | derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR; |
||
2565 | derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR; |
||
2566 | } |
||
2567 | mip_filter = derived_sampler_state.min_mip_filter; |
||
2568 | |||
2569 | if (0) { |
||
2570 | debug_printf(" .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter); |
||
2571 | } |
||
2572 | |||
2573 | if (static_texture_state->target == PIPE_TEXTURE_CUBE || |
||
2574 | static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) |
||
2575 | { |
||
2576 | /* |
||
2577 | * Seamless filtering ignores wrap modes. |
||
2578 | * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for |
||
2579 | * bilinear it's not correct but way better than using for instance repeat. |
||
2580 | * Note we even set this for non-seamless. Technically GL allows any wrap |
||
2581 | * mode, which made sense when supporting true borders (can get seamless |
||
2582 | * effect with border and CLAMP_TO_BORDER), but gallium doesn't support |
||
2583 | * borders and d3d9 requires wrap modes to be ignored and it's a pain to fix |
||
2584 | * up the sampler state (as it makes it texture dependent). |
||
2585 | */ |
||
2586 | derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE; |
||
2587 | derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE; |
||
2588 | } |
||
2589 | |||
2590 | min_img_filter = derived_sampler_state.min_img_filter; |
||
2591 | mag_img_filter = derived_sampler_state.mag_img_filter; |
||
2592 | |||
2593 | |||
2594 | /* |
||
2595 | * This is all a bit complicated different paths are chosen for performance |
||
2596 | * reasons. |
||
2597 | * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for |
||
2598 | * everything (the last two options are equivalent for 4-wide case). |
||
2599 | * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad |
||
2600 | * lod is calculated then the lod value extracted afterwards so making this |
||
2601 | * case basically the same as far as lod handling is concerned for the |
||
2602 | * further sample/filter code as the 1 lod for everything case. |
||
2603 | * Different lod handling mostly shows up when building mipmap sizes |
||
2604 | * (lp_build_mipmap_level_sizes() and friends) and also in filtering |
||
2605 | * (getting the fractional part of the lod to the right texels). |
||
2606 | */ |
||
2607 | |||
2608 | /* |
||
2609 | * There are other situations where at least the multiple int lods could be |
||
2610 | * avoided like min and max lod being equal. |
||
2611 | */ |
||
2612 | bld.num_mips = bld.num_lods = 1; |
||
2613 | |||
2614 | if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) && |
||
2615 | (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && |
||
2616 | (static_texture_state->target == PIPE_TEXTURE_CUBE || |
||
2617 | static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && |
||
2618 | (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { |
||
2619 | /* |
||
2620 | * special case for using per-pixel lod even for implicit lod, |
||
2621 | * which is generally never required (ok by APIs) except to please |
||
2622 | * some (somewhat broken imho) tests (because per-pixel face selection |
||
2623 | * can cause derivatives to be different for pixels outside the primitive |
||
2624 | * due to the major axis division even if pre-project derivatives are |
||
2625 | * looking normal). |
||
2626 | */ |
||
2627 | bld.num_mips = type.length; |
||
2628 | bld.num_lods = type.length; |
||
2629 | } |
||
2630 | else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT || |
||
2631 | (explicit_lod || lod_bias || derivs)) { |
||
2632 | if ((!op_is_tex && target != PIPE_BUFFER) || |
||
2633 | (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { |
||
2634 | bld.num_mips = type.length; |
||
2635 | bld.num_lods = type.length; |
||
2636 | } |
||
2637 | else if (op_is_tex && min_img_filter != mag_img_filter) { |
||
2638 | bld.num_mips = 1; |
||
2639 | bld.num_lods = type.length; |
||
2640 | } |
||
2641 | } |
||
2642 | /* TODO: for true scalar_lod should only use 1 lod value */ |
||
2643 | else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) || |
||
2644 | (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { |
||
2645 | bld.num_mips = num_quads; |
||
2646 | bld.num_lods = num_quads; |
||
2647 | } |
||
2648 | else if (op_is_tex && min_img_filter != mag_img_filter) { |
||
2649 | bld.num_mips = 1; |
||
2650 | bld.num_lods = num_quads; |
||
2651 | } |
||
2652 | |||
2653 | |||
2654 | bld.lodf_type = type; |
||
2655 | /* we want native vector size to be able to use our intrinsics */ |
||
2656 | if (bld.num_lods != type.length) { |
||
2657 | /* TODO: this currently always has to be per-quad or per-element */ |
||
2658 | bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1; |
||
2659 | } |
||
2660 | bld.lodi_type = lp_int_type(bld.lodf_type); |
||
2661 | bld.levelf_type = bld.lodf_type; |
||
2662 | if (bld.num_mips == 1) { |
||
2663 | bld.levelf_type.length = 1; |
||
2664 | } |
||
2665 | bld.leveli_type = lp_int_type(bld.levelf_type); |
||
2666 | bld.float_size_type = bld.float_size_in_type; |
||
2667 | /* Note: size vectors may not be native. They contain minified w/h/d/_ values, |
||
2668 | * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */ |
||
2669 | if (bld.num_mips > 1) { |
||
2670 | bld.float_size_type.length = bld.num_mips == type.length ? |
||
2671 | bld.num_mips * bld.float_size_in_type.length : |
||
2672 | type.length; |
||
2673 | } |
||
2674 | bld.int_size_type = lp_int_type(bld.float_size_type); |
||
2675 | |||
2676 | lp_build_context_init(&bld.float_bld, gallivm, bld.float_type); |
||
2677 | lp_build_context_init(&bld.float_vec_bld, gallivm, type); |
||
2678 | lp_build_context_init(&bld.int_bld, gallivm, bld.int_type); |
||
2679 | lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type); |
||
2680 | lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type); |
||
2681 | lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type); |
||
2682 | lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type); |
||
2683 | lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type); |
||
2684 | lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type); |
||
2685 | lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type); |
||
2686 | lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type); |
||
2687 | lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type); |
||
2688 | lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type); |
||
2689 | lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type); |
||
2690 | |||
2691 | /* Get the dynamic state */ |
||
2692 | tex_width = dynamic_state->width(dynamic_state, gallivm, |
||
2693 | context_ptr, texture_index); |
||
2694 | bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm, |
||
2695 | context_ptr, texture_index); |
||
2696 | bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm, |
||
2697 | context_ptr, texture_index); |
||
2698 | bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm, |
||
2699 | context_ptr, texture_index); |
||
2700 | bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm, |
||
2701 | context_ptr, texture_index); |
||
2702 | /* Note that mip_offsets is an array[level] of offsets to texture images */ |
||
2703 | |||
2704 | /* width, height, depth as single int vector */ |
||
2705 | if (dims <= 1) { |
||
2706 | bld.int_size = tex_width; |
||
2707 | } |
||
2708 | else { |
||
2709 | bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef, |
||
2710 | tex_width, |
||
2711 | LLVMConstInt(i32t, 0, 0), ""); |
||
2712 | if (dims >= 2) { |
||
2713 | LLVMValueRef tex_height = |
||
2714 | dynamic_state->height(dynamic_state, gallivm, |
||
2715 | context_ptr, texture_index); |
||
2716 | bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, |
||
2717 | tex_height, |
||
2718 | LLVMConstInt(i32t, 1, 0), ""); |
||
2719 | if (dims >= 3) { |
||
2720 | LLVMValueRef tex_depth = |
||
2721 | dynamic_state->depth(dynamic_state, gallivm, context_ptr, |
||
2722 | texture_index); |
||
2723 | bld.int_size = LLVMBuildInsertElement(builder, bld.int_size, |
||
2724 | tex_depth, |
||
2725 | LLVMConstInt(i32t, 2, 0), ""); |
||
2726 | } |
||
2727 | } |
||
2728 | } |
||
2729 | |||
2730 | for (i = 0; i < 5; i++) { |
||
2731 | newcoords[i] = coords[i]; |
||
2732 | } |
||
2733 | |||
2734 | if (0) { |
||
2735 | /* For debug: no-op texture sampling */ |
||
2736 | lp_build_sample_nop(gallivm, |
||
2737 | bld.texel_type, |
||
2738 | newcoords, |
||
2739 | texel_out); |
||
2740 | } |
||
2741 | |||
2742 | else if (op_type == LP_SAMPLER_OP_FETCH) { |
||
2743 | lp_build_fetch_texel(&bld, texture_index, newcoords, |
||
2744 | lod, offsets, |
||
2745 | texel_out); |
||
2746 | } |
||
2747 | |||
2748 | else { |
||
2749 | LLVMValueRef lod_fpart = NULL, lod_positive = NULL; |
||
2750 | LLVMValueRef ilevel0 = NULL, ilevel1 = NULL; |
||
2751 | boolean use_aos = util_format_fits_8unorm(bld.format_desc) && |
||
2752 | op_is_tex && |
||
2753 | /* not sure this is strictly needed or simply impossible */ |
||
2754 | derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE && |
||
2755 | lp_is_simple_wrap_mode(derived_sampler_state.wrap_s); |
||
2756 | |||
2757 | use_aos &= bld.num_lods <= num_quads || |
||
2758 | derived_sampler_state.min_img_filter == |
||
2759 | derived_sampler_state.mag_img_filter; |
||
2760 | if (dims > 1) { |
||
2761 | use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t); |
||
2762 | if (dims > 2) { |
||
2763 | use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r); |
||
2764 | } |
||
2765 | } |
||
2766 | if ((static_texture_state->target == PIPE_TEXTURE_CUBE || |
||
2767 | static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && |
||
2768 | derived_sampler_state.seamless_cube_map && |
||
2769 | (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR || |
||
2770 | derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) { |
||
2771 | /* theoretically possible with AoS filtering but not implemented (complex!) */ |
||
2772 | use_aos = 0; |
||
2773 | } |
||
2774 | |||
2775 | if ((gallivm_debug & GALLIVM_DEBUG_PERF) && |
||
2776 | !use_aos && util_format_fits_8unorm(bld.format_desc)) { |
||
2777 | debug_printf("%s: using floating point linear filtering for %s\n", |
||
2778 | __FUNCTION__, bld.format_desc->short_name); |
||
2779 | debug_printf(" min_img %d mag_img %d mip %d target %d seamless %d" |
||
2780 | " wraps %d wrapt %d wrapr %d\n", |
||
2781 | derived_sampler_state.min_img_filter, |
||
2782 | derived_sampler_state.mag_img_filter, |
||
2783 | derived_sampler_state.min_mip_filter, |
||
2784 | static_texture_state->target, |
||
2785 | derived_sampler_state.seamless_cube_map, |
||
2786 | derived_sampler_state.wrap_s, |
||
2787 | derived_sampler_state.wrap_t, |
||
2788 | derived_sampler_state.wrap_r); |
||
2789 | } |
||
2790 | |||
2791 | lp_build_sample_common(&bld, texture_index, sampler_index, |
||
2792 | newcoords, |
||
2793 | derivs, lod_bias, explicit_lod, |
||
2794 | &lod_positive, &lod_fpart, |
||
2795 | &ilevel0, &ilevel1); |
||
2796 | |||
2797 | if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) { |
||
2798 | /* The aos path doesn't do seamless filtering so simply add cube layer |
||
2799 | * to face now. |
||
2800 | */ |
||
2801 | newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]); |
||
2802 | } |
||
2803 | |||
2804 | /* |
||
2805 | * we only try 8-wide sampling with soa as it appears to |
||
2806 | * be a loss with aos with AVX (but it should work, except |
||
2807 | * for conformance if min_filter != mag_filter if num_lods > 1). |
||
2808 | * (It should be faster if we'd support avx2) |
||
2809 | */ |
||
2810 | if (num_quads == 1 || !use_aos) { |
||
2811 | if (use_aos) { |
||
2812 | /* do sampling/filtering with fixed pt arithmetic */ |
||
2813 | lp_build_sample_aos(&bld, sampler_index, |
||
2814 | newcoords[0], newcoords[1], |
||
2815 | newcoords[2], |
||
2816 | offsets, lod_positive, lod_fpart, |
||
2817 | ilevel0, ilevel1, |
||
2818 | texel_out); |
||
2819 | } |
||
2820 | |||
2821 | else { |
||
2822 | lp_build_sample_general(&bld, sampler_index, |
||
2823 | op_type == LP_SAMPLER_OP_GATHER, |
||
2824 | newcoords, offsets, |
||
2825 | lod_positive, lod_fpart, |
||
2826 | ilevel0, ilevel1, |
||
2827 | texel_out); |
||
2828 | } |
||
2829 | } |
||
2830 | else { |
||
2831 | unsigned j; |
||
2832 | struct lp_build_sample_context bld4; |
||
2833 | struct lp_type type4 = type; |
||
2834 | unsigned i; |
||
2835 | LLVMValueRef texelout4[4]; |
||
2836 | LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16]; |
||
2837 | |||
2838 | type4.length = 4; |
||
2839 | |||
2840 | /* Setup our build context */ |
||
2841 | memset(&bld4, 0, sizeof bld4); |
||
2842 | bld4.gallivm = bld.gallivm; |
||
2843 | bld4.context_ptr = bld.context_ptr; |
||
2844 | bld4.static_texture_state = bld.static_texture_state; |
||
2845 | bld4.static_sampler_state = bld.static_sampler_state; |
||
2846 | bld4.dynamic_state = bld.dynamic_state; |
||
2847 | bld4.format_desc = bld.format_desc; |
||
2848 | bld4.dims = bld.dims; |
||
2849 | bld4.row_stride_array = bld.row_stride_array; |
||
2850 | bld4.img_stride_array = bld.img_stride_array; |
||
2851 | bld4.base_ptr = bld.base_ptr; |
||
2852 | bld4.mip_offsets = bld.mip_offsets; |
||
2853 | bld4.int_size = bld.int_size; |
||
2854 | |||
2855 | bld4.vector_width = lp_type_width(type4); |
||
2856 | |||
2857 | bld4.float_type = lp_type_float(32); |
||
2858 | bld4.int_type = lp_type_int(32); |
||
2859 | bld4.coord_type = type4; |
||
2860 | bld4.int_coord_type = lp_int_type(type4); |
||
2861 | bld4.float_size_in_type = lp_type_float(32); |
||
2862 | bld4.float_size_in_type.length = dims > 1 ? 4 : 1; |
||
2863 | bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type); |
||
2864 | bld4.texel_type = bld.texel_type; |
||
2865 | bld4.texel_type.length = 4; |
||
2866 | |||
2867 | bld4.num_mips = bld4.num_lods = 1; |
||
2868 | if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) && |
||
2869 | (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) && |
||
2870 | (static_texture_state->target == PIPE_TEXTURE_CUBE || |
||
2871 | static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) && |
||
2872 | (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { |
||
2873 | bld4.num_mips = type4.length; |
||
2874 | bld4.num_lods = type4.length; |
||
2875 | } |
||
2876 | if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT && |
||
2877 | (explicit_lod || lod_bias || derivs)) { |
||
2878 | if ((!op_is_tex && target != PIPE_BUFFER) || |
||
2879 | (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) { |
||
2880 | bld4.num_mips = type4.length; |
||
2881 | bld4.num_lods = type4.length; |
||
2882 | } |
||
2883 | else if (op_is_tex && min_img_filter != mag_img_filter) { |
||
2884 | bld4.num_mips = 1; |
||
2885 | bld4.num_lods = type4.length; |
||
2886 | } |
||
2887 | } |
||
2888 | |||
2889 | /* we want native vector size to be able to use our intrinsics */ |
||
2890 | bld4.lodf_type = type4; |
||
2891 | if (bld4.num_lods != type4.length) { |
||
2892 | bld4.lodf_type.length = 1; |
||
2893 | } |
||
2894 | bld4.lodi_type = lp_int_type(bld4.lodf_type); |
||
2895 | bld4.levelf_type = type4; |
||
2896 | if (bld4.num_mips != type4.length) { |
||
2897 | bld4.levelf_type.length = 1; |
||
2898 | } |
||
2899 | bld4.leveli_type = lp_int_type(bld4.levelf_type); |
||
2900 | bld4.float_size_type = bld4.float_size_in_type; |
||
2901 | if (bld4.num_mips > 1) { |
||
2902 | bld4.float_size_type.length = bld4.num_mips == type4.length ? |
||
2903 | bld4.num_mips * bld4.float_size_in_type.length : |
||
2904 | type4.length; |
||
2905 | } |
||
2906 | bld4.int_size_type = lp_int_type(bld4.float_size_type); |
||
2907 | |||
2908 | lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type); |
||
2909 | lp_build_context_init(&bld4.float_vec_bld, gallivm, type4); |
||
2910 | lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type); |
||
2911 | lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type); |
||
2912 | lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type); |
||
2913 | lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type); |
||
2914 | lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type); |
||
2915 | lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type); |
||
2916 | lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type); |
||
2917 | lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type); |
||
2918 | lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type); |
||
2919 | lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type); |
||
2920 | lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type); |
||
2921 | lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type); |
||
2922 | |||
2923 | for (i = 0; i < num_quads; i++) { |
||
2924 | LLVMValueRef s4, t4, r4; |
||
2925 | LLVMValueRef lod_positive4, lod_fpart4 = NULL; |
||
2926 | LLVMValueRef ilevel04, ilevel14 = NULL; |
||
2927 | LLVMValueRef offsets4[4] = { NULL }; |
||
2928 | unsigned num_lods = bld4.num_lods; |
||
2929 | |||
2930 | s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4); |
||
2931 | t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4); |
||
2932 | r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4); |
||
2933 | |||
2934 | if (offsets[0]) { |
||
2935 | offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4); |
||
2936 | if (dims > 1) { |
||
2937 | offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4); |
||
2938 | if (dims > 2) { |
||
2939 | offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4); |
||
2940 | } |
||
2941 | } |
||
2942 | } |
||
2943 | lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods); |
||
2944 | ilevel04 = bld.num_mips == 1 ? ilevel0 : |
||
2945 | lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods); |
||
2946 | if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) { |
||
2947 | ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods); |
||
2948 | lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods); |
||
2949 | } |
||
2950 | |||
2951 | if (use_aos) { |
||
2952 | /* do sampling/filtering with fixed pt arithmetic */ |
||
2953 | lp_build_sample_aos(&bld4, sampler_index, |
||
2954 | s4, t4, r4, offsets4, |
||
2955 | lod_positive4, lod_fpart4, |
||
2956 | ilevel04, ilevel14, |
||
2957 | texelout4); |
||
2958 | } |
||
2959 | |||
2960 | else { |
||
2961 | /* this path is currently unreachable and hence might break easily... */ |
||
2962 | LLVMValueRef newcoords4[5]; |
||
2963 | newcoords4[0] = s4; |
||
2964 | newcoords4[1] = t4; |
||
2965 | newcoords4[2] = r4; |
||
2966 | newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4); |
||
2967 | newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4); |
||
2968 | |||
2969 | lp_build_sample_general(&bld4, sampler_index, |
||
2970 | op_type == LP_SAMPLER_OP_GATHER, |
||
2971 | newcoords4, offsets4, |
||
2972 | lod_positive4, lod_fpart4, |
||
2973 | ilevel04, ilevel14, |
||
2974 | texelout4); |
||
2975 | } |
||
2976 | for (j = 0; j < 4; j++) { |
||
2977 | texelouttmp[j][i] = texelout4[j]; |
||
2978 | } |
||
2979 | } |
||
2980 | |||
2981 | for (j = 0; j < 4; j++) { |
||
2982 | texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads); |
||
2983 | } |
||
2984 | } |
||
2985 | } |
||
2986 | |||
2987 | if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) { |
||
2988 | apply_sampler_swizzle(&bld, texel_out); |
||
2989 | } |
||
2990 | |||
2991 | /* |
||
2992 | * texel type can be a (32bit) int/uint (for pure int formats only), |
||
2993 | * however we are expected to always return floats (storage is untyped). |
||
2994 | */ |
||
2995 | if (!bld.texel_type.floating) { |
||
2996 | unsigned chan; |
||
2997 | for (chan = 0; chan < 4; chan++) { |
||
2998 | texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan], |
||
2999 | lp_build_vec_type(gallivm, type), ""); |
||
3000 | } |
||
3001 | } |
||
3002 | } |
||
3003 | |||
3004 | |||
/*
 * When non-zero, lp_build_sample_soa() may emit texture sampling as a call
 * to a generated function instead of always generating the code inline.
 */
#define USE_TEX_FUNC_CALL 1

/* Capacity of the argument arrays used when building texture functions. */
#define LP_MAX_TEX_FUNC_ARGS 32
||
3008 | |||
3009 | static inline void |
||
3010 | get_target_info(enum pipe_texture_target target, |
||
3011 | unsigned *num_coords, unsigned *num_derivs, |
||
3012 | unsigned *num_offsets, unsigned *layer) |
||
3013 | { |
||
3014 | unsigned dims = texture_dims(target); |
||
3015 | *num_coords = dims; |
||
3016 | *num_offsets = dims; |
||
3017 | *num_derivs = (target == PIPE_TEXTURE_CUBE || |
||
3018 | target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims; |
||
3019 | *layer = has_layer_coord(target) ? 2: 0; |
||
3020 | if (target == PIPE_TEXTURE_CUBE_ARRAY) { |
||
3021 | /* |
||
3022 | * dims doesn't include r coord for cubes - this is handled |
||
3023 | * by layer instead, but need to fix up for cube arrays... |
||
3024 | */ |
||
3025 | *layer = 3; |
||
3026 | *num_coords = 3; |
||
3027 | } |
||
3028 | } |
||
3029 | |||
3030 | |||
3031 | /** |
||
3032 | * Generate the function body for a texture sampling function. |
||
3033 | */ |
||
3034 | static void |
||
3035 | lp_build_sample_gen_func(struct gallivm_state *gallivm, |
||
3036 | const struct lp_static_texture_state *static_texture_state, |
||
3037 | const struct lp_static_sampler_state *static_sampler_state, |
||
3038 | struct lp_sampler_dynamic_state *dynamic_state, |
||
3039 | struct lp_type type, |
||
3040 | unsigned texture_index, |
||
3041 | unsigned sampler_index, |
||
3042 | LLVMValueRef function, |
||
3043 | unsigned num_args, |
||
3044 | unsigned sample_key) |
||
3045 | { |
||
3046 | LLVMBuilderRef old_builder; |
||
3047 | LLVMBasicBlockRef block; |
||
3048 | LLVMValueRef coords[5]; |
||
3049 | LLVMValueRef offsets[3] = { NULL }; |
||
3050 | LLVMValueRef lod = NULL; |
||
3051 | LLVMValueRef context_ptr; |
||
3052 | LLVMValueRef texel_out[4]; |
||
3053 | struct lp_derivatives derivs; |
||
3054 | struct lp_derivatives *deriv_ptr = NULL; |
||
3055 | unsigned num_param = 0; |
||
3056 | unsigned i, num_coords, num_derivs, num_offsets, layer; |
||
3057 | enum lp_sampler_lod_control lod_control; |
||
3058 | |||
3059 | lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> |
||
3060 | LP_SAMPLER_LOD_CONTROL_SHIFT; |
||
3061 | |||
3062 | get_target_info(static_texture_state->target, |
||
3063 | &num_coords, &num_derivs, &num_offsets, &layer); |
||
3064 | |||
3065 | /* "unpack" arguments */ |
||
3066 | context_ptr = LLVMGetParam(function, num_param++); |
||
3067 | for (i = 0; i < num_coords; i++) { |
||
3068 | coords[i] = LLVMGetParam(function, num_param++); |
||
3069 | } |
||
3070 | for (i = num_coords; i < 5; i++) { |
||
3071 | /* This is rather unfortunate... */ |
||
3072 | coords[i] = lp_build_undef(gallivm, type); |
||
3073 | } |
||
3074 | if (layer) { |
||
3075 | coords[layer] = LLVMGetParam(function, num_param++); |
||
3076 | } |
||
3077 | if (sample_key & LP_SAMPLER_SHADOW) { |
||
3078 | coords[4] = LLVMGetParam(function, num_param++); |
||
3079 | } |
||
3080 | if (sample_key & LP_SAMPLER_OFFSETS) { |
||
3081 | for (i = 0; i < num_offsets; i++) { |
||
3082 | offsets[i] = LLVMGetParam(function, num_param++); |
||
3083 | } |
||
3084 | } |
||
3085 | if (lod_control == LP_SAMPLER_LOD_BIAS || |
||
3086 | lod_control == LP_SAMPLER_LOD_EXPLICIT) { |
||
3087 | lod = LLVMGetParam(function, num_param++); |
||
3088 | } |
||
3089 | else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { |
||
3090 | for (i = 0; i < num_derivs; i++) { |
||
3091 | derivs.ddx[i] = LLVMGetParam(function, num_param++); |
||
3092 | derivs.ddy[i] = LLVMGetParam(function, num_param++); |
||
3093 | } |
||
3094 | deriv_ptr = &derivs; |
||
3095 | } |
||
3096 | |||
3097 | assert(num_args == num_param); |
||
3098 | |||
3099 | /* |
||
3100 | * Function body |
||
3101 | */ |
||
3102 | |||
3103 | old_builder = gallivm->builder; |
||
3104 | block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry"); |
||
3105 | gallivm->builder = LLVMCreateBuilderInContext(gallivm->context); |
||
3106 | LLVMPositionBuilderAtEnd(gallivm->builder, block); |
||
3107 | |||
3108 | lp_build_sample_soa_code(gallivm, |
||
3109 | static_texture_state, |
||
3110 | static_sampler_state, |
||
3111 | dynamic_state, |
||
3112 | type, |
||
3113 | sample_key, |
||
3114 | texture_index, |
||
3115 | sampler_index, |
||
3116 | context_ptr, |
||
3117 | coords, |
||
3118 | offsets, |
||
3119 | deriv_ptr, |
||
3120 | lod, |
||
3121 | texel_out); |
||
3122 | |||
3123 | LLVMBuildAggregateRet(gallivm->builder, texel_out, 4); |
||
3124 | |||
3125 | LLVMDisposeBuilder(gallivm->builder); |
||
3126 | gallivm->builder = old_builder; |
||
3127 | |||
3128 | gallivm_verify_function(gallivm, function); |
||
3129 | } |
||
3130 | |||
3131 | |||
3132 | /** |
||
3133 | * Call the matching function for texture sampling. |
||
3134 | * If there's no match, generate a new one. |
||
3135 | */ |
||
3136 | static void |
||
3137 | lp_build_sample_soa_func(struct gallivm_state *gallivm, |
||
3138 | const struct lp_static_texture_state *static_texture_state, |
||
3139 | const struct lp_static_sampler_state *static_sampler_state, |
||
3140 | struct lp_sampler_dynamic_state *dynamic_state, |
||
3141 | const struct lp_sampler_params *params) |
||
3142 | { |
||
3143 | LLVMBuilderRef builder = gallivm->builder; |
||
3144 | LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent( |
||
3145 | LLVMGetInsertBlock(builder))); |
||
3146 | LLVMValueRef function, inst; |
||
3147 | LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS]; |
||
3148 | LLVMBasicBlockRef bb; |
||
3149 | LLVMValueRef tex_ret; |
||
3150 | unsigned num_args = 0; |
||
3151 | char func_name[64]; |
||
3152 | unsigned i, num_coords, num_derivs, num_offsets, layer; |
||
3153 | unsigned texture_index = params->texture_index; |
||
3154 | unsigned sampler_index = params->sampler_index; |
||
3155 | unsigned sample_key = params->sample_key; |
||
3156 | const LLVMValueRef *coords = params->coords; |
||
3157 | const LLVMValueRef *offsets = params->offsets; |
||
3158 | const struct lp_derivatives *derivs = params->derivs; |
||
3159 | enum lp_sampler_lod_control lod_control; |
||
3160 | |||
3161 | lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >> |
||
3162 | LP_SAMPLER_LOD_CONTROL_SHIFT; |
||
3163 | |||
3164 | get_target_info(static_texture_state->target, |
||
3165 | &num_coords, &num_derivs, &num_offsets, &layer); |
||
3166 | |||
3167 | /* |
||
3168 | * texture function matches are found by name. |
||
3169 | * Thus the name has to include both the texture and sampler unit |
||
3170 | * (which covers all static state) plus the actual texture function |
||
3171 | * (including things like offsets, shadow coord, lod control). |
||
3172 | * Additionally lod_property has to be included too. |
||
3173 | */ |
||
3174 | |||
3175 | util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x", |
||
3176 | texture_index, sampler_index, sample_key); |
||
3177 | |||
3178 | function = LLVMGetNamedFunction(module, func_name); |
||
3179 | |||
3180 | if(!function) { |
||
3181 | LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS]; |
||
3182 | LLVMTypeRef ret_type; |
||
3183 | LLVMTypeRef function_type; |
||
3184 | LLVMTypeRef val_type[4]; |
||
3185 | unsigned num_param = 0; |
||
3186 | |||
3187 | /* |
||
3188 | * Generate the function prototype. |
||
3189 | */ |
||
3190 | |||
3191 | arg_types[num_param++] = LLVMTypeOf(params->context_ptr); |
||
3192 | for (i = 0; i < num_coords; i++) { |
||
3193 | arg_types[num_param++] = LLVMTypeOf(coords[0]); |
||
3194 | assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i])); |
||
3195 | } |
||
3196 | if (layer) { |
||
3197 | arg_types[num_param++] = LLVMTypeOf(coords[layer]); |
||
3198 | assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer])); |
||
3199 | } |
||
3200 | if (sample_key & LP_SAMPLER_SHADOW) { |
||
3201 | arg_types[num_param++] = LLVMTypeOf(coords[0]); |
||
3202 | } |
||
3203 | if (sample_key & LP_SAMPLER_OFFSETS) { |
||
3204 | for (i = 0; i < num_offsets; i++) { |
||
3205 | arg_types[num_param++] = LLVMTypeOf(offsets[0]); |
||
3206 | assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i])); |
||
3207 | } |
||
3208 | } |
||
3209 | if (lod_control == LP_SAMPLER_LOD_BIAS || |
||
3210 | lod_control == LP_SAMPLER_LOD_EXPLICIT) { |
||
3211 | arg_types[num_param++] = LLVMTypeOf(params->lod); |
||
3212 | } |
||
3213 | else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { |
||
3214 | for (i = 0; i < num_derivs; i++) { |
||
3215 | arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]); |
||
3216 | arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]); |
||
3217 | assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i])); |
||
3218 | assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i])); |
||
3219 | } |
||
3220 | } |
||
3221 | |||
3222 | val_type[0] = val_type[1] = val_type[2] = val_type[3] = |
||
3223 | lp_build_vec_type(gallivm, params->type); |
||
3224 | ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0); |
||
3225 | function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0); |
||
3226 | function = LLVMAddFunction(module, func_name, function_type); |
||
3227 | |||
3228 | for (i = 0; i < num_param; ++i) { |
||
3229 | if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) { |
||
3230 | LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute); |
||
3231 | } |
||
3232 | } |
||
3233 | |||
3234 | LLVMSetFunctionCallConv(function, LLVMFastCallConv); |
||
3235 | LLVMSetLinkage(function, LLVMPrivateLinkage); |
||
3236 | |||
3237 | lp_build_sample_gen_func(gallivm, |
||
3238 | static_texture_state, |
||
3239 | static_sampler_state, |
||
3240 | dynamic_state, |
||
3241 | params->type, |
||
3242 | texture_index, |
||
3243 | sampler_index, |
||
3244 | function, |
||
3245 | num_param, |
||
3246 | sample_key); |
||
3247 | } |
||
3248 | |||
3249 | num_args = 0; |
||
3250 | args[num_args++] = params->context_ptr; |
||
3251 | for (i = 0; i < num_coords; i++) { |
||
3252 | args[num_args++] = coords[i]; |
||
3253 | } |
||
3254 | if (layer) { |
||
3255 | args[num_args++] = coords[layer]; |
||
3256 | } |
||
3257 | if (sample_key & LP_SAMPLER_SHADOW) { |
||
3258 | args[num_args++] = coords[4]; |
||
3259 | } |
||
3260 | if (sample_key & LP_SAMPLER_OFFSETS) { |
||
3261 | for (i = 0; i < num_offsets; i++) { |
||
3262 | args[num_args++] = offsets[i]; |
||
3263 | } |
||
3264 | } |
||
3265 | if (lod_control == LP_SAMPLER_LOD_BIAS || |
||
3266 | lod_control == LP_SAMPLER_LOD_EXPLICIT) { |
||
3267 | args[num_args++] = params->lod; |
||
3268 | } |
||
3269 | else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) { |
||
3270 | for (i = 0; i < num_derivs; i++) { |
||
3271 | args[num_args++] = derivs->ddx[i]; |
||
3272 | args[num_args++] = derivs->ddy[i]; |
||
3273 | } |
||
3274 | } |
||
3275 | |||
3276 | assert(num_args <= LP_MAX_TEX_FUNC_ARGS); |
||
3277 | |||
3278 | tex_ret = LLVMBuildCall(builder, function, args, num_args, ""); |
||
3279 | bb = LLVMGetInsertBlock(builder); |
||
3280 | inst = LLVMGetLastInstruction(bb); |
||
3281 | LLVMSetInstructionCallConv(inst, LLVMFastCallConv); |
||
3282 | |||
3283 | for (i = 0; i < 4; i++) { |
||
3284 | params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, ""); |
||
3285 | } |
||
3286 | } |
||
3287 | |||
3288 | |||
3289 | /** |
||
3290 | * Build texture sampling code. |
||
3291 | * Either via a function call or inline it directly. |
||
3292 | */ |
||
3293 | void |
||
3294 | lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state, |
||
3295 | const struct lp_static_sampler_state *static_sampler_state, |
||
3296 | struct lp_sampler_dynamic_state *dynamic_state, |
||
3297 | struct gallivm_state *gallivm, |
||
3298 | const struct lp_sampler_params *params) |
||
3299 | { |
||
3300 | boolean use_tex_func = FALSE; |
||
3301 | |||
3302 | /* |
||
3303 | * Do not use a function call if the sampling is "simple enough". |
||
3304 | * We define this by |
||
3305 | * a) format |
||
3306 | * b) no mips (either one level only or no mip filter) |
||
3307 | * No mips will definitely make the code smaller, though |
||
3308 | * the format requirement is a bit iffy - there's some (SoA) formats |
||
3309 | * which definitely generate less code. This does happen to catch |
||
3310 | * some important cases though which are hurt quite a bit by using |
||
3311 | * a call (though not really because of the call overhead but because |
||
3312 | * they are reusing the same texture unit with some of the same |
||
3313 | * parameters). |
||
3314 | * Ideally we'd let llvm recognize this stuff by doing IPO passes. |
||
3315 | */ |
||
3316 | |||
3317 | if (USE_TEX_FUNC_CALL) { |
||
3318 | const struct util_format_description *format_desc; |
||
3319 | boolean simple_format; |
||
3320 | boolean simple_tex; |
||
3321 | enum lp_sampler_op_type op_type; |
||
3322 | format_desc = util_format_description(static_texture_state->format); |
||
3323 | simple_format = !format_desc || |
||
3324 | (util_format_is_rgba8_variant(format_desc) && |
||
3325 | format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB); |
||
3326 | |||
3327 | op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >> |
||
3328 | LP_SAMPLER_OP_TYPE_SHIFT; |
||
3329 | simple_tex = |
||
3330 | op_type != LP_SAMPLER_OP_TEXTURE || |
||
3331 | ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE || |
||
3332 | static_texture_state->level_zero_only == TRUE) && |
||
3333 | static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter); |
||
3334 | |||
3335 | use_tex_func = format_desc && !(simple_format && simple_tex); |
||
3336 | } |
||
3337 | |||
3338 | if (use_tex_func) { |
||
3339 | lp_build_sample_soa_func(gallivm, |
||
3340 | static_texture_state, |
||
3341 | static_sampler_state, |
||
3342 | dynamic_state, |
||
3343 | params); |
||
3344 | } |
||
3345 | else { |
||
3346 | lp_build_sample_soa_code(gallivm, |
||
3347 | static_texture_state, |
||
3348 | static_sampler_state, |
||
3349 | dynamic_state, |
||
3350 | params->type, |
||
3351 | params->sample_key, |
||
3352 | params->texture_index, |
||
3353 | params->sampler_index, |
||
3354 | params->context_ptr, |
||
3355 | params->coords, |
||
3356 | params->offsets, |
||
3357 | params->derivs, |
||
3358 | params->lod, |
||
3359 | params->texel); |
||
3360 | } |
||
3361 | } |
||
3362 | |||
3363 | |||
3364 | void |
||
3365 | lp_build_size_query_soa(struct gallivm_state *gallivm, |
||
3366 | const struct lp_static_texture_state *static_state, |
||
3367 | struct lp_sampler_dynamic_state *dynamic_state, |
||
3368 | struct lp_type int_type, |
||
3369 | unsigned texture_unit, |
||
3370 | unsigned target, |
||
3371 | LLVMValueRef context_ptr, |
||
3372 | boolean is_sviewinfo, |
||
3373 | enum lp_sampler_lod_property lod_property, |
||
3374 | LLVMValueRef explicit_lod, |
||
3375 | LLVMValueRef *sizes_out) |
||
3376 | { |
||
3377 | LLVMValueRef lod, level, size; |
||
3378 | LLVMValueRef first_level = NULL; |
||
3379 | int dims, i; |
||
3380 | boolean has_array; |
||
3381 | unsigned num_lods = 1; |
||
3382 | struct lp_build_context bld_int_vec4; |
||
3383 | |||
3384 | if (static_state->format == PIPE_FORMAT_NONE) { |
||
3385 | /* |
||
3386 | * If there's nothing bound, format is NONE, and we must return |
||
3387 | * all zero as mandated by d3d10 in this case. |
||
3388 | */ |
||
3389 | unsigned chan; |
||
3390 | LLVMValueRef zero = lp_build_const_vec(gallivm, int_type, 0.0F); |
||
3391 | for (chan = 0; chan < 4; chan++) { |
||
3392 | sizes_out[chan] = zero; |
||
3393 | } |
||
3394 | return; |
||
3395 | } |
||
3396 | |||
3397 | /* |
||
3398 | * Do some sanity verification about bound texture and shader dcl target. |
||
3399 | * Not entirely sure what's possible but assume array/non-array |
||
3400 | * always compatible (probably not ok for OpenGL but d3d10 has no |
||
3401 | * distinction of arrays at the resource level). |
||
3402 | * Everything else looks bogus (though not entirely sure about rect/2d). |
||
3403 | * Currently disabled because it causes assertion failures if there's |
||
3404 | * nothing bound (or rather a dummy texture, not that this case would |
||
3405 | * return the right values). |
||
3406 | */ |
||
3407 | if (0 && static_state->target != target) { |
||
3408 | if (static_state->target == PIPE_TEXTURE_1D) |
||
3409 | assert(target == PIPE_TEXTURE_1D_ARRAY); |
||
3410 | else if (static_state->target == PIPE_TEXTURE_1D_ARRAY) |
||
3411 | assert(target == PIPE_TEXTURE_1D); |
||
3412 | else if (static_state->target == PIPE_TEXTURE_2D) |
||
3413 | assert(target == PIPE_TEXTURE_2D_ARRAY); |
||
3414 | else if (static_state->target == PIPE_TEXTURE_2D_ARRAY) |
||
3415 | assert(target == PIPE_TEXTURE_2D); |
||
3416 | else if (static_state->target == PIPE_TEXTURE_CUBE) |
||
3417 | assert(target == PIPE_TEXTURE_CUBE_ARRAY); |
||
3418 | else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY) |
||
3419 | assert(target == PIPE_TEXTURE_CUBE); |
||
3420 | else |
||
3421 | assert(0); |
||
3422 | } |
||
3423 | |||
3424 | dims = texture_dims(target); |
||
3425 | |||
3426 | switch (target) { |
||
3427 | case PIPE_TEXTURE_1D_ARRAY: |
||
3428 | case PIPE_TEXTURE_2D_ARRAY: |
||
3429 | case PIPE_TEXTURE_CUBE_ARRAY: |
||
3430 | has_array = TRUE; |
||
3431 | break; |
||
3432 | default: |
||
3433 | has_array = FALSE; |
||
3434 | break; |
||
3435 | } |
||
3436 | |||
3437 | assert(!int_type.floating); |
||
3438 | |||
3439 | lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128)); |
||
3440 | |||
3441 | if (explicit_lod) { |
||
3442 | /* FIXME: this needs to honor per-element lod */ |
||
3443 | lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod, |
||
3444 | lp_build_const_int32(gallivm, 0), ""); |
||
3445 | first_level = dynamic_state->first_level(dynamic_state, gallivm, |
||
3446 | context_ptr, texture_unit); |
||
3447 | level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level"); |
||
3448 | lod = lp_build_broadcast_scalar(&bld_int_vec4, level); |
||
3449 | } else { |
||
3450 | lod = bld_int_vec4.zero; |
||
3451 | } |
||
3452 | |||
3453 | size = bld_int_vec4.undef; |
||
3454 | |||
3455 | size = LLVMBuildInsertElement(gallivm->builder, size, |
||
3456 | dynamic_state->width(dynamic_state, gallivm, |
||
3457 | context_ptr, texture_unit), |
||
3458 | lp_build_const_int32(gallivm, 0), ""); |
||
3459 | |||
3460 | if (dims >= 2) { |
||
3461 | size = LLVMBuildInsertElement(gallivm->builder, size, |
||
3462 | dynamic_state->height(dynamic_state, gallivm, |
||
3463 | context_ptr, texture_unit), |
||
3464 | lp_build_const_int32(gallivm, 1), ""); |
||
3465 | } |
||
3466 | |||
3467 | if (dims >= 3) { |
||
3468 | size = LLVMBuildInsertElement(gallivm->builder, size, |
||
3469 | dynamic_state->depth(dynamic_state, gallivm, |
||
3470 | context_ptr, texture_unit), |
||
3471 | lp_build_const_int32(gallivm, 2), ""); |
||
3472 | } |
||
3473 | |||
3474 | size = lp_build_minify(&bld_int_vec4, size, lod, TRUE); |
||
3475 | |||
3476 | if (has_array) { |
||
3477 | LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm, |
||
3478 | context_ptr, texture_unit); |
||
3479 | if (target == PIPE_TEXTURE_CUBE_ARRAY) { |
||
3480 | /* |
||
3481 | * It looks like GL wants number of cubes, d3d10.1 has it undefined? |
||
3482 | * Could avoid this by passing in number of cubes instead of total |
||
3483 | * number of layers (might make things easier elsewhere too). |
||
3484 | */ |
||
3485 | LLVMValueRef six = lp_build_const_int32(gallivm, 6); |
||
3486 | layers = LLVMBuildSDiv(gallivm->builder, layers, six, ""); |
||
3487 | } |
||
3488 | size = LLVMBuildInsertElement(gallivm->builder, size, layers, |
||
3489 | lp_build_const_int32(gallivm, dims), ""); |
||
3490 | } |
||
3491 | |||
3492 | /* |
||
3493 | * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels) |
||
3494 | * if level is out of bounds (note this can't cover unbound texture |
||
3495 | * here, which also requires returning zero). |
||
3496 | */ |
||
3497 | if (explicit_lod && is_sviewinfo) { |
||
3498 | LLVMValueRef last_level, out, out1; |
||
3499 | struct lp_build_context leveli_bld; |
||
3500 | |||
3501 | /* everything is scalar for now */ |
||
3502 | lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32)); |
||
3503 | last_level = dynamic_state->last_level(dynamic_state, gallivm, |
||
3504 | context_ptr, texture_unit); |
||
3505 | |||
3506 | out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level); |
||
3507 | out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level); |
||
3508 | out = lp_build_or(&leveli_bld, out, out1); |
||
3509 | if (num_lods == 1) { |
||
3510 | out = lp_build_broadcast_scalar(&bld_int_vec4, out); |
||
3511 | } |
||
3512 | else { |
||
3513 | /* TODO */ |
||
3514 | assert(0); |
||
3515 | } |
||
3516 | size = lp_build_andnot(&bld_int_vec4, size, out); |
||
3517 | } |
||
3518 | for (i = 0; i < dims + (has_array ? 1 : 0); i++) { |
||
3519 | sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type, |
||
3520 | size, |
||
3521 | lp_build_const_int32(gallivm, i)); |
||
3522 | } |
||
3523 | if (is_sviewinfo) { |
||
3524 | for (; i < 4; i++) { |
||
3525 | sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0); |
||
3526 | } |
||
3527 | } |
||
3528 | |||
3529 | /* |
||
3530 | * if there's no explicit_lod (buffers, rects) queries requiring nr of |
||
3531 | * mips would be illegal. |
||
3532 | */ |
||
3533 | if (is_sviewinfo && explicit_lod) { |
||
3534 | struct lp_build_context bld_int_scalar; |
||
3535 | LLVMValueRef num_levels; |
||
3536 | lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32)); |
||
3537 | |||
3538 | if (static_state->level_zero_only) { |
||
3539 | num_levels = bld_int_scalar.one; |
||
3540 | } |
||
3541 | else { |
||
3542 | LLVMValueRef last_level; |
||
3543 | |||
3544 | last_level = dynamic_state->last_level(dynamic_state, gallivm, |
||
3545 | context_ptr, texture_unit); |
||
3546 | num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level); |
||
3547 | num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one); |
||
3548 | } |
||
3549 | sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type), |
||
3550 | num_levels); |
||
3551 | } |
||
3552 | }>>>>=>>>>>>>>>>>>>>>>=>>=>>>>>=>>><>><>><>>><>><>>=>=>>>>>>>>=>>>>>=>>>>> |