Go to most recent revision | Details | Last modification | View Log | RSS feed
Rev | Author | Line No. | Line |
---|---|---|---|
4358 | Serge | 1 | /************************************************************************** |
2 | * |
||
3 | * Copyright 2010 VMware, Inc. |
||
4 | * All Rights Reserved. |
||
5 | * |
||
6 | * Permission is hereby granted, free of charge, to any person obtaining a |
||
7 | * copy of this software and associated documentation files (the |
||
8 | * "Software"), to deal in the Software without restriction, including |
||
9 | * without limitation the rights to use, copy, modify, merge, publish, |
||
10 | * distribute, sub license, and/or sell copies of the Software, and to |
||
11 | * permit persons to whom the Software is furnished to do so, subject to |
||
12 | * the following conditions: |
||
13 | * |
||
14 | * The above copyright notice and this permission notice (including the |
||
15 | * next paragraph) shall be included in all copies or substantial portions |
||
16 | * of the Software. |
||
17 | * |
||
18 | * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS |
||
19 | * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF |
||
20 | * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. |
||
21 | * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR |
||
22 | * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, |
||
23 | * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE |
||
24 | * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. |
||
25 | * |
||
26 | **************************************************************************/ |
||
27 | |||
28 | /** |
||
29 | * @file |
||
30 | * Texture sampling -- AoS. |
||
31 | * |
||
32 | * @author Jose Fonseca |
||
33 | * @author Brian Paul |
||
34 | */ |
||
35 | |||
36 | #include "pipe/p_defines.h" |
||
37 | #include "pipe/p_state.h" |
||
38 | #include "util/u_debug.h" |
||
39 | #include "util/u_dump.h" |
||
40 | #include "util/u_memory.h" |
||
41 | #include "util/u_math.h" |
||
42 | #include "util/u_format.h" |
||
43 | #include "util/u_cpu_detect.h" |
||
44 | #include "lp_bld_debug.h" |
||
45 | #include "lp_bld_type.h" |
||
46 | #include "lp_bld_const.h" |
||
47 | #include "lp_bld_conv.h" |
||
48 | #include "lp_bld_arit.h" |
||
49 | #include "lp_bld_bitarit.h" |
||
50 | #include "lp_bld_logic.h" |
||
51 | #include "lp_bld_swizzle.h" |
||
52 | #include "lp_bld_pack.h" |
||
53 | #include "lp_bld_flow.h" |
||
54 | #include "lp_bld_gather.h" |
||
55 | #include "lp_bld_format.h" |
||
56 | #include "lp_bld_init.h" |
||
57 | #include "lp_bld_sample.h" |
||
58 | #include "lp_bld_sample_aos.h" |
||
59 | #include "lp_bld_quad.h" |
||
60 | |||
61 | |||
62 | /** |
||
63 | * Build LLVM code for texture coord wrapping, for nearest filtering, |
||
64 | * for scaled integer texcoords. |
||
65 | * \param block_length is the length of the pixel block along the |
||
66 | * coordinate axis |
||
67 | * \param coord the incoming texcoord (s,t or r) scaled to the texture size |
||
68 | * \param coord_f the incoming texcoord (s,t or r) as float vec |
||
69 | * \param length the texture size along one dimension |
||
70 | * \param stride pixel stride along the coordinate axis (in bytes) |
||
71 | * \param offset the texel offset along the coord axis |
||
72 | * \param is_pot if TRUE, length is a power of two |
||
73 | * \param wrap_mode one of PIPE_TEX_WRAP_x |
||
74 | * \param out_offset byte offset for the wrapped coordinate |
||
75 | * \param out_i resulting sub-block pixel coordinate for coord0 |
||
76 | */ |
||
77 | static void |
||
78 | lp_build_sample_wrap_nearest_int(struct lp_build_sample_context *bld, |
||
79 | unsigned block_length, |
||
80 | LLVMValueRef coord, |
||
81 | LLVMValueRef coord_f, |
||
82 | LLVMValueRef length, |
||
83 | LLVMValueRef stride, |
||
84 | LLVMValueRef offset, |
||
85 | boolean is_pot, |
||
86 | unsigned wrap_mode, |
||
87 | LLVMValueRef *out_offset, |
||
88 | LLVMValueRef *out_i) |
||
89 | { |
||
90 | struct lp_build_context *int_coord_bld = &bld->int_coord_bld; |
||
91 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
92 | LLVMValueRef length_minus_one; |
||
93 | |||
94 | length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one); |
||
95 | |||
96 | switch(wrap_mode) { |
||
97 | case PIPE_TEX_WRAP_REPEAT: |
||
98 | if(is_pot) |
||
99 | coord = LLVMBuildAnd(builder, coord, length_minus_one, ""); |
||
100 | else { |
||
101 | struct lp_build_context *coord_bld = &bld->coord_bld; |
||
102 | LLVMValueRef length_f = lp_build_int_to_float(coord_bld, length); |
||
103 | if (offset) { |
||
104 | offset = lp_build_int_to_float(coord_bld, offset); |
||
105 | offset = lp_build_div(coord_bld, offset, length_f); |
||
106 | coord_f = lp_build_add(coord_bld, coord_f, offset); |
||
107 | } |
||
108 | coord = lp_build_fract_safe(coord_bld, coord_f); |
||
109 | coord = lp_build_mul(coord_bld, coord, length_f); |
||
110 | coord = lp_build_itrunc(coord_bld, coord); |
||
111 | } |
||
112 | break; |
||
113 | |||
114 | case PIPE_TEX_WRAP_CLAMP_TO_EDGE: |
||
115 | coord = lp_build_max(int_coord_bld, coord, int_coord_bld->zero); |
||
116 | coord = lp_build_min(int_coord_bld, coord, length_minus_one); |
||
117 | break; |
||
118 | |||
119 | case PIPE_TEX_WRAP_CLAMP: |
||
120 | case PIPE_TEX_WRAP_CLAMP_TO_BORDER: |
||
121 | case PIPE_TEX_WRAP_MIRROR_REPEAT: |
||
122 | case PIPE_TEX_WRAP_MIRROR_CLAMP: |
||
123 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: |
||
124 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: |
||
125 | default: |
||
126 | assert(0); |
||
127 | } |
||
128 | |||
129 | lp_build_sample_partial_offset(int_coord_bld, block_length, coord, stride, |
||
130 | out_offset, out_i); |
||
131 | } |
||
132 | |||
133 | |||
134 | /** |
||
135 | * Build LLVM code for texture coord wrapping, for nearest filtering, |
||
136 | * for float texcoords. |
||
137 | * \param coord the incoming texcoord (s,t or r) |
||
138 | * \param length the texture size along one dimension |
||
139 | * \param offset the texel offset along the coord axis |
||
140 | * \param is_pot if TRUE, length is a power of two |
||
141 | * \param wrap_mode one of PIPE_TEX_WRAP_x |
||
142 | * \param icoord the texcoord after wrapping, as int |
||
143 | */ |
||
144 | static void |
||
145 | lp_build_sample_wrap_nearest_float(struct lp_build_sample_context *bld, |
||
146 | LLVMValueRef coord, |
||
147 | LLVMValueRef length, |
||
148 | LLVMValueRef offset, |
||
149 | boolean is_pot, |
||
150 | unsigned wrap_mode, |
||
151 | LLVMValueRef *icoord) |
||
152 | { |
||
153 | struct lp_build_context *coord_bld = &bld->coord_bld; |
||
154 | LLVMValueRef length_minus_one; |
||
155 | |||
156 | switch(wrap_mode) { |
||
157 | case PIPE_TEX_WRAP_REPEAT: |
||
158 | if (offset) { |
||
159 | /* this is definitely not ideal for POT case */ |
||
160 | offset = lp_build_int_to_float(coord_bld, offset); |
||
161 | offset = lp_build_div(coord_bld, offset, length); |
||
162 | coord = lp_build_add(coord_bld, coord, offset); |
||
163 | } |
||
164 | /* take fraction, unnormalize */ |
||
165 | coord = lp_build_fract_safe(coord_bld, coord); |
||
166 | coord = lp_build_mul(coord_bld, coord, length); |
||
167 | *icoord = lp_build_itrunc(coord_bld, coord); |
||
168 | break; |
||
169 | case PIPE_TEX_WRAP_CLAMP_TO_EDGE: |
||
170 | length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one); |
||
171 | if (bld->static_sampler_state->normalized_coords) { |
||
172 | /* scale coord to length */ |
||
173 | coord = lp_build_mul(coord_bld, coord, length); |
||
174 | } |
||
175 | if (offset) { |
||
176 | offset = lp_build_int_to_float(coord_bld, offset); |
||
177 | coord = lp_build_add(coord_bld, coord, offset); |
||
178 | } |
||
179 | coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, |
||
180 | length_minus_one); |
||
181 | *icoord = lp_build_itrunc(coord_bld, coord); |
||
182 | break; |
||
183 | |||
184 | case PIPE_TEX_WRAP_CLAMP: |
||
185 | case PIPE_TEX_WRAP_CLAMP_TO_BORDER: |
||
186 | case PIPE_TEX_WRAP_MIRROR_REPEAT: |
||
187 | case PIPE_TEX_WRAP_MIRROR_CLAMP: |
||
188 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE: |
||
189 | case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: |
||
190 | default: |
||
191 | assert(0); |
||
192 | } |
||
193 | } |
||
194 | |||
195 | |||
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for scaled integer texcoords.
 *
 * Only PIPE_TEX_WRAP_REPEAT and PIPE_TEX_WRAP_CLAMP_TO_EDGE are
 * implemented by this AoS path; all other wrap modes assert.
 *
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 * \param coord0   the incoming texcoord (s,t or r) scaled to the texture size
 * \param weight_i returns the 8-bit fixed-point lerp weight (NPOT repeat only)
 * \param coord_f  the incoming texcoord (s,t or r) as float vec
 * \param length   the texture size along one dimension
 * \param stride   pixel stride along the coordinate axis (in bytes)
 * \param offset   the texel offset along the coord axis (may be NULL)
 * \param is_pot   if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param offset0  resulting relative offset for coord0
 * \param offset1  resulting relative offset for coord0 + 1
 * \param i0       resulting sub-block pixel coordinate for coord0
 * \param i1       resulting sub-block pixel coordinate for coord0 + 1
 */
static void
lp_build_sample_wrap_linear_int(struct lp_build_sample_context *bld,
                                unsigned block_length,
                                LLVMValueRef coord0,
                                LLVMValueRef *weight_i,
                                LLVMValueRef coord_f,
                                LLVMValueRef length,
                                LLVMValueRef stride,
                                LLVMValueRef offset,
                                boolean is_pot,
                                unsigned wrap_mode,
                                LLVMValueRef *offset0,
                                LLVMValueRef *offset1,
                                LLVMValueRef *i0,
                                LLVMValueRef *i1)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef length_minus_one;
   LLVMValueRef lmask, umask, mask;

   /*
    * If the pixel block covers more than one pixel then there is no easy
    * way to calculate offset1 relative to offset0. Instead, compute them
    * independently. Otherwise, try to compute offset0 and offset1 with
    * a single stride multiplication.
    */

   length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);

   if (block_length != 1) {
      /* Multi-pixel block: wrap both coords, then compute each
       * offset/subcoord pair independently.
       */
      LLVMValueRef coord1;
      switch(wrap_mode) {
      case PIPE_TEX_WRAP_REPEAT:
         if (is_pot) {
            /* POT: wrap both coords with a bitwise AND against size-1 */
            coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
            coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
            coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
         }
         else {
            /* NPOT: wrap in float space; helper returns the int coord
             * plus the float lerp weight.
             */
            LLVMValueRef mask;
            LLVMValueRef weight;
            LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
            if (offset) {
               /* apply texel offset as a normalized-coord adjustment */
               offset = lp_build_int_to_float(&bld->coord_bld, offset);
               offset = lp_build_div(&bld->coord_bld, offset, length_f);
               coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
            }
            lp_build_coord_repeat_npot_linear(bld, coord_f,
                                              length, length_f,
                                              &coord0, &weight);
            /* coord1 = coord0 + 1, except wrap back to 0 when coord0 is
             * the last texel; the AND with the != mask zeroes it there.
             */
            mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                                    PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
            coord1 = LLVMBuildAnd(builder,
                                  lp_build_add(int_coord_bld, coord0,
                                               int_coord_bld->one),
                                  mask, "");
            /* convert weight to 8-bit fixed point */
            weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
            *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
         }
         break;

      case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
         coord0 = lp_build_clamp(int_coord_bld, coord0, int_coord_bld->zero,
                                 length_minus_one);
         coord1 = lp_build_clamp(int_coord_bld, coord1, int_coord_bld->zero,
                                 length_minus_one);
         break;

      case PIPE_TEX_WRAP_CLAMP:
      case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
      case PIPE_TEX_WRAP_MIRROR_REPEAT:
      case PIPE_TEX_WRAP_MIRROR_CLAMP:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
      case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
      default:
         /* unsupported in the AoS linear path */
         assert(0);
         coord0 = int_coord_bld->zero;
         coord1 = int_coord_bld->zero;
         break;
      }
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord0, stride,
                                     offset0, i0);
      lp_build_sample_partial_offset(int_coord_bld, block_length, coord1, stride,
                                     offset1, i1);
      return;
   }

   /* Single-pixel block: sub-block coords are always zero and offset1
    * can be derived from offset0 with one stride add.
    */
   *i0 = int_coord_bld->zero;
   *i1 = int_coord_bld->zero;

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
      }
      else {
         LLVMValueRef weight;
         LLVMValueRef length_f = lp_build_int_to_float(&bld->coord_bld, length);
         if (offset) {
            /* apply texel offset as a normalized-coord adjustment */
            offset = lp_build_int_to_float(&bld->coord_bld, offset);
            offset = lp_build_div(&bld->coord_bld, offset, length_f);
            coord_f = lp_build_add(&bld->coord_bld, coord_f, offset);
         }
         lp_build_coord_repeat_npot_linear(bld, coord_f,
                                           length, length_f,
                                           &coord0, &weight);
         /* convert weight to 8-bit fixed point */
         weight = lp_build_mul_imm(&bld->coord_bld, weight, 256);
         *weight_i = lp_build_itrunc(&bld->coord_bld, weight);
      }

      /* offset1 = offset0 + stride, except wrap back to 0 when coord0
       * is the last texel (mask is all-zeros there).
       */
      mask = lp_build_compare(bld->gallivm, int_coord_bld->type,
                              PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = LLVMBuildAnd(builder,
                              lp_build_add(int_coord_bld, *offset0, stride),
                              mask, "");
      break;

   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      /* XXX this might be slower than the separate path
       * on some newer cpus. With sse41 this is 8 instructions vs. 7
       * - at least on SNB this is almost certainly slower since
       * min/max are cheaper than selects, and the muls aren't bad.
       */
      lmask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_GEQUAL, coord0, int_coord_bld->zero);
      umask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
                               PIPE_FUNC_LESS, coord0, length_minus_one);

      /* clamp coord0 to [0, length - 1] via selects */
      coord0 = lp_build_select(int_coord_bld, lmask, coord0, int_coord_bld->zero);
      coord0 = lp_build_select(int_coord_bld, umask, coord0, length_minus_one);

      /* offset1 only advances by stride when coord0 was strictly inside */
      mask = LLVMBuildAnd(builder, lmask, umask, "");

      *offset0 = lp_build_mul(int_coord_bld, coord0, stride);
      *offset1 = lp_build_add(int_coord_bld,
                              *offset0,
                              LLVMBuildAnd(builder, stride, mask, ""));
      break;

   case PIPE_TEX_WRAP_CLAMP:
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
   default:
      /* unsupported in the AoS linear path */
      assert(0);
      *offset0 = int_coord_bld->zero;
      *offset1 = int_coord_bld->zero;
      break;
   }
}
||
369 | |||
370 | |||
/**
 * Build LLVM code for texture coord wrapping, for linear filtering,
 * for float texcoords.
 *
 * Only PIPE_TEX_WRAP_REPEAT and PIPE_TEX_WRAP_CLAMP_TO_EDGE are
 * implemented by this AoS path; all other wrap modes assert.
 *
 * \param block_length is the length of the pixel block along the
 *                     coordinate axis
 *                     NOTE(review): not referenced in the body — confirm
 *                     it is intentionally unused.
 * \param coord     the incoming texcoord (s,t or r)
 * \param length    the texture size along one dimension, as float vec
 * \param offset    the texel offset along the coord axis (may be NULL)
 * \param is_pot    if TRUE, length is a power of two
 * \param wrap_mode one of PIPE_TEX_WRAP_x
 * \param coord0    the first texcoord after wrapping, as int
 * \param coord1    the second texcoord after wrapping, as int
 * \param weight    the filter weight as int (0-255)
 * \param force_nearest if this coord actually uses nearest filtering
 *                      (skips the -0.5 texel-center adjustment)
 */
static void
lp_build_sample_wrap_linear_float(struct lp_build_sample_context *bld,
                                  unsigned block_length,
                                  LLVMValueRef coord,
                                  LLVMValueRef length,
                                  LLVMValueRef offset,
                                  boolean is_pot,
                                  unsigned wrap_mode,
                                  LLVMValueRef *coord0,
                                  LLVMValueRef *coord1,
                                  LLVMValueRef *weight,
                                  unsigned force_nearest)
{
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
   struct lp_build_context *coord_bld = &bld->coord_bld;
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
   LLVMValueRef length_minus_one = lp_build_sub(coord_bld, length, coord_bld->one);

   switch(wrap_mode) {
   case PIPE_TEX_WRAP_REPEAT:
      if (is_pot) {
         /* mul by size and subtract 0.5 */
         coord = lp_build_mul(coord_bld, coord, length);
         if (offset) {
            offset = lp_build_int_to_float(coord_bld, offset);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         if (!force_nearest)
            coord = lp_build_sub(coord_bld, coord, half);
         *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
         /* convert to int, compute lerp weight */
         lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
         *coord1 = lp_build_ifloor(coord_bld, *coord1);
         /* repeat wrap: AND both int coords with size-1 */
         length_minus_one = lp_build_itrunc(coord_bld, length_minus_one);
         *coord0 = LLVMBuildAnd(builder, *coord0, length_minus_one, "");
         *coord1 = LLVMBuildAnd(builder, *coord1, length_minus_one, "");
      }
      else {
         LLVMValueRef mask;
         if (offset) {
            /* apply texel offset as a normalized-coord adjustment */
            offset = lp_build_int_to_float(coord_bld, offset);
            offset = lp_build_div(coord_bld, offset, length);
            coord = lp_build_add(coord_bld, coord, offset);
         }
         /* wrap with normalized floats is just fract */
         coord = lp_build_fract(coord_bld, coord);
         /* unnormalize */
         coord = lp_build_mul(coord_bld, coord, length);
         /*
          * we avoided the 0.5/length division, have to fix up wrong
          * edge cases with selects
          */
         *coord1 = lp_build_add(coord_bld, coord, half);
         coord = lp_build_sub(coord_bld, coord, half);
         *weight = lp_build_fract(coord_bld, coord);
         /* coord0: if coord went negative, wrap to the last texel */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, coord, coord_bld->zero);
         *coord0 = lp_build_select(coord_bld, mask, length_minus_one, coord);
         *coord0 = lp_build_itrunc(coord_bld, *coord0);
         /* coord1: if it ran past the end, wrap back to texel 0 */
         mask = lp_build_compare(coord_bld->gallivm, coord_bld->type,
                                 PIPE_FUNC_LESS, *coord1, length);
         *coord1 = lp_build_select(coord_bld, mask, *coord1, coord_bld->zero);
         *coord1 = lp_build_itrunc(coord_bld, *coord1);
      }
      break;
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
      if (bld->static_sampler_state->normalized_coords) {
         /* mul by tex size */
         coord = lp_build_mul(coord_bld, coord, length);
      }
      if (offset) {
         offset = lp_build_int_to_float(coord_bld, offset);
         coord = lp_build_add(coord_bld, coord, offset);
      }
      /* subtract 0.5 */
      if (!force_nearest) {
         coord = lp_build_sub(coord_bld, coord, half);
      }
      /* clamp to [0, length - 1] */
      coord = lp_build_min(coord_bld, coord, length_minus_one);
      coord = lp_build_max(coord_bld, coord, coord_bld->zero);
      *coord1 = lp_build_add(coord_bld, coord, coord_bld->one);
      /* convert to int, compute lerp weight */
      lp_build_ifloor_fract(coord_bld, coord, coord0, weight);
      /* coord1 = min(coord1, length-1) */
      *coord1 = lp_build_min(coord_bld, *coord1, length_minus_one);
      *coord1 = lp_build_itrunc(coord_bld, *coord1);
      break;
   default:
      /* unsupported wrap modes in the AoS linear path */
      assert(0);
      *coord0 = int_coord_bld->zero;
      *coord1 = int_coord_bld->zero;
      *weight = coord_bld->zero;
      break;
   }
   /* convert float weight to 8-bit fixed point (0..255) */
   *weight = lp_build_mul_imm(coord_bld, *weight, 256);
   *weight = lp_build_itrunc(coord_bld, *weight);
   return;
}
||
487 | |||
488 | |||
489 | /** |
||
490 | * Fetch texels for image with nearest sampling. |
||
491 | * Return filtered color as two vectors of 16-bit fixed point values. |
||
492 | */ |
||
493 | static void |
||
494 | lp_build_sample_fetch_image_nearest(struct lp_build_sample_context *bld, |
||
495 | LLVMValueRef data_ptr, |
||
496 | LLVMValueRef offset, |
||
497 | LLVMValueRef x_subcoord, |
||
498 | LLVMValueRef y_subcoord, |
||
499 | LLVMValueRef *colors) |
||
500 | { |
||
501 | /* |
||
502 | * Fetch the pixels as 4 x 32bit (rgba order might differ): |
||
503 | * |
||
504 | * rgba0 rgba1 rgba2 rgba3 |
||
505 | * |
||
506 | * bit cast them into 16 x u8 |
||
507 | * |
||
508 | * r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3 |
||
509 | * |
||
510 | * unpack them into two 8 x i16: |
||
511 | * |
||
512 | * r0 g0 b0 a0 r1 g1 b1 a1 |
||
513 | * r2 g2 b2 a2 r3 g3 b3 a3 |
||
514 | * |
||
515 | * The higher 8 bits of the resulting elements will be zero. |
||
516 | */ |
||
517 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
518 | LLVMValueRef rgba8; |
||
519 | struct lp_build_context u8n; |
||
520 | LLVMTypeRef u8n_vec_type; |
||
521 | |||
522 | lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width)); |
||
523 | u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type); |
||
524 | |||
525 | if (util_format_is_rgba8_variant(bld->format_desc)) { |
||
526 | /* |
||
527 | * Given the format is a rgba8, just read the pixels as is, |
||
528 | * without any swizzling. Swizzling will be done later. |
||
529 | */ |
||
530 | rgba8 = lp_build_gather(bld->gallivm, |
||
531 | bld->texel_type.length, |
||
532 | bld->format_desc->block.bits, |
||
533 | bld->texel_type.width, |
||
534 | data_ptr, offset, TRUE); |
||
535 | |||
536 | rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, ""); |
||
537 | } |
||
538 | else { |
||
539 | rgba8 = lp_build_fetch_rgba_aos(bld->gallivm, |
||
540 | bld->format_desc, |
||
541 | u8n.type, |
||
542 | data_ptr, offset, |
||
543 | x_subcoord, |
||
544 | y_subcoord); |
||
545 | } |
||
546 | |||
547 | *colors = rgba8; |
||
548 | } |
||
549 | |||
550 | |||
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Coordinates are handled in 8.8 fixed point (scaled by 256).
 *
 * \param int_size        int vec of texture width/height/depth
 * \param row_stride_vec  row stride in bytes, as int vec
 * \param img_stride_vec  image/layer stride in bytes, as int vec
 * \param data_ptr        pointer to the texture image data
 * \param mipoffsets      optional per-pixel mip level byte offsets
 * \param s, t, r         the incoming texcoords as float vecs
 * \param offsets         optional texel offsets per axis (offsets[0] NULL
 *                        means no offsets at all)
 * \param colors          returns the fetched texels
 */
static void
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
                              LLVMValueRef int_size,
                              LLVMValueRef row_stride_vec,
                              LLVMValueRef img_stride_vec,
                              LLVMValueRef data_ptr,
                              LLVMValueRef mipoffsets,
                              LLVMValueRef s,
                              LLVMValueRef t,
                              LLVMValueRef r,
                              const LLVMValueRef *offsets,
                              LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, t_ipart = NULL, r_ipart = NULL;
   LLVMValueRef s_float, t_float = NULL, r_float = NULL;
   LLVMValueRef x_stride;
   LLVMValueRef x_offset, offset;
   LLVMValueRef x_subcoord, y_subcoord, z_subcoord;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* keep the original float coords around for the NPOT wrap paths */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets (offsets[0] non-NULL implies all used axes present) */
   /* NOTE(review): offsets are both added to the ipart here and also
    * passed down to lp_build_sample_wrap_nearest_int below — confirm the
    * wrap helper only consumes them on the float (NPOT) path so they are
    * not applied twice.
    */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* get pixel, row, image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);

   /* Do texcoord wrapping, compute texel offset */
   lp_build_sample_wrap_nearest_int(bld,
                                    bld->format_desc->block.width,
                                    s_ipart, s_float,
                                    width_vec, x_stride, offsets[0],
                                    bld->static_texture_state->pot_width,
                                    bld->static_sampler_state->wrap_s,
                                    &x_offset, &x_subcoord);
   offset = x_offset;
   if (dims >= 2) {
      LLVMValueRef y_offset;
      lp_build_sample_wrap_nearest_int(bld,
                                       bld->format_desc->block.height,
                                       t_ipart, t_float,
                                       height_vec, row_stride_vec, offsets[1],
                                       bld->static_texture_state->pot_height,
                                       bld->static_sampler_state->wrap_t,
                                       &y_offset, &y_subcoord);
      offset = lp_build_add(&bld->int_coord_bld, offset, y_offset);
      if (dims >= 3) {
         LLVMValueRef z_offset;
         lp_build_sample_wrap_nearest_int(bld,
                                          1, /* block length (depth) */
                                          r_ipart, r_float,
                                          depth_vec, img_stride_vec, offsets[2],
                                          bld->static_texture_state->pot_depth,
                                          bld->static_sampler_state->wrap_r,
                                          &z_offset, &z_subcoord);
         offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
      }
   }
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      /* The r coord is the cube face in [0,5] or array layer */
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      offset = lp_build_add(&bld->int_coord_bld, offset, z_offset);
   }
   if (mipoffsets) {
      /* add the selected mip level's byte offset */
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}
||
693 | |||
694 | |||
/**
 * Sample a single texture image with nearest sampling.
 * If sampling a cube texture, r = cube face in [0,5].
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 *
 * \param int_size        int vec of texture width/height/depth
 * \param row_stride_vec  row stride in bytes, as int vec
 * \param img_stride_vec  image/layer stride in bytes, as int vec
 * \param data_ptr        pointer to the texture image data
 * \param mipoffsets      optional per-pixel mip level byte offsets
 * \param s, t, r         the incoming texcoords as float vecs
 * \param offsets         optional texel offsets per axis
 * \param colors          returns the fetched texels
 */
static void
lp_build_sample_image_nearest_afloat(struct lp_build_sample_context *bld,
                                     LLVMValueRef int_size,
                                     LLVMValueRef row_stride_vec,
                                     LLVMValueRef img_stride_vec,
                                     LLVMValueRef data_ptr,
                                     LLVMValueRef mipoffsets,
                                     LLVMValueRef s,
                                     LLVMValueRef t,
                                     LLVMValueRef r,
                                     const LLVMValueRef *offsets,
                                     LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef offset;
   LLVMValueRef x_subcoord, y_subcoord;
   LLVMValueRef x_icoord = NULL, y_icoord = NULL, z_icoord = NULL;
   LLVMValueRef flt_size;

   /* image sizes as floats, so the wrap math stays in float space */
   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* Do texcoord wrapping */
   lp_build_sample_wrap_nearest_float(bld,
                                      s, width_vec, offsets[0],
                                      bld->static_texture_state->pot_width,
                                      bld->static_sampler_state->wrap_s,
                                      &x_icoord);

   if (dims >= 2) {
      lp_build_sample_wrap_nearest_float(bld,
                                         t, height_vec, offsets[1],
                                         bld->static_texture_state->pot_height,
                                         bld->static_sampler_state->wrap_t,
                                         &y_icoord);

      if (dims >= 3) {
         lp_build_sample_wrap_nearest_float(bld,
                                            r, depth_vec, offsets[2],
                                            bld->static_texture_state->pot_depth,
                                            bld->static_sampler_state->wrap_r,
                                            &z_icoord);
      }
   }
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      /* r is the cube face / array layer, used unwrapped */
      z_icoord = r;
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /*
    * compute texel offsets -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_offset(&bld->int_coord_bld,
                          bld->format_desc,
                          x_icoord, y_icoord,
                          z_icoord,
                          row_stride_vec, img_stride_vec,
                          &offset,
                          &x_subcoord, &y_subcoord);
   if (mipoffsets) {
      /* add the selected mip level's byte offset */
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
   }

   lp_build_sample_fetch_image_nearest(bld, data_ptr, offset,
                                       x_subcoord, y_subcoord,
                                       colors);
}
||
785 | |||
786 | |||
/**
 * Fetch texels for image with linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * \param data_ptr    base pointer of the texture image data
 * \param offset      per-neighbor texel byte offsets, indexed [z][y][x]
 * \param x_subcoord  sub-block x pixel coords for the two x neighbors
 *                    (used only by the non-rgba8 fetch path)
 * \param y_subcoord  sub-block y pixel coords for the two y neighbors
 * \param s_fpart     8-bit fractional weights for the s axis (one per pixel)
 * \param t_fpart     8-bit fractional weights for the t axis (NULL if dims < 2)
 * \param r_fpart     8-bit fractional weights for the r axis (NULL if dims < 3)
 * \param colors      returns the packed, filtered 8-bit unorm colors
 */
static void
lp_build_sample_fetch_image_linear(struct lp_build_sample_context *bld,
                                   LLVMValueRef data_ptr,
                                   LLVMValueRef offset[2][2][2],
                                   LLVMValueRef x_subcoord[2],
                                   LLVMValueRef y_subcoord[2],
                                   LLVMValueRef s_fpart,
                                   LLVMValueRef t_fpart,
                                   LLVMValueRef r_fpart,
                                   LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context u8n;
   LLVMTypeRef u8n_vec_type;
   LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
   LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
   LLVMValueRef shuffle;
   LLVMValueRef neighbors[2][2][2]; /* [z][y][x] */
   LLVMValueRef packed;
   unsigned i, j, k;
   unsigned numj, numk;

   /* 8-bit unorm build context: all lerp math below is done on packed u8 */
   lp_build_context_init(&u8n, bld->gallivm, lp_type_unorm(8, bld->vector_width));
   u8n_vec_type = lp_build_vec_type(bld->gallivm, u8n.type);

   /*
    * Transform 4 x i32 in
    *
    *   s_fpart = {s0, s1, s2, s3}
    *
    * where each value is between 0 and 0xff,
    *
    * into one 16 x u8
    *
    *   s_fpart = {s0, s0, s0, s0, s1, s1, s1, s1, s2, s2, s2, s2, s3, s3, s3, s3}
    *
    * and likewise for t_fpart. There is no risk of losing precision here
    * since the fractional parts only use the lower 8bits.
    */
   s_fpart = LLVMBuildBitCast(builder, s_fpart, u8n_vec_type, "");
   if (dims >= 2)
      t_fpart = LLVMBuildBitCast(builder, t_fpart, u8n_vec_type, "");
   if (dims >= 3)
      r_fpart = LLVMBuildBitCast(builder, r_fpart, u8n_vec_type, "");

   /*
    * Build the shuffle mask which replicates, for each group of 4 bytes,
    * the byte holding the weight into all 4 lanes.  On big endian the
    * meaningful low byte of each i32 sits at sub-index 3 instead of 0.
    */
   for (j = 0; j < u8n.type.length; j += 4) {
#ifdef PIPE_ARCH_LITTLE_ENDIAN
      unsigned subindex = 0;
#else
      unsigned subindex = 3;
#endif
      LLVMValueRef index;

      index = LLVMConstInt(elem_type, j + subindex, 0);
      for (i = 0; i < 4; ++i)
         shuffles[j + i] = index;
   }

   shuffle = LLVMConstVector(shuffles, u8n.type.length);

   s_fpart = LLVMBuildShuffleVector(builder, s_fpart, u8n.undef,
                                    shuffle, "");
   if (dims >= 2) {
      t_fpart = LLVMBuildShuffleVector(builder, t_fpart, u8n.undef,
                                       shuffle, "");
   }
   if (dims >= 3) {
      r_fpart = LLVMBuildShuffleVector(builder, r_fpart, u8n.undef,
                                       shuffle, "");
   }

   /*
    * Fetch the pixels as 4 x 32bit (rgba order might differ):
    *
    *   rgba0 rgba1 rgba2 rgba3
    *
    * bit cast them into 16 x u8
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1 r2 g2 b2 a2 r3 g3 b3 a3
    *
    * unpack them into two 8 x i16:
    *
    *   r0 g0 b0 a0 r1 g1 b1 a1
    *   r2 g2 b2 a2 r3 g3 b3 a3
    *
    * The higher 8 bits of the resulting elements will be zero.
    */
   /* number of y/z neighbor pairs actually needed for this dimensionality */
   numj = 1 + (dims >= 2);
   numk = 1 + (dims >= 3);

   /* gather all 2/4/8 neighboring texels (1D/2D/3D respectively) */
   for (k = 0; k < numk; k++) {
      for (j = 0; j < numj; j++) {
         for (i = 0; i < 2; i++) {
            LLVMValueRef rgba8;

            if (util_format_is_rgba8_variant(bld->format_desc)) {
               /*
                * Given the format is a rgba8, just read the pixels as is,
                * without any swizzling. Swizzling will be done later.
                */
               rgba8 = lp_build_gather(bld->gallivm,
                                       bld->texel_type.length,
                                       bld->format_desc->block.bits,
                                       bld->texel_type.width,
                                       data_ptr, offset[k][j][i], TRUE);

               rgba8 = LLVMBuildBitCast(builder, rgba8, u8n_vec_type, "");
            }
            else {
               /* generic path: decode arbitrary formats to 8-bit unorm rgba */
               rgba8 = lp_build_fetch_rgba_aos(bld->gallivm,
                                               bld->format_desc,
                                               u8n.type,
                                               data_ptr, offset[k][j][i],
                                               x_subcoord[i],
                                               y_subcoord[j]);
            }

            neighbors[k][j][i] = rgba8;
         }
      }
   }

   /*
    * Linear interpolation with 8.8 fixed point.
    */
   if (bld->static_sampler_state->force_nearest_s) {
      /* special case 1-D lerp: s axis forced to nearest, lerp along t only */
      packed = lp_build_lerp(&u8n,
                             t_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else if (bld->static_sampler_state->force_nearest_t) {
      /* special case 1-D lerp: t axis forced to nearest, lerp along s only */
      packed = lp_build_lerp(&u8n,
                             s_fpart,
                             neighbors[0][0][0],
                             neighbors[0][0][1],
                             LP_BLD_LERP_PRESCALED_WEIGHTS);
   }
   else {
      /* general 1/2/3-D lerping */
      if (dims == 1) {
         packed = lp_build_lerp(&u8n,
                                s_fpart,
                                neighbors[0][0][0],
                                neighbors[0][0][1],
                                LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else if (dims == 2) {
         /* 2-D lerp */
         packed = lp_build_lerp_2d(&u8n,
                                   s_fpart, t_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      } else {
         /* 3-D lerp */
         assert(dims == 3);
         packed = lp_build_lerp_3d(&u8n,
                                   s_fpart, t_fpart, r_fpart,
                                   neighbors[0][0][0],
                                   neighbors[0][0][1],
                                   neighbors[0][1][0],
                                   neighbors[0][1][1],
                                   neighbors[1][0][0],
                                   neighbors[1][0][1],
                                   neighbors[1][1][0],
                                   neighbors[1][1][1],
                                   LP_BLD_LERP_PRESCALED_WEIGHTS);
      }
   }

   *colors = packed;
}
||
969 | |||
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 *
 * Coordinates are converted to 8.8 fixed point integers before wrapping
 * and offset computation (integer address-calc path, vs. the float path
 * in lp_build_sample_image_linear_afloat).
 *
 * \param int_size        level size as an int vector (w, h, d)
 * \param row_stride_vec  row stride in bytes (NULL for 1D)
 * \param img_stride_vec  image/slice stride in bytes (NULL for 1D/2D)
 * \param data_ptr        base pointer of the mip level (or texture base
 *                        when mipoffsets is non-NULL)
 * \param mipoffsets      optional per-pixel mip level byte offsets
 * \param s, t, r         texture coordinates (r is face/layer for
 *                        cube/array targets)
 * \param offsets         optional texel offsets per axis (may contain NULLs)
 * \param colors          returns the packed filtered colors
 */
static void
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
                             LLVMValueRef int_size,
                             LLVMValueRef row_stride_vec,
                             LLVMValueRef img_stride_vec,
                             LLVMValueRef data_ptr,
                             LLVMValueRef mipoffsets,
                             LLVMValueRef s,
                             LLVMValueRef t,
                             LLVMValueRef r,
                             const LLVMValueRef *offsets,
                             LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMBuilderRef builder = bld->gallivm->builder;
   struct lp_build_context i32;
   LLVMTypeRef i32_vec_type;
   LLVMValueRef i32_c8, i32_c128, i32_c255;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_ipart, s_fpart, s_float;
   LLVMValueRef t_ipart = NULL, t_fpart = NULL, t_float = NULL;
   LLVMValueRef r_ipart = NULL, r_fpart = NULL, r_float = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2], z_subcoord[2];
   unsigned x, y, z;

   lp_build_context_init(&i32, bld->gallivm, lp_type_int_vec(32, bld->vector_width));

   i32_vec_type = lp_build_vec_type(bld->gallivm, i32.type);

   lp_build_extract_image_sizes(bld,
                                &bld->int_size_bld,
                                bld->int_coord_type,
                                int_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* keep the original float coords; some wrap modes need them later */
   s_float = s; t_float = t; r_float = r;

   if (bld->static_sampler_state->normalized_coords) {
      LLVMValueRef scaled_size;
      LLVMValueRef flt_size;

      /* scale size by 256 (8 fractional bits) */
      scaled_size = lp_build_shl_imm(&bld->int_size_bld, int_size, 8);

      flt_size = lp_build_int_to_float(&bld->float_size_bld, scaled_size);

      /* one mul gets us both unnormalization and the 8.8 fixed-point scale */
      lp_build_unnormalized_coords(bld, flt_size, &s, &t, &r);
   }
   else {
      /* scale coords by 256 (8 fractional bits) */
      s = lp_build_mul_imm(&bld->coord_bld, s, 256);
      if (dims >= 2)
         t = lp_build_mul_imm(&bld->coord_bld, t, 256);
      if (dims >= 3)
         r = lp_build_mul_imm(&bld->coord_bld, r, 256);
   }

   /* convert float to int */
   s = LLVMBuildFPToSI(builder, s, i32_vec_type, "");
   if (dims >= 2)
      t = LLVMBuildFPToSI(builder, t, i32_vec_type, "");
   if (dims >= 3)
      r = LLVMBuildFPToSI(builder, r, i32_vec_type, "");

   /* subtract 0.5 (add -128) to center the filter footprint,
    * skipped for axes forced to nearest filtering */
   i32_c128 = lp_build_const_int_vec(bld->gallivm, i32.type, -128);
   if (!bld->static_sampler_state->force_nearest_s) {
      s = LLVMBuildAdd(builder, s, i32_c128, "");
   }
   if (dims >= 2 && !bld->static_sampler_state->force_nearest_t) {
      t = LLVMBuildAdd(builder, t, i32_c128, "");
   }
   if (dims >= 3) {
      r = LLVMBuildAdd(builder, r, i32_c128, "");
   }

   /* compute floor (shift right 8) */
   i32_c8 = lp_build_const_int_vec(bld->gallivm, i32.type, 8);
   s_ipart = LLVMBuildAShr(builder, s, i32_c8, "");
   if (dims >= 2)
      t_ipart = LLVMBuildAShr(builder, t, i32_c8, "");
   if (dims >= 3)
      r_ipart = LLVMBuildAShr(builder, r, i32_c8, "");

   /* add texel offsets */
   if (offsets[0]) {
      s_ipart = lp_build_add(&i32, s_ipart, offsets[0]);
      if (dims >= 2) {
         t_ipart = lp_build_add(&i32, t_ipart, offsets[1]);
         if (dims >= 3) {
            r_ipart = lp_build_add(&i32, r_ipart, offsets[2]);
         }
      }
   }

   /* compute fractional part (AND with 0xff) */
   i32_c255 = lp_build_const_int_vec(bld->gallivm, i32.type, 255);
   s_fpart = LLVMBuildAnd(builder, s, i32_c255, "");
   if (dims >= 2)
      t_fpart = LLVMBuildAnd(builder, t, i32_c255, "");
   if (dims >= 3)
      r_fpart = LLVMBuildAnd(builder, r, i32_c255, "");

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm, bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_int(bld,
                                   bld->format_desc->block.width,
                                   s_ipart, &s_fpart, s_float,
                                   width_vec, x_stride, offsets[0],
                                   bld->static_texture_state->pot_width,
                                   bld->static_sampler_state->wrap_s,
                                   &x_offset0, &x_offset1,
                                   &x_subcoord[0], &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   /* seed all 8 neighbor offsets with the two x offsets;
    * y/z contributions are accumulated below */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      t_ipart, &t_fpart, t_float,
                                      height_vec, y_stride, offsets[1],
                                      bld->static_texture_state->pot_height,
                                      bld->static_sampler_state->wrap_t,
                                      &y_offset0, &y_offset1,
                                      &y_subcoord[0], &y_subcoord[1]);

      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      /* NOTE(review): block.height is passed as the block size for the
       * depth axis here (there is no block.depth field used) — presumably
       * fine since 3D-capable formats here have 1x1 blocks; confirm. */
      lp_build_sample_wrap_linear_int(bld,
                                      bld->format_desc->block.height,
                                      r_ipart, &r_fpart, r_float,
                                      depth_vec, z_stride, offsets[2],
                                      bld->static_texture_state->pot_depth,
                                      bld->static_sampler_state->wrap_r,
                                      &z_offset0, &z_offset1,
                                      &z_subcoord[0], &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}
||
1166 | |||
1167 | |||
/**
 * Sample a single texture image with (bi-)(tri-)linear sampling.
 * Return filtered color as two vectors of 16-bit fixed point values.
 * Does address calcs (except offsets) with floats.
 * Useful for AVX which has support for 8x32 floats but not 8x32 ints.
 *
 * \param int_size        level size as an int vector (w, h, d)
 * \param row_stride_vec  row stride in bytes (NULL for 1D)
 * \param img_stride_vec  image/slice stride in bytes (NULL for 1D/2D)
 * \param data_ptr        base pointer of the mip level (or texture base
 *                        when mipoffsets is non-NULL)
 * \param mipoffsets      optional per-pixel mip level byte offsets
 * \param s, t, r         texture coordinates (r is face/layer for
 *                        cube/array targets)
 * \param offsets         optional texel offsets per axis (may contain NULLs)
 * \param colors          returns the packed filtered colors
 */
static void
lp_build_sample_image_linear_afloat(struct lp_build_sample_context *bld,
                                    LLVMValueRef int_size,
                                    LLVMValueRef row_stride_vec,
                                    LLVMValueRef img_stride_vec,
                                    LLVMValueRef data_ptr,
                                    LLVMValueRef mipoffsets,
                                    LLVMValueRef s,
                                    LLVMValueRef t,
                                    LLVMValueRef r,
                                    const LLVMValueRef *offsets,
                                    LLVMValueRef *colors)
{
   const unsigned dims = bld->dims;
   LLVMValueRef width_vec, height_vec, depth_vec;
   LLVMValueRef s_fpart;
   LLVMValueRef t_fpart = NULL;
   LLVMValueRef r_fpart = NULL;
   LLVMValueRef x_stride, y_stride, z_stride;
   LLVMValueRef x_offset0, x_offset1;
   LLVMValueRef y_offset0, y_offset1;
   LLVMValueRef z_offset0, z_offset1;
   LLVMValueRef offset[2][2][2]; /* [z][y][x] */
   LLVMValueRef x_subcoord[2], y_subcoord[2];
   LLVMValueRef flt_size;
   LLVMValueRef x_icoord0, x_icoord1;
   LLVMValueRef y_icoord0, y_icoord1;
   LLVMValueRef z_icoord0, z_icoord1;
   unsigned x, y, z;

   flt_size = lp_build_int_to_float(&bld->float_size_bld, int_size);

   lp_build_extract_image_sizes(bld,
                                &bld->float_size_bld,
                                bld->coord_type,
                                flt_size,
                                &width_vec,
                                &height_vec,
                                &depth_vec);

   /* do texcoord wrapping and compute texel offsets */
   lp_build_sample_wrap_linear_float(bld,
                                     bld->format_desc->block.width,
                                     s, width_vec, offsets[0],
                                     bld->static_texture_state->pot_width,
                                     bld->static_sampler_state->wrap_s,
                                     &x_icoord0, &x_icoord1,
                                     &s_fpart,
                                     bld->static_sampler_state->force_nearest_s);

   if (dims >= 2) {
      lp_build_sample_wrap_linear_float(bld,
                                        bld->format_desc->block.height,
                                        t, height_vec, offsets[1],
                                        bld->static_texture_state->pot_height,
                                        bld->static_sampler_state->wrap_t,
                                        &y_icoord0, &y_icoord1,
                                        &t_fpart,
                                        bld->static_sampler_state->force_nearest_t);

      if (dims >= 3) {
         /* NOTE(review): block.height is reused as the block size for the
          * depth axis — presumably ok since formats reaching this path
          * have 1x1 blocks; confirm against lp_build_sample_wrap_linear_float.
          * force_nearest is hardwired to 0 for r. */
         lp_build_sample_wrap_linear_float(bld,
                                           bld->format_desc->block.height,
                                           r, depth_vec, offsets[2],
                                           bld->static_texture_state->pot_depth,
                                           bld->static_sampler_state->wrap_r,
                                           &z_icoord0, &z_icoord1,
                                           &r_fpart, 0);
      }
   }

   /*
    * From here on we deal with ints, and we should split up the 256bit
    * vectors manually for better generated code.
    */

   /* get pixel, row and image strides */
   x_stride = lp_build_const_vec(bld->gallivm,
                                 bld->int_coord_bld.type,
                                 bld->format_desc->block.bits/8);
   y_stride = row_stride_vec;
   z_stride = img_stride_vec;

   /*
    * compute texel offset -
    * cannot do offset calc with floats, difficult for block-based formats,
    * and not enough precision anyway.
    */
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord0, x_stride,
                                  &x_offset0, &x_subcoord[0]);
   lp_build_sample_partial_offset(&bld->int_coord_bld,
                                  bld->format_desc->block.width,
                                  x_icoord1, x_stride,
                                  &x_offset1, &x_subcoord[1]);

   /* add potential cube/array/mip offsets now as they are constant per pixel */
   if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
       bld->static_texture_state->target == PIPE_TEXTURE_1D_ARRAY ||
       bld->static_texture_state->target == PIPE_TEXTURE_2D_ARRAY) {
      LLVMValueRef z_offset;
      z_offset = lp_build_mul(&bld->int_coord_bld, r, img_stride_vec);
      /* The r coord is the cube face in [0,5] or array layer */
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, z_offset);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, z_offset);
   }
   if (mipoffsets) {
      x_offset0 = lp_build_add(&bld->int_coord_bld, x_offset0, mipoffsets);
      x_offset1 = lp_build_add(&bld->int_coord_bld, x_offset1, mipoffsets);
   }

   /* seed all 8 neighbor offsets with the two x offsets;
    * y/z contributions are accumulated below */
   for (z = 0; z < 2; z++) {
      for (y = 0; y < 2; y++) {
         offset[z][y][0] = x_offset0;
         offset[z][y][1] = x_offset1;
      }
   }

   if (dims >= 2) {
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord0, y_stride,
                                     &y_offset0, &y_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     bld->format_desc->block.height,
                                     y_icoord1, y_stride,
                                     &y_offset1, &y_subcoord[1]);
      for (z = 0; z < 2; z++) {
         for (x = 0; x < 2; x++) {
            offset[z][0][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][0][x], y_offset0);
            offset[z][1][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[z][1][x], y_offset1);
         }
      }
   }

   if (dims >= 3) {
      /* no blocking in z: block size 1, sub-coordinate is unused */
      LLVMValueRef z_subcoord[2];
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord0, z_stride,
                                     &z_offset0, &z_subcoord[0]);
      lp_build_sample_partial_offset(&bld->int_coord_bld,
                                     1,
                                     z_icoord1, z_stride,
                                     &z_offset1, &z_subcoord[1]);
      for (y = 0; y < 2; y++) {
         for (x = 0; x < 2; x++) {
            offset[0][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[0][y][x], z_offset0);
            offset[1][y][x] = lp_build_add(&bld->int_coord_bld,
                                           offset[1][y][x], z_offset1);
         }
      }
   }

   lp_build_sample_fetch_image_linear(bld, data_ptr, offset,
                                      x_subcoord, y_subcoord,
                                      s_fpart, t_fpart, r_fpart,
                                      colors);
}
||
1337 | |||
1338 | |||
/**
 * Sample the texture/mipmap using given image filter and mip filter.
 * data0_ptr and data1_ptr point to the two mipmap levels to sample
 * from. width0/1_vec, height0/1_vec, depth0/1_vec indicate their sizes.
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
 *
 * \param img_filter  PIPE_TEX_FILTER_NEAREST or PIPE_TEX_FILTER_LINEAR
 * \param mip_filter  PIPE_TEX_MIPFILTER_x; only LINEAR triggers the
 *                    second-level fetch and lerp below
 * \param s, t, r     texture coordinates
 * \param offsets     optional texel offsets per axis
 * \param ilevel0     first (integer) mip level
 * \param ilevel1     second mip level (used only for mipfilter LINEAR)
 * \param lod_fpart   float fraction between the two levels (LINEAR only)
 * \param colors_var  alloca'd variable the packed result is stored into
 */
static void
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
                       unsigned img_filter,
                       unsigned mip_filter,
                       LLVMValueRef s,
                       LLVMValueRef t,
                       LLVMValueRef r,
                       const LLVMValueRef *offsets,
                       LLVMValueRef ilevel0,
                       LLVMValueRef ilevel1,
                       LLVMValueRef lod_fpart,
                       LLVMValueRef colors_var)
{
   LLVMBuilderRef builder = bld->gallivm->builder;
   LLVMValueRef size0;
   LLVMValueRef size1;
   LLVMValueRef row_stride0_vec = NULL;
   LLVMValueRef row_stride1_vec = NULL;
   LLVMValueRef img_stride0_vec = NULL;
   LLVMValueRef img_stride1_vec = NULL;
   LLVMValueRef data_ptr0;
   LLVMValueRef data_ptr1;
   LLVMValueRef mipoff0 = NULL;
   LLVMValueRef mipoff1 = NULL;
   LLVMValueRef colors0;
   LLVMValueRef colors1;

   /* sample the first mipmap level */
   lp_build_mipmap_level_sizes(bld, ilevel0,
                               &size0,
                               &row_stride0_vec, &img_stride0_vec);
   if (bld->num_lods == 1) {
      /* single lod: use the level's base pointer directly */
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
   }
   else {
      /* This path should work for num_lods 1 too but slightly less efficient */
      data_ptr0 = bld->base_ptr;
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
   }

   /* AVX with >4-wide coords: use the float address-calc variants */
   if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest_afloat(bld,
                                              size0,
                                              row_stride0_vec, img_stride0_vec,
                                              data_ptr0, mipoff0, s, t, r, offsets,
                                              &colors0);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear_afloat(bld,
                                             size0,
                                             row_stride0_vec, img_stride0_vec,
                                             data_ptr0, mipoff0, s, t, r, offsets,
                                             &colors0);
      }
   }
   else {
      if (img_filter == PIPE_TEX_FILTER_NEAREST) {
         lp_build_sample_image_nearest(bld,
                                       size0,
                                       row_stride0_vec, img_stride0_vec,
                                       data_ptr0, mipoff0, s, t, r, offsets,
                                       &colors0);
      }
      else {
         assert(img_filter == PIPE_TEX_FILTER_LINEAR);
         lp_build_sample_image_linear(bld,
                                      size0,
                                      row_stride0_vec, img_stride0_vec,
                                      data_ptr0, mipoff0, s, t, r, offsets,
                                      &colors0);
      }
   }

   /* Store the first level's colors in the output variables */
   LLVMBuildStore(builder, colors0, colors_var);

   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
      /* convert the float lod fraction to 8-bit fixed point (x256) */
      LLVMValueRef h16vec_scale = lp_build_const_vec(bld->gallivm,
                                                     bld->levelf_bld.type, 256.0);
      LLVMTypeRef i32vec_type = bld->leveli_bld.vec_type;
      struct lp_build_if_state if_ctx;
      LLVMValueRef need_lerp;
      unsigned num_quads = bld->coord_bld.type.length / 4;
      unsigned i;

      lod_fpart = LLVMBuildFMul(builder, lod_fpart, h16vec_scale, "");
      lod_fpart = LLVMBuildFPToSI(builder, lod_fpart, i32vec_type, "lod_fpart.fixed16");

      /* need_lerp = lod_fpart > 0 */
      if (bld->num_lods == 1) {
         need_lerp = LLVMBuildICmp(builder, LLVMIntSGT,
                                   lod_fpart, bld->leveli_bld.zero,
                                   "need_lerp");
      }
      else {
         /*
          * We'll do mip filtering if any of the quads need it.
          * It might be better to split the vectors here and only fetch/filter
          * quads which need it.
          */
         /*
          * We need to clamp lod_fpart here since we can get negative
          * values which would screw up filtering if not all
          * lod_fpart values have same sign.
          * We can however then skip the greater than comparison.
          */
         lod_fpart = lp_build_max(&bld->leveli_bld, lod_fpart,
                                  bld->leveli_bld.zero);
         need_lerp = lp_build_any_true_range(&bld->leveli_bld, bld->num_lods, lod_fpart);
      }

      /* emit a runtime branch: skip the second-level fetch entirely when
       * the fraction is zero for all quads */
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
      {
         struct lp_build_context u8n_bld;

         lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width));

         /* sample the second mipmap level */
         lp_build_mipmap_level_sizes(bld, ilevel1,
                                     &size1,
                                     &row_stride1_vec, &img_stride1_vec);
         if (bld->num_lods == 1) {
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
         }
         else {
            data_ptr1 = bld->base_ptr;
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
         }

         if (util_cpu_caps.has_avx && bld->coord_type.length > 4) {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest_afloat(bld,
                                                    size1,
                                                    row_stride1_vec, img_stride1_vec,
                                                    data_ptr1, mipoff1, s, t, r, offsets,
                                                    &colors1);
            }
            else {
               lp_build_sample_image_linear_afloat(bld,
                                                   size1,
                                                   row_stride1_vec, img_stride1_vec,
                                                   data_ptr1, mipoff1, s, t, r, offsets,
                                                   &colors1);
            }
         }
         else {
            if (img_filter == PIPE_TEX_FILTER_NEAREST) {
               lp_build_sample_image_nearest(bld,
                                             size1,
                                             row_stride1_vec, img_stride1_vec,
                                             data_ptr1, mipoff1, s, t, r, offsets,
                                             &colors1);
            }
            else {
               lp_build_sample_image_linear(bld,
                                            size1,
                                            row_stride1_vec, img_stride1_vec,
                                            data_ptr1, mipoff1, s, t, r, offsets,
                                            &colors1);
            }
         }

         /* interpolate samples from the two mipmap levels */

         if (num_quads == 1 && bld->num_lods == 1) {
            /* one scalar weight: truncate to u8 and splat across the vector */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, u8n_bld.elem_type, "");
            lod_fpart = lp_build_broadcast_scalar(&u8n_bld, lod_fpart);

#if HAVE_LLVM == 0x208
            /* This was a work-around for a bug in LLVM 2.8.
             * Evidently, something goes wrong in the construction of the
             * lod_fpart short[8] vector. Adding this no-effect shuffle seems
             * to force the vector to be properly constructed.
             * Tested with mesa-demos/src/tests/mipmap_limits.c (press t, f).
             */
#error Unsupported
#endif
         }
         else {
            unsigned num_chans_per_lod = 4 * bld->coord_type.length / bld->num_lods;
            LLVMTypeRef tmp_vec_type = LLVMVectorType(u8n_bld.elem_type, bld->leveli_bld.type.length);
            LLVMValueRef shuffle[LP_MAX_VECTOR_LENGTH];

            /* Take the LSB of lod_fpart */
            lod_fpart = LLVMBuildTrunc(builder, lod_fpart, tmp_vec_type, "");

            /* Broadcast each lod weight into their respective channels */
            for (i = 0; i < u8n_bld.type.length; ++i) {
               shuffle[i] = lp_build_const_int32(bld->gallivm, i / num_chans_per_lod);
            }
            lod_fpart = LLVMBuildShuffleVector(builder, lod_fpart, LLVMGetUndef(tmp_vec_type),
                                               LLVMConstVector(shuffle, u8n_bld.type.length), "");
         }

         colors0 = lp_build_lerp(&u8n_bld, lod_fpart,
                                 colors0, colors1,
                                 LP_BLD_LERP_PRESCALED_WEIGHTS);

         /* overwrite the first level's colors with the lerped result */
         LLVMBuildStore(builder, colors0, colors_var);
      }
      lp_build_endif(&if_ctx);
   }
}
||
1550 | |||
1551 | |||
1552 | |||
1553 | /** |
||
1554 | * Texture sampling in AoS format. Used when sampling common 32-bit/texel |
||
1555 | * formats. 1D/2D/3D/cube texture supported. All mipmap sampling modes |
||
1556 | * but only limited texture coord wrap modes. |
||
1557 | */ |
||
1558 | void |
||
1559 | lp_build_sample_aos(struct lp_build_sample_context *bld, |
||
1560 | unsigned sampler_unit, |
||
1561 | LLVMValueRef s, |
||
1562 | LLVMValueRef t, |
||
1563 | LLVMValueRef r, |
||
1564 | const LLVMValueRef *offsets, |
||
1565 | LLVMValueRef lod_ipart, |
||
1566 | LLVMValueRef lod_fpart, |
||
1567 | LLVMValueRef ilevel0, |
||
1568 | LLVMValueRef ilevel1, |
||
1569 | LLVMValueRef texel_out[4]) |
||
1570 | { |
||
1571 | struct lp_build_context *int_bld = &bld->int_bld; |
||
1572 | LLVMBuilderRef builder = bld->gallivm->builder; |
||
1573 | const unsigned mip_filter = bld->static_sampler_state->min_mip_filter; |
||
1574 | const unsigned min_filter = bld->static_sampler_state->min_img_filter; |
||
1575 | const unsigned mag_filter = bld->static_sampler_state->mag_img_filter; |
||
1576 | const unsigned dims = bld->dims; |
||
1577 | LLVMValueRef packed_var, packed; |
||
1578 | LLVMValueRef unswizzled[4]; |
||
1579 | struct lp_build_context u8n_bld; |
||
1580 | |||
1581 | /* we only support the common/simple wrap modes at this time */ |
||
1582 | assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_s)); |
||
1583 | if (dims >= 2) |
||
1584 | assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_t)); |
||
1585 | if (dims >= 3) |
||
1586 | assert(lp_is_simple_wrap_mode(bld->static_sampler_state->wrap_r)); |
||
1587 | |||
1588 | |||
1589 | /* make 8-bit unorm builder context */ |
||
1590 | lp_build_context_init(&u8n_bld, bld->gallivm, lp_type_unorm(8, bld->vector_width)); |
||
1591 | |||
1592 | /* |
||
1593 | * Get/interpolate texture colors. |
||
1594 | */ |
||
1595 | |||
1596 | packed_var = lp_build_alloca(bld->gallivm, u8n_bld.vec_type, "packed_var"); |
||
1597 | |||
1598 | if (min_filter == mag_filter) { |
||
1599 | /* no need to distinguish between minification and magnification */ |
||
1600 | lp_build_sample_mipmap(bld, |
||
1601 | min_filter, mip_filter, |
||
1602 | s, t, r, offsets, |
||
1603 | ilevel0, ilevel1, lod_fpart, |
||
1604 | packed_var); |
||
1605 | } |
||
1606 | else { |
||
1607 | /* Emit conditional to choose min image filter or mag image filter |
||
1608 | * depending on the lod being > 0 or <= 0, respectively. |
||
1609 | */ |
||
1610 | struct lp_build_if_state if_ctx; |
||
1611 | LLVMValueRef minify; |
||
1612 | |||
1613 | /* |
||
1614 | * XXX this should to all lods into account, if some are min |
||
1615 | * some max probably could hack up the coords/weights in the linear |
||
1616 | * path with selects to work for nearest. |
||
1617 | * If that's just two quads sitting next to each other it seems |
||
1618 | * quite ok to do the same filtering method on both though, at |
||
1619 | * least unless we have explicit lod (and who uses different |
||
1620 | * min/mag filter with that?) |
||
1621 | */ |
||
1622 | if (bld->num_lods > 1) |
||
1623 | lod_ipart = LLVMBuildExtractElement(builder, lod_ipart, |
||
1624 | lp_build_const_int32(bld->gallivm, 0), ""); |
||
1625 | |||
1626 | /* minify = lod >= 0.0 */ |
||
1627 | minify = LLVMBuildICmp(builder, LLVMIntSGE, |
||
1628 | lod_ipart, int_bld->zero, ""); |
||
1629 | |||
1630 | lp_build_if(&if_ctx, bld->gallivm, minify); |
||
1631 | { |
||
1632 | /* Use the minification filter */ |
||
1633 | lp_build_sample_mipmap(bld, |
||
1634 | min_filter, mip_filter, |
||
1635 | s, t, r, offsets, |
||
1636 | ilevel0, ilevel1, lod_fpart, |
||
1637 | packed_var); |
||
1638 | } |
||
1639 | lp_build_else(&if_ctx); |
||
1640 | { |
||
1641 | /* Use the magnification filter */ |
||
1642 | lp_build_sample_mipmap(bld, |
||
1643 | mag_filter, PIPE_TEX_MIPFILTER_NONE, |
||
1644 | s, t, r, offsets, |
||
1645 | ilevel0, NULL, NULL, |
||
1646 | packed_var); |
||
1647 | } |
||
1648 | lp_build_endif(&if_ctx); |
||
1649 | } |
||
1650 | |||
1651 | packed = LLVMBuildLoad(builder, packed_var, ""); |
||
1652 | |||
1653 | /* |
||
1654 | * Convert to SoA and swizzle. |
||
1655 | */ |
||
1656 | lp_build_rgba8_to_fi32_soa(bld->gallivm, |
||
1657 | bld->texel_type, |
||
1658 | packed, unswizzled); |
||
1659 | |||
1660 | if (util_format_is_rgba8_variant(bld->format_desc)) { |
||
1661 | lp_build_format_swizzle_soa(bld->format_desc, |
||
1662 | &bld->texel_bld, |
||
1663 | unswizzled, texel_out); |
||
1664 | } |
||
1665 | else { |
||
1666 | texel_out[0] = unswizzled[0]; |
||
1667 | texel_out[1] = unswizzled[1]; |
||
1668 | texel_out[2] = unswizzled[2]; |
||
1669 | texel_out[3] = unswizzled[3]; |
||
1670 | } |
||
1671 | }=>>>>>>>>>>>>>>>>>>> |