Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Details | Last modification | View Log | RSS feed

Rev Author Line No. Line
5564 serge 1
/**************************************************************************
2
 *
3
 * Copyright 2009 VMware, Inc.
4
 * All Rights Reserved.
5
 *
6
 * Permission is hereby granted, free of charge, to any person obtaining a
7
 * copy of this software and associated documentation files (the
8
 * "Software"), to deal in the Software without restriction, including
9
 * without limitation the rights to use, copy, modify, merge, publish,
10
 * distribute, sub license, and/or sell copies of the Software, and to
11
 * permit persons to whom the Software is furnished to do so, subject to
12
 * the following conditions:
13
 *
14
 * The above copyright notice and this permission notice (including the
15
 * next paragraph) shall be included in all copies or substantial portions
16
 * of the Software.
17
 *
18
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
19
 * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
20
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
21
 * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
22
 * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
23
 * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
24
 * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
25
 *
26
 **************************************************************************/
27
 
28
/**
29
 * @file
30
 * Texture sampling -- SoA.
31
 *
32
 * @author Jose Fonseca 
33
 * @author Brian Paul 
34
 */
35
 
36
#include "pipe/p_defines.h"
37
#include "pipe/p_state.h"
38
#include "pipe/p_shader_tokens.h"
39
#include "util/u_debug.h"
40
#include "util/u_dump.h"
41
#include "util/u_memory.h"
42
#include "util/u_math.h"
43
#include "util/u_format.h"
44
#include "util/u_cpu_detect.h"
45
#include "util/u_format_rgb9e5.h"
46
#include "lp_bld_debug.h"
47
#include "lp_bld_type.h"
48
#include "lp_bld_const.h"
49
#include "lp_bld_conv.h"
50
#include "lp_bld_arit.h"
51
#include "lp_bld_bitarit.h"
52
#include "lp_bld_logic.h"
53
#include "lp_bld_printf.h"
54
#include "lp_bld_swizzle.h"
55
#include "lp_bld_flow.h"
56
#include "lp_bld_gather.h"
57
#include "lp_bld_format.h"
58
#include "lp_bld_sample.h"
59
#include "lp_bld_sample_aos.h"
60
#include "lp_bld_struct.h"
61
#include "lp_bld_quad.h"
62
#include "lp_bld_pack.h"
63
 
64
 
65
/**
66
 * Generate code to fetch a texel from a texture at int coords (x, y, z).
67
 * The computation depends on whether the texture is 1D, 2D or 3D.
68
 * The result, texel, will be float vectors:
69
 *   texel[0] = red values
70
 *   texel[1] = green values
71
 *   texel[2] = blue values
72
 *   texel[3] = alpha values
73
 */
74
static void
75
lp_build_sample_texel_soa(struct lp_build_sample_context *bld,
76
                          LLVMValueRef width,
77
                          LLVMValueRef height,
78
                          LLVMValueRef depth,
79
                          LLVMValueRef x,
80
                          LLVMValueRef y,
81
                          LLVMValueRef z,
82
                          LLVMValueRef y_stride,
83
                          LLVMValueRef z_stride,
84
                          LLVMValueRef data_ptr,
85
                          LLVMValueRef mipoffsets,
86
                          LLVMValueRef texel_out[4])
87
{
88
   const struct lp_static_sampler_state *static_state = bld->static_sampler_state;
89
   const unsigned dims = bld->dims;
90
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
91
   LLVMBuilderRef builder = bld->gallivm->builder;
92
   LLVMValueRef offset;
93
   LLVMValueRef i, j;
94
   LLVMValueRef use_border = NULL;
95
 
96
   /* use_border = x < 0 || x >= width || y < 0 || y >= height */
97
   if (lp_sampler_wrap_mode_uses_border_color(static_state->wrap_s,
98
                                              static_state->min_img_filter,
99
                                              static_state->mag_img_filter)) {
100
      LLVMValueRef b1, b2;
101
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
102
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
103
      use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
104
   }
105
 
106
   if (dims >= 2 &&
107
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_t,
108
                                              static_state->min_img_filter,
109
                                              static_state->mag_img_filter)) {
110
      LLVMValueRef b1, b2;
111
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
112
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
113
      if (use_border) {
114
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
115
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
116
      }
117
      else {
118
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
119
      }
120
   }
121
 
122
   if (dims == 3 &&
123
       lp_sampler_wrap_mode_uses_border_color(static_state->wrap_r,
124
                                              static_state->min_img_filter,
125
                                              static_state->mag_img_filter)) {
126
      LLVMValueRef b1, b2;
127
      b1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
128
      b2 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
129
      if (use_border) {
130
         use_border = LLVMBuildOr(builder, use_border, b1, "ub_or_b1");
131
         use_border = LLVMBuildOr(builder, use_border, b2, "ub_or_b2");
132
      }
133
      else {
134
         use_border = LLVMBuildOr(builder, b1, b2, "b1_or_b2");
135
      }
136
   }
137
 
138
   /* convert x,y,z coords to linear offset from start of texture, in bytes */
139
   lp_build_sample_offset(&bld->int_coord_bld,
140
                          bld->format_desc,
141
                          x, y, z, y_stride, z_stride,
142
                          &offset, &i, &j);
143
   if (mipoffsets) {
144
      offset = lp_build_add(&bld->int_coord_bld, offset, mipoffsets);
145
   }
146
 
147
   if (use_border) {
148
      /* If we can sample the border color, it means that texcoords may
149
       * lie outside the bounds of the texture image.  We need to do
150
       * something to prevent reading out of bounds and causing a segfault.
151
       *
152
       * Simply AND the texture coords with !use_border.  This will cause
153
       * coords which are out of bounds to become zero.  Zero's guaranteed
154
       * to be inside the texture image.
155
       */
156
      offset = lp_build_andnot(&bld->int_coord_bld, offset, use_border);
157
   }
158
 
159
   lp_build_fetch_rgba_soa(bld->gallivm,
160
                           bld->format_desc,
161
                           bld->texel_type,
162
                           data_ptr, offset,
163
                           i, j,
164
                           texel_out);
165
 
166
   /*
167
    * Note: if we find an app which frequently samples the texture border
168
    * we might want to implement a true conditional here to avoid sampling
169
    * the texture whenever possible (since that's quite a bit of code).
170
    * Ex:
171
    *   if (use_border) {
172
    *      texel = border_color;
173
    *   }
174
    *   else {
175
    *      texel = sample_texture(coord);
176
    *   }
177
    * As it is now, we always sample the texture, then selectively replace
178
    * the texel color results with the border color.
179
    */
180
 
181
   if (use_border) {
182
      /* select texel color or border color depending on use_border. */
183
      const struct util_format_description *format_desc = bld->format_desc;
184
      int chan;
185
      struct lp_type border_type = bld->texel_type;
186
      border_type.length = 4;
187
      /*
188
       * Only replace channels which are actually present. The others should
189
       * get optimized away eventually by sampler_view swizzle anyway but it's
190
       * easier too.
191
       */
192
      for (chan = 0; chan < 4; chan++) {
193
         unsigned chan_s;
194
         /* reverse-map channel... */
195
         for (chan_s = 0; chan_s < 4; chan_s++) {
196
            if (chan_s == format_desc->swizzle[chan]) {
197
               break;
198
            }
199
         }
200
         if (chan_s <= 3) {
201
            /* use the already clamped color */
202
            LLVMValueRef idx = lp_build_const_int32(bld->gallivm, chan);
203
            LLVMValueRef border_chan;
204
 
205
            border_chan = lp_build_extract_broadcast(bld->gallivm,
206
                                                     border_type,
207
                                                     bld->texel_type,
208
                                                     bld->border_color_clamped,
209
                                                     idx);
210
            texel_out[chan] = lp_build_select(&bld->texel_bld, use_border,
211
                                              border_chan, texel_out[chan]);
212
         }
213
      }
214
   }
215
}
216
 
217
 
218
/**
219
 * Helper to compute the mirror function for the PIPE_WRAP_MIRROR modes.
220
 */
221
static LLVMValueRef
222
lp_build_coord_mirror(struct lp_build_sample_context *bld,
223
                      LLVMValueRef coord)
224
{
225
   struct lp_build_context *coord_bld = &bld->coord_bld;
226
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
227
   LLVMValueRef fract, flr, isOdd;
228
 
229
   lp_build_ifloor_fract(coord_bld, coord, &flr, &fract);
230
 
231
   /* isOdd = flr & 1 */
232
   isOdd = LLVMBuildAnd(bld->gallivm->builder, flr, int_coord_bld->one, "");
233
 
234
   /* make coord positive or negative depending on isOdd */
235
   coord = lp_build_set_sign(coord_bld, fract, isOdd);
236
 
237
   /* convert isOdd to float */
238
   isOdd = lp_build_int_to_float(coord_bld, isOdd);
239
 
240
   /* add isOdd to coord */
241
   coord = lp_build_add(coord_bld, coord, isOdd);
242
 
243
   return coord;
244
}
245
 
246
 
247
/**
248
 * Helper to compute the first coord and the weight for
249
 * linear wrap repeat npot textures
250
 */
251
void
252
lp_build_coord_repeat_npot_linear(struct lp_build_sample_context *bld,
253
                                  LLVMValueRef coord_f,
254
                                  LLVMValueRef length_i,
255
                                  LLVMValueRef length_f,
256
                                  LLVMValueRef *coord0_i,
257
                                  LLVMValueRef *weight_f)
258
{
259
   struct lp_build_context *coord_bld = &bld->coord_bld;
260
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
261
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
262
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length_i,
263
                                                int_coord_bld->one);
264
   LLVMValueRef mask;
265
   /* wrap with normalized floats is just fract */
266
   coord_f = lp_build_fract(coord_bld, coord_f);
267
   /* mul by size and subtract 0.5 */
268
   coord_f = lp_build_mul(coord_bld, coord_f, length_f);
269
   coord_f = lp_build_sub(coord_bld, coord_f, half);
270
   /*
271
    * we avoided the 0.5/length division before the repeat wrap,
272
    * now need to fix up edge cases with selects
273
    */
274
   /* convert to int, compute lerp weight */
275
   lp_build_ifloor_fract(coord_bld, coord_f, coord0_i, weight_f);
276
   mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
277
                           PIPE_FUNC_LESS, *coord0_i, int_coord_bld->zero);
278
   *coord0_i = lp_build_select(int_coord_bld, mask, length_minus_one, *coord0_i);
279
}
280
 
281
 
282
/**
283
 * Build LLVM code for texture wrap mode for linear filtering.
284
 * \param x0_out  returns first integer texcoord
285
 * \param x1_out  returns second integer texcoord
286
 * \param weight_out  returns linear interpolation weight
287
 */
288
static void
289
lp_build_sample_wrap_linear(struct lp_build_sample_context *bld,
290
                            LLVMValueRef coord,
291
                            LLVMValueRef length,
292
                            LLVMValueRef length_f,
293
                            LLVMValueRef offset,
294
                            boolean is_pot,
295
                            unsigned wrap_mode,
296
                            LLVMValueRef *x0_out,
297
                            LLVMValueRef *x1_out,
298
                            LLVMValueRef *weight_out)
299
{
300
   struct lp_build_context *coord_bld = &bld->coord_bld;
301
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
302
   LLVMBuilderRef builder = bld->gallivm->builder;
303
   LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5);
304
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
305
   LLVMValueRef coord0, coord1, weight;
306
 
307
   switch(wrap_mode) {
308
   case PIPE_TEX_WRAP_REPEAT:
309
      if (is_pot) {
310
         /* mul by size and subtract 0.5 */
311
         coord = lp_build_mul(coord_bld, coord, length_f);
312
         coord = lp_build_sub(coord_bld, coord, half);
313
         if (offset) {
314
            offset = lp_build_int_to_float(coord_bld, offset);
315
            coord = lp_build_add(coord_bld, coord, offset);
316
         }
317
         /* convert to int, compute lerp weight */
318
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
319
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
320
         /* repeat wrap */
321
         coord0 = LLVMBuildAnd(builder, coord0, length_minus_one, "");
322
         coord1 = LLVMBuildAnd(builder, coord1, length_minus_one, "");
323
      }
324
      else {
325
         LLVMValueRef mask;
326
         if (offset) {
327
            offset = lp_build_int_to_float(coord_bld, offset);
328
            offset = lp_build_div(coord_bld, offset, length_f);
329
            coord = lp_build_add(coord_bld, coord, offset);
330
         }
331
         lp_build_coord_repeat_npot_linear(bld, coord,
332
                                           length, length_f,
333
                                           &coord0, &weight);
334
         mask = lp_build_compare(int_coord_bld->gallivm, int_coord_bld->type,
335
                                 PIPE_FUNC_NOTEQUAL, coord0, length_minus_one);
336
         coord1 = LLVMBuildAnd(builder,
337
                               lp_build_add(int_coord_bld, coord0, int_coord_bld->one),
338
                               mask, "");
339
      }
340
      break;
341
 
342
   case PIPE_TEX_WRAP_CLAMP:
343
      if (bld->static_sampler_state->normalized_coords) {
344
         /* scale coord to length */
345
         coord = lp_build_mul(coord_bld, coord, length_f);
346
      }
347
      if (offset) {
348
         offset = lp_build_int_to_float(coord_bld, offset);
349
         coord = lp_build_add(coord_bld, coord, offset);
350
      }
351
 
352
      /* clamp to [0, length] */
353
      coord = lp_build_clamp(coord_bld, coord, coord_bld->zero, length_f);
354
 
355
      coord = lp_build_sub(coord_bld, coord, half);
356
 
357
      /* convert to int, compute lerp weight */
358
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
359
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
360
      break;
361
 
362
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
363
      {
364
         struct lp_build_context abs_coord_bld = bld->coord_bld;
365
         abs_coord_bld.type.sign = FALSE;
366
 
367
         if (bld->static_sampler_state->normalized_coords) {
368
            /* mul by tex size */
369
            coord = lp_build_mul(coord_bld, coord, length_f);
370
         }
371
         if (offset) {
372
            offset = lp_build_int_to_float(coord_bld, offset);
373
            coord = lp_build_add(coord_bld, coord, offset);
374
         }
375
 
376
         /* clamp to length max */
377
         coord = lp_build_min(coord_bld, coord, length_f);
378
         /* subtract 0.5 */
379
         coord = lp_build_sub(coord_bld, coord, half);
380
         /* clamp to [0, length - 0.5] */
381
         coord = lp_build_max(coord_bld, coord, coord_bld->zero);
382
         /* convert to int, compute lerp weight */
383
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
384
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
385
         /* coord1 = min(coord1, length-1) */
386
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
387
         break;
388
      }
389
 
390
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
391
      if (bld->static_sampler_state->normalized_coords) {
392
         /* scale coord to length */
393
         coord = lp_build_mul(coord_bld, coord, length_f);
394
      }
395
      if (offset) {
396
         offset = lp_build_int_to_float(coord_bld, offset);
397
         coord = lp_build_add(coord_bld, coord, offset);
398
      }
399
      /* was: clamp to [-0.5, length + 0.5], then sub 0.5 */
400
      /* can skip clamp (though might not work for very large coord values */
401
      coord = lp_build_sub(coord_bld, coord, half);
402
      /* convert to int, compute lerp weight */
403
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
404
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
405
      break;
406
 
407
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
408
      /* compute mirror function */
409
      coord = lp_build_coord_mirror(bld, coord);
410
 
411
      /* scale coord to length */
412
      coord = lp_build_mul(coord_bld, coord, length_f);
413
      coord = lp_build_sub(coord_bld, coord, half);
414
      if (offset) {
415
         offset = lp_build_int_to_float(coord_bld, offset);
416
         coord = lp_build_add(coord_bld, coord, offset);
417
      }
418
 
419
      /* convert to int, compute lerp weight */
420
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
421
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
422
 
423
      /* coord0 = max(coord0, 0) */
424
      coord0 = lp_build_max(int_coord_bld, coord0, int_coord_bld->zero);
425
      /* coord1 = min(coord1, length-1) */
426
      coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
427
      break;
428
 
429
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
430
      if (bld->static_sampler_state->normalized_coords) {
431
         /* scale coord to length */
432
         coord = lp_build_mul(coord_bld, coord, length_f);
433
      }
434
      if (offset) {
435
         offset = lp_build_int_to_float(coord_bld, offset);
436
         coord = lp_build_add(coord_bld, coord, offset);
437
      }
438
      coord = lp_build_abs(coord_bld, coord);
439
 
440
      /* clamp to [0, length] */
441
      coord = lp_build_min(coord_bld, coord, length_f);
442
 
443
      coord = lp_build_sub(coord_bld, coord, half);
444
 
445
      /* convert to int, compute lerp weight */
446
      lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
447
      coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
448
      break;
449
 
450
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
451
      {
452
         struct lp_build_context abs_coord_bld = bld->coord_bld;
453
         abs_coord_bld.type.sign = FALSE;
454
 
455
         if (bld->static_sampler_state->normalized_coords) {
456
            /* scale coord to length */
457
            coord = lp_build_mul(coord_bld, coord, length_f);
458
         }
459
         if (offset) {
460
            offset = lp_build_int_to_float(coord_bld, offset);
461
            coord = lp_build_add(coord_bld, coord, offset);
462
         }
463
         coord = lp_build_abs(coord_bld, coord);
464
 
465
         /* clamp to length max */
466
         coord = lp_build_min(coord_bld, coord, length_f);
467
         /* subtract 0.5 */
468
         coord = lp_build_sub(coord_bld, coord, half);
469
         /* clamp to [0, length - 0.5] */
470
         coord = lp_build_max(coord_bld, coord, coord_bld->zero);
471
 
472
         /* convert to int, compute lerp weight */
473
         lp_build_ifloor_fract(&abs_coord_bld, coord, &coord0, &weight);
474
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
475
         /* coord1 = min(coord1, length-1) */
476
         coord1 = lp_build_min(int_coord_bld, coord1, length_minus_one);
477
      }
478
      break;
479
 
480
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
481
      {
482
         if (bld->static_sampler_state->normalized_coords) {
483
            /* scale coord to length */
484
            coord = lp_build_mul(coord_bld, coord, length_f);
485
         }
486
         if (offset) {
487
            offset = lp_build_int_to_float(coord_bld, offset);
488
            coord = lp_build_add(coord_bld, coord, offset);
489
         }
490
         coord = lp_build_abs(coord_bld, coord);
491
 
492
         /* was: clamp to [-0.5, length + 0.5] then sub 0.5 */
493
         /* skip clamp - always positive, and other side
494
            only potentially matters for very large coords */
495
         coord = lp_build_sub(coord_bld, coord, half);
496
 
497
         /* convert to int, compute lerp weight */
498
         lp_build_ifloor_fract(coord_bld, coord, &coord0, &weight);
499
         coord1 = lp_build_add(int_coord_bld, coord0, int_coord_bld->one);
500
      }
501
      break;
502
 
503
   default:
504
      assert(0);
505
      coord0 = NULL;
506
      coord1 = NULL;
507
      weight = NULL;
508
   }
509
 
510
   *x0_out = coord0;
511
   *x1_out = coord1;
512
   *weight_out = weight;
513
}
514
 
515
 
516
/**
517
 * Build LLVM code for texture wrap mode for nearest filtering.
518
 * \param coord  the incoming texcoord (nominally in [0,1])
519
 * \param length  the texture size along one dimension, as int vector
520
 * \param length_f  the texture size along one dimension, as float vector
521
 * \param offset  texel offset along one dimension (as int vector)
522
 * \param is_pot  if TRUE, length is a power of two
523
 * \param wrap_mode  one of PIPE_TEX_WRAP_x
524
 */
525
static LLVMValueRef
526
lp_build_sample_wrap_nearest(struct lp_build_sample_context *bld,
527
                             LLVMValueRef coord,
528
                             LLVMValueRef length,
529
                             LLVMValueRef length_f,
530
                             LLVMValueRef offset,
531
                             boolean is_pot,
532
                             unsigned wrap_mode)
533
{
534
   struct lp_build_context *coord_bld = &bld->coord_bld;
535
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
536
   LLVMBuilderRef builder = bld->gallivm->builder;
537
   LLVMValueRef length_minus_one = lp_build_sub(int_coord_bld, length, int_coord_bld->one);
538
   LLVMValueRef icoord;
539
 
540
   switch(wrap_mode) {
541
   case PIPE_TEX_WRAP_REPEAT:
542
      if (is_pot) {
543
         coord = lp_build_mul(coord_bld, coord, length_f);
544
         icoord = lp_build_ifloor(coord_bld, coord);
545
         if (offset) {
546
            icoord = lp_build_add(int_coord_bld, icoord, offset);
547
         }
548
         icoord = LLVMBuildAnd(builder, icoord, length_minus_one, "");
549
      }
550
      else {
551
          if (offset) {
552
             offset = lp_build_int_to_float(coord_bld, offset);
553
             offset = lp_build_div(coord_bld, offset, length_f);
554
             coord = lp_build_add(coord_bld, coord, offset);
555
          }
556
          /* take fraction, unnormalize */
557
          coord = lp_build_fract_safe(coord_bld, coord);
558
          coord = lp_build_mul(coord_bld, coord, length_f);
559
          icoord = lp_build_itrunc(coord_bld, coord);
560
      }
561
      break;
562
 
563
   case PIPE_TEX_WRAP_CLAMP:
564
   case PIPE_TEX_WRAP_CLAMP_TO_EDGE:
565
      if (bld->static_sampler_state->normalized_coords) {
566
         /* scale coord to length */
567
         coord = lp_build_mul(coord_bld, coord, length_f);
568
      }
569
 
570
      /* floor */
571
      /* use itrunc instead since we clamp to 0 anyway */
572
      icoord = lp_build_itrunc(coord_bld, coord);
573
      if (offset) {
574
         icoord = lp_build_add(int_coord_bld, icoord, offset);
575
      }
576
 
577
      /* clamp to [0, length - 1]. */
578
      icoord = lp_build_clamp(int_coord_bld, icoord, int_coord_bld->zero,
579
                              length_minus_one);
580
      break;
581
 
582
   case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
583
      if (bld->static_sampler_state->normalized_coords) {
584
         /* scale coord to length */
585
         coord = lp_build_mul(coord_bld, coord, length_f);
586
      }
587
      /* no clamp necessary, border masking will handle this */
588
      icoord = lp_build_ifloor(coord_bld, coord);
589
      if (offset) {
590
         icoord = lp_build_add(int_coord_bld, icoord, offset);
591
      }
592
      break;
593
 
594
   case PIPE_TEX_WRAP_MIRROR_REPEAT:
595
      if (offset) {
596
         offset = lp_build_int_to_float(coord_bld, offset);
597
         offset = lp_build_div(coord_bld, offset, length_f);
598
         coord = lp_build_add(coord_bld, coord, offset);
599
      }
600
      /* compute mirror function */
601
      coord = lp_build_coord_mirror(bld, coord);
602
 
603
      /* scale coord to length */
604
      assert(bld->static_sampler_state->normalized_coords);
605
      coord = lp_build_mul(coord_bld, coord, length_f);
606
 
607
      /* itrunc == ifloor here */
608
      icoord = lp_build_itrunc(coord_bld, coord);
609
 
610
      /* clamp to [0, length - 1] */
611
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
612
      break;
613
 
614
   case PIPE_TEX_WRAP_MIRROR_CLAMP:
615
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:
616
      if (bld->static_sampler_state->normalized_coords) {
617
         /* scale coord to length */
618
         coord = lp_build_mul(coord_bld, coord, length_f);
619
      }
620
      if (offset) {
621
         offset = lp_build_int_to_float(coord_bld, offset);
622
         coord = lp_build_add(coord_bld, coord, offset);
623
      }
624
      coord = lp_build_abs(coord_bld, coord);
625
 
626
      /* itrunc == ifloor here */
627
      icoord = lp_build_itrunc(coord_bld, coord);
628
 
629
      /* clamp to [0, length - 1] */
630
      icoord = lp_build_min(int_coord_bld, icoord, length_minus_one);
631
      break;
632
 
633
   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER:
634
      if (bld->static_sampler_state->normalized_coords) {
635
         /* scale coord to length */
636
         coord = lp_build_mul(coord_bld, coord, length_f);
637
      }
638
      if (offset) {
639
         offset = lp_build_int_to_float(coord_bld, offset);
640
         coord = lp_build_add(coord_bld, coord, offset);
641
      }
642
      coord = lp_build_abs(coord_bld, coord);
643
 
644
      /* itrunc == ifloor here */
645
      icoord = lp_build_itrunc(coord_bld, coord);
646
      break;
647
 
648
   default:
649
      assert(0);
650
      icoord = NULL;
651
   }
652
 
653
   return icoord;
654
}
655
 
656
 
657
/**
658
 * Do shadow test/comparison.
659
 * \param p shadow ref value
660
 * \param texel  the texel to compare against
661
 */
662
static LLVMValueRef
663
lp_build_sample_comparefunc(struct lp_build_sample_context *bld,
664
                            LLVMValueRef p,
665
                            LLVMValueRef texel)
666
{
667
   struct lp_build_context *texel_bld = &bld->texel_bld;
668
   LLVMValueRef res;
669
 
670
   if (0) {
671
      //lp_build_print_value(bld->gallivm, "shadow cmp coord", p);
672
      lp_build_print_value(bld->gallivm, "shadow cmp texel", texel);
673
   }
674
 
675
   /* result = (p FUNC texel) ? 1 : 0 */
676
   /*
677
    * honor d3d10 floating point rules here, which state that comparisons
678
    * are ordered except NOT_EQUAL which is unordered.
679
    */
680
   if (bld->static_sampler_state->compare_func != PIPE_FUNC_NOTEQUAL) {
681
      res = lp_build_cmp_ordered(texel_bld, bld->static_sampler_state->compare_func,
682
                                 p, texel);
683
   }
684
   else {
685
      res = lp_build_cmp(texel_bld, bld->static_sampler_state->compare_func,
686
                         p, texel);
687
   }
688
   return res;
689
}
690
 
691
 
692
/**
693
 * Generate code to sample a mipmap level with nearest filtering.
694
 * If sampling a cube texture, r = cube face in [0,5].
695
 */
696
static void
697
lp_build_sample_image_nearest(struct lp_build_sample_context *bld,
698
                              LLVMValueRef size,
699
                              LLVMValueRef row_stride_vec,
700
                              LLVMValueRef img_stride_vec,
701
                              LLVMValueRef data_ptr,
702
                              LLVMValueRef mipoffsets,
703
                              LLVMValueRef *coords,
704
                              const LLVMValueRef *offsets,
705
                              LLVMValueRef colors_out[4])
706
{
707
   const unsigned dims = bld->dims;
708
   LLVMValueRef width_vec;
709
   LLVMValueRef height_vec;
710
   LLVMValueRef depth_vec;
711
   LLVMValueRef flt_size;
712
   LLVMValueRef flt_width_vec;
713
   LLVMValueRef flt_height_vec;
714
   LLVMValueRef flt_depth_vec;
715
   LLVMValueRef x, y = NULL, z = NULL;
716
 
717
   lp_build_extract_image_sizes(bld,
718
                                &bld->int_size_bld,
719
                                bld->int_coord_type,
720
                                size,
721
                                &width_vec, &height_vec, &depth_vec);
722
 
723
   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
724
 
725
   lp_build_extract_image_sizes(bld,
726
                                &bld->float_size_bld,
727
                                bld->coord_type,
728
                                flt_size,
729
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);
730
 
731
   /*
732
    * Compute integer texcoords.
733
    */
734
   x = lp_build_sample_wrap_nearest(bld, coords[0], width_vec,
735
                                    flt_width_vec, offsets[0],
736
                                    bld->static_texture_state->pot_width,
737
                                    bld->static_sampler_state->wrap_s);
738
   lp_build_name(x, "tex.x.wrapped");
739
 
740
   if (dims >= 2) {
741
      y = lp_build_sample_wrap_nearest(bld, coords[1], height_vec,
742
                                       flt_height_vec, offsets[1],
743
                                       bld->static_texture_state->pot_height,
744
                                       bld->static_sampler_state->wrap_t);
745
      lp_build_name(y, "tex.y.wrapped");
746
 
747
      if (dims == 3) {
748
         z = lp_build_sample_wrap_nearest(bld, coords[2], depth_vec,
749
                                          flt_depth_vec, offsets[2],
750
                                          bld->static_texture_state->pot_depth,
751
                                          bld->static_sampler_state->wrap_r);
752
         lp_build_name(z, "tex.z.wrapped");
753
      }
754
   }
755
   if (has_layer_coord(bld->static_texture_state->target)) {
756
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
757
         /* add cube layer to face */
758
         z = lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
759
      }
760
      else {
761
         z = coords[2];
762
      }
763
      lp_build_name(z, "tex.z.layer");
764
   }
765
 
766
   /*
767
    * Get texture colors.
768
    */
769
   lp_build_sample_texel_soa(bld,
770
                             width_vec, height_vec, depth_vec,
771
                             x, y, z,
772
                             row_stride_vec, img_stride_vec,
773
                             data_ptr, mipoffsets, colors_out);
774
 
775
   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
776
      LLVMValueRef cmpval;
777
      cmpval = lp_build_sample_comparefunc(bld, coords[4], colors_out[0]);
778
      /* this is really just a AND 1.0, cmpval but llvm is clever enough */
779
      colors_out[0] = lp_build_select(&bld->texel_bld, cmpval,
780
                                      bld->texel_bld.one, bld->texel_bld.zero);
781
      colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
782
   }
783
 
784
}
785
 
786
 
787
/**
788
 * Like a lerp, but inputs are 0/~0 masks, so can simplify slightly.
789
 */
790
static LLVMValueRef
791
lp_build_masklerp(struct lp_build_context *bld,
792
                 LLVMValueRef weight,
793
                 LLVMValueRef mask0,
794
                 LLVMValueRef mask1)
795
{
796
   struct gallivm_state *gallivm = bld->gallivm;
797
   LLVMBuilderRef builder = gallivm->builder;
798
   LLVMValueRef weight2;
799
 
800
   weight2 = lp_build_sub(bld, bld->one, weight);
801
   weight = LLVMBuildBitCast(builder, weight,
802
                              lp_build_int_vec_type(gallivm, bld->type), "");
803
   weight2 = LLVMBuildBitCast(builder, weight2,
804
                              lp_build_int_vec_type(gallivm, bld->type), "");
805
   weight = LLVMBuildAnd(builder, weight, mask1, "");
806
   weight2 = LLVMBuildAnd(builder, weight2, mask0, "");
807
   weight = LLVMBuildBitCast(builder, weight, bld->vec_type, "");
808
   weight2 = LLVMBuildBitCast(builder, weight2, bld->vec_type, "");
809
   return lp_build_add(bld, weight, weight2);
810
}
811
 
812
/**
813
 * Like a 2d lerp, but inputs are 0/~0 masks, so can simplify slightly.
814
 */
815
static LLVMValueRef
816
lp_build_masklerp2d(struct lp_build_context *bld,
817
                    LLVMValueRef weight0,
818
                    LLVMValueRef weight1,
819
                    LLVMValueRef mask00,
820
                    LLVMValueRef mask01,
821
                    LLVMValueRef mask10,
822
                    LLVMValueRef mask11)
823
{
824
   LLVMValueRef val0 = lp_build_masklerp(bld, weight0, mask00, mask01);
825
   LLVMValueRef val1 = lp_build_masklerp(bld, weight0, mask10, mask11);
826
   return lp_build_lerp(bld, weight1, val0, val1, 0);
827
}
828
 
829
/*
 * Accurate cube corner filtering takes a fair amount of extra code for
 * something OpenGL merely recommends but does not require.
 */
833
#define ACCURATE_CUBE_CORNERS 1
834
 
835
/**
836
 * Generate code to sample a mipmap level with linear filtering.
837
 * If sampling a cube texture, r = cube face in [0,5].
838
 * If linear_mask is present, only pixels having their mask set
839
 * will receive linear filtering, the rest will use nearest.
840
 */
841
static void
842
lp_build_sample_image_linear(struct lp_build_sample_context *bld,
843
                             boolean is_gather,
844
                             LLVMValueRef size,
845
                             LLVMValueRef linear_mask,
846
                             LLVMValueRef row_stride_vec,
847
                             LLVMValueRef img_stride_vec,
848
                             LLVMValueRef data_ptr,
849
                             LLVMValueRef mipoffsets,
850
                             LLVMValueRef *coords,
851
                             const LLVMValueRef *offsets,
852
                             LLVMValueRef colors_out[4])
853
{
854
   LLVMBuilderRef builder = bld->gallivm->builder;
855
   struct lp_build_context *ivec_bld = &bld->int_coord_bld;
856
   struct lp_build_context *coord_bld = &bld->coord_bld;
857
   struct lp_build_context *texel_bld = &bld->texel_bld;
858
   const unsigned dims = bld->dims;
859
   LLVMValueRef width_vec;
860
   LLVMValueRef height_vec;
861
   LLVMValueRef depth_vec;
862
   LLVMValueRef flt_size;
863
   LLVMValueRef flt_width_vec;
864
   LLVMValueRef flt_height_vec;
865
   LLVMValueRef flt_depth_vec;
866
   LLVMValueRef fall_off[4], have_corners;
867
   LLVMValueRef z1 = NULL;
868
   LLVMValueRef z00 = NULL, z01 = NULL, z10 = NULL, z11 = NULL;
869
   LLVMValueRef x00 = NULL, x01 = NULL, x10 = NULL, x11 = NULL;
870
   LLVMValueRef y00 = NULL, y01 = NULL, y10 = NULL, y11 = NULL;
871
   LLVMValueRef s_fpart, t_fpart = NULL, r_fpart = NULL;
872
   LLVMValueRef xs[4], ys[4], zs[4];
873
   LLVMValueRef neighbors[2][2][4];
874
   int chan, texel_index;
875
   boolean seamless_cube_filter, accurate_cube_corners;
876
 
877
   seamless_cube_filter = (bld->static_texture_state->target == PIPE_TEXTURE_CUBE ||
878
                           bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
879
                          bld->static_sampler_state->seamless_cube_map;
880
   /*
881
    * XXX I don't know how this is really supposed to work with gather. From GL
882
    * spec wording (not gather specific) it sounds like the 4th missing texel
883
    * should be an average of the other 3, hence for gather could return this.
884
    * This is however NOT how the code here works, which just fixes up the
885
    * weights used for filtering instead. And of course for gather there is
886
    * no filter to tweak...
887
    */
888
   accurate_cube_corners = ACCURATE_CUBE_CORNERS && seamless_cube_filter &&
889
                           !is_gather;
890
 
891
   lp_build_extract_image_sizes(bld,
892
                                &bld->int_size_bld,
893
                                bld->int_coord_type,
894
                                size,
895
                                &width_vec, &height_vec, &depth_vec);
896
 
897
   flt_size = lp_build_int_to_float(&bld->float_size_bld, size);
898
 
899
   lp_build_extract_image_sizes(bld,
900
                                &bld->float_size_bld,
901
                                bld->coord_type,
902
                                flt_size,
903
                                &flt_width_vec, &flt_height_vec, &flt_depth_vec);
904
 
905
   /*
906
    * Compute integer texcoords.
907
    */
908
 
909
   if (!seamless_cube_filter) {
910
      lp_build_sample_wrap_linear(bld, coords[0], width_vec,
911
                                  flt_width_vec, offsets[0],
912
                                  bld->static_texture_state->pot_width,
913
                                  bld->static_sampler_state->wrap_s,
914
                                  &x00, &x01, &s_fpart);
915
      lp_build_name(x00, "tex.x0.wrapped");
916
      lp_build_name(x01, "tex.x1.wrapped");
917
      x10 = x00;
918
      x11 = x01;
919
 
920
      if (dims >= 2) {
921
         lp_build_sample_wrap_linear(bld, coords[1], height_vec,
922
                                     flt_height_vec, offsets[1],
923
                                     bld->static_texture_state->pot_height,
924
                                     bld->static_sampler_state->wrap_t,
925
                                     &y00, &y10, &t_fpart);
926
         lp_build_name(y00, "tex.y0.wrapped");
927
         lp_build_name(y10, "tex.y1.wrapped");
928
         y01 = y00;
929
         y11 = y10;
930
 
931
         if (dims == 3) {
932
            lp_build_sample_wrap_linear(bld, coords[2], depth_vec,
933
                                        flt_depth_vec, offsets[2],
934
                                        bld->static_texture_state->pot_depth,
935
                                        bld->static_sampler_state->wrap_r,
936
                                        &z00, &z1, &r_fpart);
937
            z01 = z10 = z11 = z00;
938
            lp_build_name(z00, "tex.z0.wrapped");
939
            lp_build_name(z1, "tex.z1.wrapped");
940
         }
941
      }
942
      if (has_layer_coord(bld->static_texture_state->target)) {
943
         if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
944
            /* add cube layer to face */
945
            z00 = z01 = z10 = z11 = z1 =
946
               lp_build_add(&bld->int_coord_bld, coords[2], coords[3]);
947
         }
948
         else {
949
            z00 = z01 = z10 = z11 = z1 = coords[2];  /* cube face or layer */
950
         }
951
         lp_build_name(z00, "tex.z0.layer");
952
         lp_build_name(z1, "tex.z1.layer");
953
      }
954
   }
955
   else {
956
      struct lp_build_if_state edge_if;
957
      LLVMTypeRef int1t;
958
      LLVMValueRef new_faces[4], new_xcoords[4][2], new_ycoords[4][2];
959
      LLVMValueRef coord, have_edge, have_corner;
960
      LLVMValueRef fall_off_ym_notxm, fall_off_ym_notxp, fall_off_x, fall_off_y;
961
      LLVMValueRef fall_off_yp_notxm, fall_off_yp_notxp;
962
      LLVMValueRef x0, x1, y0, y1, y0_clamped, y1_clamped;
963
      LLVMValueRef face = coords[2];
964
      LLVMValueRef half = lp_build_const_vec(bld->gallivm, coord_bld->type, 0.5f);
965
      LLVMValueRef length_minus_one = lp_build_sub(ivec_bld, width_vec, ivec_bld->one);
966
      /* XXX drop height calcs. Could (should) do this without seamless filtering too */
967
      height_vec = width_vec;
968
      flt_height_vec = flt_width_vec;
969
 
970
      /* XXX the overflow logic is actually sort of duplicated with trilinear,
971
       * since an overflow in one mip should also have a corresponding overflow
972
       * in another.
973
       */
974
      /* should always have normalized coords, and offsets are undefined */
975
      assert(bld->static_sampler_state->normalized_coords);
976
      coord = lp_build_mul(coord_bld, coords[0], flt_width_vec);
977
      /* instead of clamp, build mask if overflowed */
978
      coord = lp_build_sub(coord_bld, coord, half);
979
      /* convert to int, compute lerp weight */
980
      /* not ideal with AVX (and no AVX2) */
981
      lp_build_ifloor_fract(coord_bld, coord, &x0, &s_fpart);
982
      x1 = lp_build_add(ivec_bld, x0, ivec_bld->one);
983
      coord = lp_build_mul(coord_bld, coords[1], flt_height_vec);
984
      coord = lp_build_sub(coord_bld, coord, half);
985
      lp_build_ifloor_fract(coord_bld, coord, &y0, &t_fpart);
986
      y1 = lp_build_add(ivec_bld, y0, ivec_bld->one);
987
 
988
      fall_off[0] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, x0, ivec_bld->zero);
989
      fall_off[1] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, x1, length_minus_one);
990
      fall_off[2] = lp_build_cmp(ivec_bld, PIPE_FUNC_LESS, y0, ivec_bld->zero);
991
      fall_off[3] = lp_build_cmp(ivec_bld, PIPE_FUNC_GREATER, y1, length_minus_one);
992
 
993
      fall_off_x = lp_build_or(ivec_bld, fall_off[0], fall_off[1]);
994
      fall_off_y = lp_build_or(ivec_bld, fall_off[2], fall_off[3]);
995
      have_edge = lp_build_or(ivec_bld, fall_off_x, fall_off_y);
996
      have_edge = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_edge);
997
 
998
      /* needed for accurate corner filtering branch later, rely on 0 init */
999
      int1t = LLVMInt1TypeInContext(bld->gallivm->context);
1000
      have_corners = lp_build_alloca(bld->gallivm, int1t, "have_corner");
1001
 
1002
      for (texel_index = 0; texel_index < 4; texel_index++) {
1003
         xs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "xs");
1004
         ys[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "ys");
1005
         zs[texel_index] = lp_build_alloca(bld->gallivm, ivec_bld->vec_type, "zs");
1006
      }
1007
 
1008
      lp_build_if(&edge_if, bld->gallivm, have_edge);
1009
 
1010
      have_corner = lp_build_and(ivec_bld, fall_off_x, fall_off_y);
1011
      have_corner = lp_build_any_true_range(ivec_bld, ivec_bld->type.length, have_corner);
1012
      LLVMBuildStore(builder, have_corner, have_corners);
1013
 
1014
      /*
1015
       * Need to feed clamped values here for cheap corner handling,
1016
       * but only for y coord (as when falling off both edges we only
1017
       * fall off the x one) - this should be sufficient.
1018
       */
1019
      y0_clamped = lp_build_max(ivec_bld, y0, ivec_bld->zero);
1020
      y1_clamped = lp_build_min(ivec_bld, y1, length_minus_one);
1021
 
1022
      /*
1023
       * Get all possible new coords.
1024
       */
1025
      lp_build_cube_new_coords(ivec_bld, face,
1026
                               x0, x1, y0_clamped, y1_clamped,
1027
                               length_minus_one,
1028
                               new_faces, new_xcoords, new_ycoords);
1029
 
1030
      /* handle fall off x-, x+ direction */
1031
      /* determine new coords, face (not both fall_off vars can be true at same time) */
1032
      x00 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][0], x0);
1033
      y00 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][0], y0_clamped);
1034
      x10 = lp_build_select(ivec_bld, fall_off[0], new_xcoords[0][1], x0);
1035
      y10 = lp_build_select(ivec_bld, fall_off[0], new_ycoords[0][1], y1_clamped);
1036
      x01 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][0], x1);
1037
      y01 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][0], y0_clamped);
1038
      x11 = lp_build_select(ivec_bld, fall_off[1], new_xcoords[1][1], x1);
1039
      y11 = lp_build_select(ivec_bld, fall_off[1], new_ycoords[1][1], y1_clamped);
1040
 
1041
      z00 = z10 = lp_build_select(ivec_bld, fall_off[0], new_faces[0], face);
1042
      z01 = z11 = lp_build_select(ivec_bld, fall_off[1], new_faces[1], face);
1043
 
1044
      /* handle fall off y-, y+ direction */
1045
      /*
1046
       * Cheap corner logic: just hack up things so a texel doesn't fall
1047
       * off both sides (which means filter weights will be wrong but we'll only
1048
       * use valid texels in the filter).
1049
       * This means however (y) coords must additionally be clamped (see above).
1050
       * This corner handling should be fully OpenGL (but not d3d10) compliant.
1051
       */
1052
      fall_off_ym_notxm = lp_build_andnot(ivec_bld, fall_off[2], fall_off[0]);
1053
      fall_off_ym_notxp = lp_build_andnot(ivec_bld, fall_off[2], fall_off[1]);
1054
      fall_off_yp_notxm = lp_build_andnot(ivec_bld, fall_off[3], fall_off[0]);
1055
      fall_off_yp_notxp = lp_build_andnot(ivec_bld, fall_off[3], fall_off[1]);
1056
 
1057
      x00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_xcoords[2][0], x00);
1058
      y00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_ycoords[2][0], y00);
1059
      x01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_xcoords[2][1], x01);
1060
      y01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_ycoords[2][1], y01);
1061
      x10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_xcoords[3][0], x10);
1062
      y10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_ycoords[3][0], y10);
1063
      x11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_xcoords[3][1], x11);
1064
      y11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_ycoords[3][1], y11);
1065
 
1066
      z00 = lp_build_select(ivec_bld, fall_off_ym_notxm, new_faces[2], z00);
1067
      z01 = lp_build_select(ivec_bld, fall_off_ym_notxp, new_faces[2], z01);
1068
      z10 = lp_build_select(ivec_bld, fall_off_yp_notxm, new_faces[3], z10);
1069
      z11 = lp_build_select(ivec_bld, fall_off_yp_notxp, new_faces[3], z11);
1070
 
1071
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1072
         /* now can add cube layer to face (per sample) */
1073
         z00 = lp_build_add(ivec_bld, z00, coords[3]);
1074
         z01 = lp_build_add(ivec_bld, z01, coords[3]);
1075
         z10 = lp_build_add(ivec_bld, z10, coords[3]);
1076
         z11 = lp_build_add(ivec_bld, z11, coords[3]);
1077
      }
1078
 
1079
      LLVMBuildStore(builder, x00, xs[0]);
1080
      LLVMBuildStore(builder, x01, xs[1]);
1081
      LLVMBuildStore(builder, x10, xs[2]);
1082
      LLVMBuildStore(builder, x11, xs[3]);
1083
      LLVMBuildStore(builder, y00, ys[0]);
1084
      LLVMBuildStore(builder, y01, ys[1]);
1085
      LLVMBuildStore(builder, y10, ys[2]);
1086
      LLVMBuildStore(builder, y11, ys[3]);
1087
      LLVMBuildStore(builder, z00, zs[0]);
1088
      LLVMBuildStore(builder, z01, zs[1]);
1089
      LLVMBuildStore(builder, z10, zs[2]);
1090
      LLVMBuildStore(builder, z11, zs[3]);
1091
 
1092
      lp_build_else(&edge_if);
1093
 
1094
      LLVMBuildStore(builder, x0, xs[0]);
1095
      LLVMBuildStore(builder, x1, xs[1]);
1096
      LLVMBuildStore(builder, x0, xs[2]);
1097
      LLVMBuildStore(builder, x1, xs[3]);
1098
      LLVMBuildStore(builder, y0, ys[0]);
1099
      LLVMBuildStore(builder, y0, ys[1]);
1100
      LLVMBuildStore(builder, y1, ys[2]);
1101
      LLVMBuildStore(builder, y1, ys[3]);
1102
      if (bld->static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
1103
         LLVMValueRef cube_layer = lp_build_add(ivec_bld, face, coords[3]);
1104
         LLVMBuildStore(builder, cube_layer, zs[0]);
1105
         LLVMBuildStore(builder, cube_layer, zs[1]);
1106
         LLVMBuildStore(builder, cube_layer, zs[2]);
1107
         LLVMBuildStore(builder, cube_layer, zs[3]);
1108
      }
1109
      else {
1110
         LLVMBuildStore(builder, face, zs[0]);
1111
         LLVMBuildStore(builder, face, zs[1]);
1112
         LLVMBuildStore(builder, face, zs[2]);
1113
         LLVMBuildStore(builder, face, zs[3]);
1114
      }
1115
 
1116
      lp_build_endif(&edge_if);
1117
 
1118
      x00 = LLVMBuildLoad(builder, xs[0], "");
1119
      x01 = LLVMBuildLoad(builder, xs[1], "");
1120
      x10 = LLVMBuildLoad(builder, xs[2], "");
1121
      x11 = LLVMBuildLoad(builder, xs[3], "");
1122
      y00 = LLVMBuildLoad(builder, ys[0], "");
1123
      y01 = LLVMBuildLoad(builder, ys[1], "");
1124
      y10 = LLVMBuildLoad(builder, ys[2], "");
1125
      y11 = LLVMBuildLoad(builder, ys[3], "");
1126
      z00 = LLVMBuildLoad(builder, zs[0], "");
1127
      z01 = LLVMBuildLoad(builder, zs[1], "");
1128
      z10 = LLVMBuildLoad(builder, zs[2], "");
1129
      z11 = LLVMBuildLoad(builder, zs[3], "");
1130
   }
1131
 
1132
   if (linear_mask) {
1133
      /*
1134
       * Whack filter weights into place. Whatever texel had more weight is
1135
       * the one which should have been selected by nearest filtering hence
1136
       * just use 100% weight for it.
1137
       */
1138
      struct lp_build_context *c_bld = &bld->coord_bld;
1139
      LLVMValueRef w1_mask, w1_weight;
1140
      LLVMValueRef half = lp_build_const_vec(bld->gallivm, c_bld->type, 0.5f);
1141
 
1142
      w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, s_fpart, half);
1143
      /* this select is really just a "and" */
1144
      w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1145
      s_fpart = lp_build_select(c_bld, linear_mask, s_fpart, w1_weight);
1146
      if (dims >= 2) {
1147
         w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, t_fpart, half);
1148
         w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1149
         t_fpart = lp_build_select(c_bld, linear_mask, t_fpart, w1_weight);
1150
         if (dims == 3) {
1151
            w1_mask = lp_build_cmp(c_bld, PIPE_FUNC_GREATER, r_fpart, half);
1152
            w1_weight = lp_build_select(c_bld, w1_mask, c_bld->one, c_bld->zero);
1153
            r_fpart = lp_build_select(c_bld, linear_mask, r_fpart, w1_weight);
1154
         }
1155
      }
1156
   }
1157
 
1158
   /*
1159
    * Get texture colors.
1160
    */
1161
   /* get x0/x1 texels */
1162
   lp_build_sample_texel_soa(bld,
1163
                             width_vec, height_vec, depth_vec,
1164
                             x00, y00, z00,
1165
                             row_stride_vec, img_stride_vec,
1166
                             data_ptr, mipoffsets, neighbors[0][0]);
1167
   lp_build_sample_texel_soa(bld,
1168
                             width_vec, height_vec, depth_vec,
1169
                             x01, y01, z01,
1170
                             row_stride_vec, img_stride_vec,
1171
                             data_ptr, mipoffsets, neighbors[0][1]);
1172
 
1173
   if (dims == 1) {
1174
      assert(!is_gather);
1175
      if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1176
         /* Interpolate two samples from 1D image to produce one color */
1177
         for (chan = 0; chan < 4; chan++) {
1178
            colors_out[chan] = lp_build_lerp(texel_bld, s_fpart,
1179
                                             neighbors[0][0][chan],
1180
                                             neighbors[0][1][chan],
1181
                                             0);
1182
         }
1183
      }
1184
      else {
1185
         LLVMValueRef cmpval0, cmpval1;
1186
         cmpval0 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1187
         cmpval1 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1188
         /* simplified lerp, AND mask with weight and add */
1189
         colors_out[0] = lp_build_masklerp(texel_bld, s_fpart,
1190
                                           cmpval0, cmpval1);
1191
         colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1192
      }
1193
   }
1194
   else {
1195
      /* 2D/3D texture */
1196
      struct lp_build_if_state corner_if;
1197
      LLVMValueRef colors0[4], colorss[4];
1198
 
1199
      /* get x0/x1 texels at y1 */
1200
      lp_build_sample_texel_soa(bld,
1201
                                width_vec, height_vec, depth_vec,
1202
                                x10, y10, z10,
1203
                                row_stride_vec, img_stride_vec,
1204
                                data_ptr, mipoffsets, neighbors[1][0]);
1205
      lp_build_sample_texel_soa(bld,
1206
                                width_vec, height_vec, depth_vec,
1207
                                x11, y11, z11,
1208
                                row_stride_vec, img_stride_vec,
1209
                                data_ptr, mipoffsets, neighbors[1][1]);
1210
 
1211
      /*
1212
       * To avoid having to duplicate linear_mask / fetch code use
1213
       * another branch (with corner condition though edge would work
1214
       * as well) here.
1215
       */
1216
      if (accurate_cube_corners) {
1217
         LLVMValueRef w00, w01, w10, w11, wx0, wy0;
1218
         LLVMValueRef c_weight, c00, c01, c10, c11;
1219
         LLVMValueRef have_corner, one_third, tmp;
1220
 
1221
         colorss[0] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1222
         colorss[1] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1223
         colorss[2] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1224
         colorss[3] = lp_build_alloca(bld->gallivm, coord_bld->vec_type, "cs");
1225
 
1226
         have_corner = LLVMBuildLoad(builder, have_corners, "");
1227
 
1228
         lp_build_if(&corner_if, bld->gallivm, have_corner);
1229
 
1230
         /*
1231
          * we can't use standard 2d lerp as we need per-element weight
1232
          * in case of corners, so just calculate bilinear result as
1233
          * w00*s00 + w01*s01 + w10*s10 + w11*s11.
1234
          * (This is actually less work than using 2d lerp, 7 vs. 9 instructions,
1235
          * however calculating the weights needs another 6, so actually probably
1236
          * not slower than 2d lerp only for 4 channels as weights only need
1237
          * to be calculated once - of course fixing the weights has additional cost.)
1238
          */
1239
         wx0 = lp_build_sub(coord_bld, coord_bld->one, s_fpart);
1240
         wy0 = lp_build_sub(coord_bld, coord_bld->one, t_fpart);
1241
         w00 = lp_build_mul(coord_bld, wx0, wy0);
1242
         w01 = lp_build_mul(coord_bld, s_fpart, wy0);
1243
         w10 = lp_build_mul(coord_bld, wx0, t_fpart);
1244
         w11 = lp_build_mul(coord_bld, s_fpart, t_fpart);
1245
 
1246
         /* find corner weight */
1247
         c00 = lp_build_and(ivec_bld, fall_off[0], fall_off[2]);
1248
         c_weight = lp_build_select(coord_bld, c00, w00, coord_bld->zero);
1249
         c01 = lp_build_and(ivec_bld, fall_off[1], fall_off[2]);
1250
         c_weight = lp_build_select(coord_bld, c01, w01, c_weight);
1251
         c10 = lp_build_and(ivec_bld, fall_off[0], fall_off[3]);
1252
         c_weight = lp_build_select(coord_bld, c10, w10, c_weight);
1253
         c11 = lp_build_and(ivec_bld, fall_off[1], fall_off[3]);
1254
         c_weight = lp_build_select(coord_bld, c11, w11, c_weight);
1255
 
1256
         /*
1257
          * add 1/3 of the corner weight to each of the 3 other samples
1258
          * and null out corner weight
1259
          */
1260
         one_third = lp_build_const_vec(bld->gallivm, coord_bld->type, 1.0f/3.0f);
1261
         c_weight = lp_build_mul(coord_bld, c_weight, one_third);
1262
         w00 = lp_build_add(coord_bld, w00, c_weight);
1263
         c00 = LLVMBuildBitCast(builder, c00, coord_bld->vec_type, "");
1264
         w00 = lp_build_andnot(coord_bld, w00, c00);
1265
         w01 = lp_build_add(coord_bld, w01, c_weight);
1266
         c01 = LLVMBuildBitCast(builder, c01, coord_bld->vec_type, "");
1267
         w01 = lp_build_andnot(coord_bld, w01, c01);
1268
         w10 = lp_build_add(coord_bld, w10, c_weight);
1269
         c10 = LLVMBuildBitCast(builder, c10, coord_bld->vec_type, "");
1270
         w10 = lp_build_andnot(coord_bld, w10, c10);
1271
         w11 = lp_build_add(coord_bld, w11, c_weight);
1272
         c11 = LLVMBuildBitCast(builder, c11, coord_bld->vec_type, "");
1273
         w11 = lp_build_andnot(coord_bld, w11, c11);
1274
 
1275
         if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1276
            for (chan = 0; chan < 4; chan++) {
1277
               colors0[chan] = lp_build_mul(coord_bld, w00, neighbors[0][0][chan]);
1278
               tmp = lp_build_mul(coord_bld, w01, neighbors[0][1][chan]);
1279
               colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1280
               tmp = lp_build_mul(coord_bld, w10, neighbors[1][0][chan]);
1281
               colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1282
               tmp = lp_build_mul(coord_bld, w11, neighbors[1][1][chan]);
1283
               colors0[chan] = lp_build_add(coord_bld, tmp, colors0[chan]);
1284
            }
1285
         }
1286
         else {
1287
            LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1288
            cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1289
            cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1290
            cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1291
            cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1292
            /* inputs to interpolation are just masks so just add masked weights together */
1293
            cmpval00 = LLVMBuildBitCast(builder, cmpval00, coord_bld->vec_type, "");
1294
            cmpval01 = LLVMBuildBitCast(builder, cmpval01, coord_bld->vec_type, "");
1295
            cmpval10 = LLVMBuildBitCast(builder, cmpval10, coord_bld->vec_type, "");
1296
            cmpval11 = LLVMBuildBitCast(builder, cmpval11, coord_bld->vec_type, "");
1297
            colors0[0] = lp_build_and(coord_bld, w00, cmpval00);
1298
            tmp = lp_build_and(coord_bld, w01, cmpval01);
1299
            colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1300
            tmp = lp_build_and(coord_bld, w10, cmpval10);
1301
            colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1302
            tmp = lp_build_and(coord_bld, w11, cmpval11);
1303
            colors0[0] = lp_build_add(coord_bld, tmp, colors0[0]);
1304
            colors0[1] = colors0[2] = colors0[3] = colors0[0];
1305
         }
1306
 
1307
         LLVMBuildStore(builder, colors0[0], colorss[0]);
1308
         LLVMBuildStore(builder, colors0[1], colorss[1]);
1309
         LLVMBuildStore(builder, colors0[2], colorss[2]);
1310
         LLVMBuildStore(builder, colors0[3], colorss[3]);
1311
 
1312
         lp_build_else(&corner_if);
1313
      }
1314
 
1315
      if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1316
         if (is_gather) {
1317
            /*
1318
             * Just assign the red channel (no component selection yet).
1319
             * This is a bit hackish, we usually do the swizzle at the
1320
             * end of sampling (much less values to swizzle), but this
1321
             * obviously cannot work when using gather.
1322
             */
1323
            unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1324
            colors0[0] = lp_build_swizzle_soa_channel(texel_bld,
1325
                                                      neighbors[1][0],
1326
                                                      chan_swiz);
1327
            colors0[1] = lp_build_swizzle_soa_channel(texel_bld,
1328
                                                      neighbors[1][1],
1329
                                                      chan_swiz);
1330
            colors0[2] = lp_build_swizzle_soa_channel(texel_bld,
1331
                                                      neighbors[0][1],
1332
                                                      chan_swiz);
1333
            colors0[3] = lp_build_swizzle_soa_channel(texel_bld,
1334
                                                      neighbors[0][0],
1335
                                                      chan_swiz);
1336
         }
1337
         else {
1338
            /* Bilinear interpolate the four samples from the 2D image / 3D slice */
1339
            for (chan = 0; chan < 4; chan++) {
1340
               colors0[chan] = lp_build_lerp_2d(texel_bld,
1341
                                                s_fpart, t_fpart,
1342
                                                neighbors[0][0][chan],
1343
                                                neighbors[0][1][chan],
1344
                                                neighbors[1][0][chan],
1345
                                                neighbors[1][1][chan],
1346
                                                0);
1347
            }
1348
         }
1349
      }
1350
      else {
1351
         LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1352
         cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1353
         cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1354
         cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1355
         cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1356
 
1357
         if (is_gather) {
1358
            /* more hacks for swizzling, should be X, ONE or ZERO... */
1359
            unsigned chan_swiz = bld->static_texture_state->swizzle_r;
1360
            if (chan_swiz <= PIPE_SWIZZLE_ALPHA) {
1361
               colors0[0] = lp_build_select(texel_bld, cmpval10,
1362
                                            texel_bld->one, texel_bld->zero);
1363
               colors0[1] = lp_build_select(texel_bld, cmpval11,
1364
                                            texel_bld->one, texel_bld->zero);
1365
               colors0[2] = lp_build_select(texel_bld, cmpval01,
1366
                                            texel_bld->one, texel_bld->zero);
1367
               colors0[3] = lp_build_select(texel_bld, cmpval00,
1368
                                            texel_bld->one, texel_bld->zero);
1369
            }
1370
            else if (chan_swiz == PIPE_SWIZZLE_ZERO) {
1371
               colors0[0] = colors0[1] = colors0[2] = colors0[3] =
1372
                            texel_bld->zero;
1373
            }
1374
            else {
1375
               colors0[0] = colors0[1] = colors0[2] = colors0[3] =
1376
                            texel_bld->one;
1377
            }
1378
         }
1379
         else {
1380
            colors0[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1381
                                             cmpval00, cmpval01, cmpval10, cmpval11);
1382
            colors0[1] = colors0[2] = colors0[3] = colors0[0];
1383
         }
1384
      }
1385
 
1386
      if (accurate_cube_corners) {
1387
         LLVMBuildStore(builder, colors0[0], colorss[0]);
1388
         LLVMBuildStore(builder, colors0[1], colorss[1]);
1389
         LLVMBuildStore(builder, colors0[2], colorss[2]);
1390
         LLVMBuildStore(builder, colors0[3], colorss[3]);
1391
 
1392
         lp_build_endif(&corner_if);
1393
 
1394
         colors0[0] = LLVMBuildLoad(builder, colorss[0], "");
1395
         colors0[1] = LLVMBuildLoad(builder, colorss[1], "");
1396
         colors0[2] = LLVMBuildLoad(builder, colorss[2], "");
1397
         colors0[3] = LLVMBuildLoad(builder, colorss[3], "");
1398
      }
1399
 
1400
      if (dims == 3) {
1401
         LLVMValueRef neighbors1[2][2][4];
1402
         LLVMValueRef colors1[4];
1403
 
1404
         assert(!is_gather);
1405
 
1406
         /* get x0/x1/y0/y1 texels at z1 */
1407
         lp_build_sample_texel_soa(bld,
1408
                                   width_vec, height_vec, depth_vec,
1409
                                   x00, y00, z1,
1410
                                   row_stride_vec, img_stride_vec,
1411
                                   data_ptr, mipoffsets, neighbors1[0][0]);
1412
         lp_build_sample_texel_soa(bld,
1413
                                   width_vec, height_vec, depth_vec,
1414
                                   x01, y01, z1,
1415
                                   row_stride_vec, img_stride_vec,
1416
                                   data_ptr, mipoffsets, neighbors1[0][1]);
1417
         lp_build_sample_texel_soa(bld,
1418
                                   width_vec, height_vec, depth_vec,
1419
                                   x10, y10, z1,
1420
                                   row_stride_vec, img_stride_vec,
1421
                                   data_ptr, mipoffsets, neighbors1[1][0]);
1422
         lp_build_sample_texel_soa(bld,
1423
                                   width_vec, height_vec, depth_vec,
1424
                                   x11, y11, z1,
1425
                                   row_stride_vec, img_stride_vec,
1426
                                   data_ptr, mipoffsets, neighbors1[1][1]);
1427
 
1428
         if (bld->static_sampler_state->compare_mode == PIPE_TEX_COMPARE_NONE) {
1429
            /* Bilinear interpolate the four samples from the second Z slice */
1430
            for (chan = 0; chan < 4; chan++) {
1431
               colors1[chan] = lp_build_lerp_2d(texel_bld,
1432
                                                s_fpart, t_fpart,
1433
                                                neighbors1[0][0][chan],
1434
                                                neighbors1[0][1][chan],
1435
                                                neighbors1[1][0][chan],
1436
                                                neighbors1[1][1][chan],
1437
                                                0);
1438
            }
1439
            /* Linearly interpolate the two samples from the two 3D slices */
1440
            for (chan = 0; chan < 4; chan++) {
1441
               colors_out[chan] = lp_build_lerp(texel_bld,
1442
                                                r_fpart,
1443
                                                colors0[chan], colors1[chan],
1444
                                                0);
1445
            }
1446
         }
1447
         else {
1448
            LLVMValueRef cmpval00, cmpval01, cmpval10, cmpval11;
1449
            cmpval00 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][0][0]);
1450
            cmpval01 = lp_build_sample_comparefunc(bld, coords[4], neighbors[0][1][0]);
1451
            cmpval10 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][0][0]);
1452
            cmpval11 = lp_build_sample_comparefunc(bld, coords[4], neighbors[1][1][0]);
1453
            colors1[0] = lp_build_masklerp2d(texel_bld, s_fpart, t_fpart,
1454
                                             cmpval00, cmpval01, cmpval10, cmpval11);
1455
            /* Linearly interpolate the two samples from the two 3D slices */
1456
            colors_out[0] = lp_build_lerp(texel_bld,
1457
                                          r_fpart,
1458
                                          colors0[0], colors1[0],
1459
                                          0);
1460
            colors_out[1] = colors_out[2] = colors_out[3] = colors_out[0];
1461
         }
1462
      }
1463
      else {
1464
         /* 2D tex */
1465
         for (chan = 0; chan < 4; chan++) {
1466
            colors_out[chan] = colors0[chan];
1467
         }
1468
      }
1469
   }
1470
}
1471
 
1472
 
1473
/**
1474
 * Sample the texture/mipmap using given image filter and mip filter.
1475
 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1476
 * from (vectors or scalars).
1477
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1478
 */
1479
static void
1480
lp_build_sample_mipmap(struct lp_build_sample_context *bld,
1481
                       unsigned img_filter,
1482
                       unsigned mip_filter,
1483
                       boolean is_gather,
1484
                       LLVMValueRef *coords,
1485
                       const LLVMValueRef *offsets,
1486
                       LLVMValueRef ilevel0,
1487
                       LLVMValueRef ilevel1,
1488
                       LLVMValueRef lod_fpart,
1489
                       LLVMValueRef *colors_out)
1490
{
1491
   LLVMBuilderRef builder = bld->gallivm->builder;
1492
   LLVMValueRef size0 = NULL;
1493
   LLVMValueRef size1 = NULL;
1494
   LLVMValueRef row_stride0_vec = NULL;
1495
   LLVMValueRef row_stride1_vec = NULL;
1496
   LLVMValueRef img_stride0_vec = NULL;
1497
   LLVMValueRef img_stride1_vec = NULL;
1498
   LLVMValueRef data_ptr0 = NULL;
1499
   LLVMValueRef data_ptr1 = NULL;
1500
   LLVMValueRef mipoff0 = NULL;
1501
   LLVMValueRef mipoff1 = NULL;
1502
   LLVMValueRef colors0[4], colors1[4];
1503
   unsigned chan;
1504
 
1505
   /* sample the first mipmap level */
1506
   lp_build_mipmap_level_sizes(bld, ilevel0,
1507
                               &size0,
1508
                               &row_stride0_vec, &img_stride0_vec);
1509
   if (bld->num_mips == 1) {
1510
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1511
   }
1512
   else {
1513
      /* This path should work for num_lods 1 too but slightly less efficient */
1514
      data_ptr0 = bld->base_ptr;
1515
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1516
   }
1517
   if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1518
      lp_build_sample_image_nearest(bld, size0,
1519
                                    row_stride0_vec, img_stride0_vec,
1520
                                    data_ptr0, mipoff0, coords, offsets,
1521
                                    colors0);
1522
   }
1523
   else {
1524
      assert(img_filter == PIPE_TEX_FILTER_LINEAR);
1525
      lp_build_sample_image_linear(bld, is_gather, size0, NULL,
1526
                                   row_stride0_vec, img_stride0_vec,
1527
                                   data_ptr0, mipoff0, coords, offsets,
1528
                                   colors0);
1529
   }
1530
 
1531
   /* Store the first level's colors in the output variables */
1532
   for (chan = 0; chan < 4; chan++) {
1533
       LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1534
   }
1535
 
1536
   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1537
      struct lp_build_if_state if_ctx;
1538
      LLVMValueRef need_lerp;
1539
 
1540
      /* need_lerp = lod_fpart > 0 */
1541
      if (bld->num_lods == 1) {
1542
         need_lerp = LLVMBuildFCmp(builder, LLVMRealUGT,
1543
                                   lod_fpart, bld->lodf_bld.zero,
1544
                                   "need_lerp");
1545
      }
1546
      else {
1547
         /*
1548
          * We'll do mip filtering if any of the quads (or individual
1549
          * pixel in case of per-pixel lod) need it.
1550
          * It might be better to split the vectors here and only fetch/filter
1551
          * quads which need it (if there's one lod per quad).
1552
          */
1553
         need_lerp = lp_build_compare(bld->gallivm, bld->lodf_bld.type,
1554
                                      PIPE_FUNC_GREATER,
1555
                                      lod_fpart, bld->lodf_bld.zero);
1556
         need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, need_lerp);
1557
      }
1558
 
1559
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1560
      {
1561
         /*
1562
          * We unfortunately need to clamp lod_fpart here since we can get
1563
          * negative values which would screw up filtering if not all
1564
          * lod_fpart values have same sign.
1565
          */
1566
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1567
                                  bld->lodf_bld.zero);
1568
         /* sample the second mipmap level */
1569
         lp_build_mipmap_level_sizes(bld, ilevel1,
1570
                                     &size1,
1571
                                     &row_stride1_vec, &img_stride1_vec);
1572
         if (bld->num_mips == 1) {
1573
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1574
         }
1575
         else {
1576
            data_ptr1 = bld->base_ptr;
1577
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1578
         }
1579
         if (img_filter == PIPE_TEX_FILTER_NEAREST) {
1580
            lp_build_sample_image_nearest(bld, size1,
1581
                                          row_stride1_vec, img_stride1_vec,
1582
                                          data_ptr1, mipoff1, coords, offsets,
1583
                                          colors1);
1584
         }
1585
         else {
1586
            lp_build_sample_image_linear(bld, FALSE, size1, NULL,
1587
                                         row_stride1_vec, img_stride1_vec,
1588
                                         data_ptr1, mipoff1, coords, offsets,
1589
                                         colors1);
1590
         }
1591
 
1592
         /* interpolate samples from the two mipmap levels */
1593
 
1594
         if (bld->num_lods != bld->coord_type.length)
1595
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1596
                                                              bld->lodf_bld.type,
1597
                                                              bld->texel_bld.type,
1598
                                                              lod_fpart);
1599
 
1600
         for (chan = 0; chan < 4; chan++) {
1601
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1602
                                          colors0[chan], colors1[chan],
1603
                                          0);
1604
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1605
         }
1606
      }
1607
      lp_build_endif(&if_ctx);
1608
   }
1609
}
1610
 
1611
 
1612
/**
1613
 * Sample the texture/mipmap using given mip filter, and using
1614
 * both nearest and linear filtering at the same time depending
1615
 * on linear_mask.
1616
 * lod can be per quad but linear_mask is always per pixel.
1617
 * ilevel0 and ilevel1 indicate the two mipmap levels to sample
1618
 * from (vectors or scalars).
1619
 * If we're using nearest miplevel sampling the '1' values will be null/unused.
1620
 */
1621
static void
1622
lp_build_sample_mipmap_both(struct lp_build_sample_context *bld,
1623
                            LLVMValueRef linear_mask,
1624
                            unsigned mip_filter,
1625
                            LLVMValueRef *coords,
1626
                            const LLVMValueRef *offsets,
1627
                            LLVMValueRef ilevel0,
1628
                            LLVMValueRef ilevel1,
1629
                            LLVMValueRef lod_fpart,
1630
                            LLVMValueRef lod_positive,
1631
                            LLVMValueRef *colors_out)
1632
{
1633
   LLVMBuilderRef builder = bld->gallivm->builder;
1634
   LLVMValueRef size0 = NULL;
1635
   LLVMValueRef size1 = NULL;
1636
   LLVMValueRef row_stride0_vec = NULL;
1637
   LLVMValueRef row_stride1_vec = NULL;
1638
   LLVMValueRef img_stride0_vec = NULL;
1639
   LLVMValueRef img_stride1_vec = NULL;
1640
   LLVMValueRef data_ptr0 = NULL;
1641
   LLVMValueRef data_ptr1 = NULL;
1642
   LLVMValueRef mipoff0 = NULL;
1643
   LLVMValueRef mipoff1 = NULL;
1644
   LLVMValueRef colors0[4], colors1[4];
1645
   unsigned chan;
1646
 
1647
   /* sample the first mipmap level */
1648
   lp_build_mipmap_level_sizes(bld, ilevel0,
1649
                               &size0,
1650
                               &row_stride0_vec, &img_stride0_vec);
1651
   if (bld->num_mips == 1) {
1652
      data_ptr0 = lp_build_get_mipmap_level(bld, ilevel0);
1653
   }
1654
   else {
1655
      /* This path should work for num_lods 1 too but slightly less efficient */
1656
      data_ptr0 = bld->base_ptr;
1657
      mipoff0 = lp_build_get_mip_offsets(bld, ilevel0);
1658
   }
1659
 
1660
   lp_build_sample_image_linear(bld, FALSE, size0, linear_mask,
1661
                                row_stride0_vec, img_stride0_vec,
1662
                                data_ptr0, mipoff0, coords, offsets,
1663
                                colors0);
1664
 
1665
   /* Store the first level's colors in the output variables */
1666
   for (chan = 0; chan < 4; chan++) {
1667
       LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1668
   }
1669
 
1670
   if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
1671
      struct lp_build_if_state if_ctx;
1672
      LLVMValueRef need_lerp;
1673
 
1674
      /*
1675
       * We'll do mip filtering if any of the quads (or individual
1676
       * pixel in case of per-pixel lod) need it.
1677
       * Note using lod_positive here not lod_fpart since it may be the same
1678
       * condition as that used in the outer "if" in the caller hence llvm
1679
       * should be able to merge the branches in this case.
1680
       */
1681
      need_lerp = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods, lod_positive);
1682
 
1683
      lp_build_if(&if_ctx, bld->gallivm, need_lerp);
1684
      {
1685
         /*
1686
          * We unfortunately need to clamp lod_fpart here since we can get
1687
          * negative values which would screw up filtering if not all
1688
          * lod_fpart values have same sign.
1689
          */
1690
         lod_fpart = lp_build_max(&bld->lodf_bld, lod_fpart,
1691
                                  bld->lodf_bld.zero);
1692
         /* sample the second mipmap level */
1693
         lp_build_mipmap_level_sizes(bld, ilevel1,
1694
                                     &size1,
1695
                                     &row_stride1_vec, &img_stride1_vec);
1696
         if (bld->num_mips == 1) {
1697
            data_ptr1 = lp_build_get_mipmap_level(bld, ilevel1);
1698
         }
1699
         else {
1700
            data_ptr1 = bld->base_ptr;
1701
            mipoff1 = lp_build_get_mip_offsets(bld, ilevel1);
1702
         }
1703
 
1704
         lp_build_sample_image_linear(bld, FALSE, size1, linear_mask,
1705
                                      row_stride1_vec, img_stride1_vec,
1706
                                      data_ptr1, mipoff1, coords, offsets,
1707
                                      colors1);
1708
 
1709
         /* interpolate samples from the two mipmap levels */
1710
 
1711
         if (bld->num_lods != bld->coord_type.length)
1712
            lod_fpart = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
1713
                                                              bld->lodf_bld.type,
1714
                                                              bld->texel_bld.type,
1715
                                                              lod_fpart);
1716
 
1717
         for (chan = 0; chan < 4; chan++) {
1718
            colors0[chan] = lp_build_lerp(&bld->texel_bld, lod_fpart,
1719
                                          colors0[chan], colors1[chan],
1720
                                          0);
1721
            LLVMBuildStore(builder, colors0[chan], colors_out[chan]);
1722
         }
1723
      }
1724
      lp_build_endif(&if_ctx);
1725
   }
1726
}
1727
 
1728
 
1729
/**
1730
 * Build (per-coord) layer value.
1731
 * Either clamp layer to valid values or fill in optional out_of_bounds
1732
 * value and just return value unclamped.
1733
 */
1734
static LLVMValueRef
1735
lp_build_layer_coord(struct lp_build_sample_context *bld,
1736
                     unsigned texture_unit,
1737
                     boolean is_cube_array,
1738
                     LLVMValueRef layer,
1739
                     LLVMValueRef *out_of_bounds)
1740
{
1741
   LLVMValueRef num_layers;
1742
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
1743
 
1744
   num_layers = bld->dynamic_state->depth(bld->dynamic_state, bld->gallivm,
1745
                                          bld->context_ptr, texture_unit);
1746
 
1747
   if (out_of_bounds) {
1748
      LLVMValueRef out1, out;
1749
      assert(!is_cube_array);
1750
      num_layers = lp_build_broadcast_scalar(int_coord_bld, num_layers);
1751
      out = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, layer, int_coord_bld->zero);
1752
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, layer, num_layers);
1753
      *out_of_bounds = lp_build_or(int_coord_bld, out, out1);
1754
      return layer;
1755
   }
1756
   else {
1757
      LLVMValueRef maxlayer;
1758
      LLVMValueRef s = is_cube_array ? lp_build_const_int32(bld->gallivm, 6) :
1759
                                       bld->int_bld.one;
1760
      maxlayer = lp_build_sub(&bld->int_bld, num_layers, s);
1761
      maxlayer = lp_build_broadcast_scalar(int_coord_bld, maxlayer);
1762
      return lp_build_clamp(int_coord_bld, layer, int_coord_bld->zero, maxlayer);
1763
   }
1764
}
1765
 
1766
 
1767
/**
1768
 * Calculate cube face, lod, mip levels.
1769
 */
1770
static void
1771
lp_build_sample_common(struct lp_build_sample_context *bld,
1772
                       unsigned texture_index,
1773
                       unsigned sampler_index,
1774
                       LLVMValueRef *coords,
1775
                       const struct lp_derivatives *derivs, /* optional */
1776
                       LLVMValueRef lod_bias, /* optional */
1777
                       LLVMValueRef explicit_lod, /* optional */
1778
                       LLVMValueRef *lod_pos_or_zero,
1779
                       LLVMValueRef *lod_fpart,
1780
                       LLVMValueRef *ilevel0,
1781
                       LLVMValueRef *ilevel1)
1782
{
1783
   const unsigned mip_filter = bld->static_sampler_state->min_mip_filter;
1784
   const unsigned min_filter = bld->static_sampler_state->min_img_filter;
1785
   const unsigned mag_filter = bld->static_sampler_state->mag_img_filter;
1786
   const unsigned target = bld->static_texture_state->target;
1787
   LLVMValueRef first_level, cube_rho = NULL;
1788
   LLVMValueRef lod_ipart = NULL;
1789
   struct lp_derivatives cube_derivs;
1790
 
1791
   /*
1792
   printf("%s mip %d  min %d  mag %d\n", __FUNCTION__,
1793
          mip_filter, min_filter, mag_filter);
1794
   */
1795
 
1796
   /*
1797
    * Choose cube face, recompute texcoords for the chosen face and
1798
    * compute rho here too (as it requires transform of derivatives).
1799
    */
1800
   if (target == PIPE_TEXTURE_CUBE || target == PIPE_TEXTURE_CUBE_ARRAY) {
1801
      boolean need_derivs;
1802
      need_derivs = ((min_filter != mag_filter ||
1803
                      mip_filter != PIPE_TEX_MIPFILTER_NONE) &&
1804
                      !bld->static_sampler_state->min_max_lod_equal &&
1805
                      !explicit_lod);
1806
      lp_build_cube_lookup(bld, coords, derivs, &cube_rho, &cube_derivs, need_derivs);
1807
      derivs = &cube_derivs;
1808
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
1809
         /* calculate cube layer coord now */
1810
         LLVMValueRef layer = lp_build_iround(&bld->coord_bld, coords[3]);
1811
         LLVMValueRef six = lp_build_const_int_vec(bld->gallivm, bld->int_coord_type, 6);
1812
         layer = lp_build_mul(&bld->int_coord_bld, layer, six);
1813
         coords[3] = lp_build_layer_coord(bld, texture_index, TRUE, layer, NULL);
1814
         /* because of seamless filtering can't add it to face (coords[2]) here. */
1815
      }
1816
   }
1817
   else if (target == PIPE_TEXTURE_1D_ARRAY ||
1818
            target == PIPE_TEXTURE_2D_ARRAY) {
1819
      coords[2] = lp_build_iround(&bld->coord_bld, coords[2]);
1820
      coords[2] = lp_build_layer_coord(bld, texture_index, FALSE, coords[2], NULL);
1821
   }
1822
 
1823
   if (bld->static_sampler_state->compare_mode != PIPE_TEX_COMPARE_NONE) {
1824
      /*
1825
       * Clamp p coords to [0,1] for fixed function depth texture format here.
1826
       * Technically this is not entirely correct for unorm depth as the ref value
1827
       * should be converted to the depth format (quantization!) and comparison
1828
       * then done in texture format. This would actually help performance (since
1829
       * only need to do it once and could save the per-sample conversion of texels
1830
       * to floats instead), but it would need more messy code (would need to push
1831
       * at least some bits down to actual fetch so conversion could be skipped,
1832
       * and would have ugly interaction with border color, would need to convert
1833
       * border color to that format too or do some other tricks to make it work).
1834
       */
1835
      const struct util_format_description *format_desc = bld->format_desc;
1836
      unsigned chan_type;
1837
      /* not entirely sure we couldn't end up with non-valid swizzle here */
1838
      chan_type = format_desc->swizzle[0] <= UTIL_FORMAT_SWIZZLE_W ?
1839
                     format_desc->channel[format_desc->swizzle[0]].type :
1840
                     UTIL_FORMAT_TYPE_FLOAT;
1841
      if (chan_type != UTIL_FORMAT_TYPE_FLOAT) {
1842
         coords[4] = lp_build_clamp(&bld->coord_bld, coords[4],
1843
                                    bld->coord_bld.zero, bld->coord_bld.one);
1844
      }
1845
   }
1846
 
1847
   /*
1848
    * Compute the level of detail (float).
1849
    */
1850
   if (min_filter != mag_filter ||
1851
       mip_filter != PIPE_TEX_MIPFILTER_NONE) {
1852
      /* Need to compute lod either to choose mipmap levels or to
1853
       * distinguish between minification/magnification with one mipmap level.
1854
       */
1855
      lp_build_lod_selector(bld, texture_index, sampler_index,
1856
                            coords[0], coords[1], coords[2], cube_rho,
1857
                            derivs, lod_bias, explicit_lod,
1858
                            mip_filter,
1859
                            &lod_ipart, lod_fpart, lod_pos_or_zero);
1860
   } else {
1861
      lod_ipart = bld->lodi_bld.zero;
1862
      *lod_pos_or_zero = bld->lodi_bld.zero;
1863
   }
1864
 
1865
   if (bld->num_lods != bld->num_mips) {
1866
      /* only makes sense if there's just a single mip level */
1867
      assert(bld->num_mips == 1);
1868
      lod_ipart = lp_build_extract_range(bld->gallivm, lod_ipart, 0, 1);
1869
   }
1870
 
1871
   /*
1872
    * Compute integer mipmap level(s) to fetch texels from: ilevel0, ilevel1
1873
    */
1874
   switch (mip_filter) {
1875
   default:
1876
      assert(0 && "bad mip_filter value in lp_build_sample_soa()");
1877
      /* fall-through */
1878
   case PIPE_TEX_MIPFILTER_NONE:
1879
      /* always use mip level 0 */
1880
      first_level = bld->dynamic_state->first_level(bld->dynamic_state,
1881
                                                    bld->gallivm, bld->context_ptr,
1882
                                                    texture_index);
1883
      first_level = lp_build_broadcast_scalar(&bld->leveli_bld, first_level);
1884
      *ilevel0 = first_level;
1885
      break;
1886
   case PIPE_TEX_MIPFILTER_NEAREST:
1887
      assert(lod_ipart);
1888
      lp_build_nearest_mip_level(bld, texture_index, lod_ipart, ilevel0, NULL);
1889
      break;
1890
   case PIPE_TEX_MIPFILTER_LINEAR:
1891
      assert(lod_ipart);
1892
      assert(*lod_fpart);
1893
      lp_build_linear_mip_levels(bld, texture_index,
1894
                                 lod_ipart, lod_fpart,
1895
                                 ilevel0, ilevel1);
1896
      break;
1897
   }
1898
}
1899
 
1900
static void
1901
lp_build_clamp_border_color(struct lp_build_sample_context *bld,
1902
                            unsigned sampler_unit)
1903
{
1904
   struct gallivm_state *gallivm = bld->gallivm;
1905
   LLVMBuilderRef builder = gallivm->builder;
1906
   LLVMValueRef border_color_ptr =
1907
      bld->dynamic_state->border_color(bld->dynamic_state, gallivm,
1908
                                       bld->context_ptr, sampler_unit);
1909
   LLVMValueRef border_color;
1910
   const struct util_format_description *format_desc = bld->format_desc;
1911
   struct lp_type vec4_type = bld->texel_type;
1912
   struct lp_build_context vec4_bld;
1913
   LLVMValueRef min_clamp = NULL;
1914
   LLVMValueRef max_clamp = NULL;
1915
 
1916
   /*
1917
    * For normalized format need to clamp border color (technically
1918
    * probably should also quantize the data). Really sucks doing this
1919
    * here but can't avoid at least for now since this is part of
1920
    * sampler state and texture format is part of sampler_view state.
1921
    * GL expects also expects clamping for uint/sint formats too so
1922
    * do that as well (d3d10 can't end up here with uint/sint since it
1923
    * only supports them with ld).
1924
    */
1925
   vec4_type.length = 4;
1926
   lp_build_context_init(&vec4_bld, gallivm, vec4_type);
1927
 
1928
   /*
1929
    * Vectorized clamping of border color. Loading is a bit of a hack since
1930
    * we just cast the pointer to float array to pointer to vec4
1931
    * (int or float).
1932
    */
1933
   border_color_ptr = lp_build_array_get_ptr(gallivm, border_color_ptr,
1934
                                             lp_build_const_int32(gallivm, 0));
1935
   border_color_ptr = LLVMBuildBitCast(builder, border_color_ptr,
1936
                                       LLVMPointerType(vec4_bld.vec_type, 0), "");
1937
   border_color = LLVMBuildLoad(builder, border_color_ptr, "");
1938
   /* we don't have aligned type in the dynamic state unfortunately */
1939
   lp_set_load_alignment(border_color, 4);
1940
 
1941
   /*
1942
    * Instead of having some incredibly complex logic which will try to figure out
1943
    * clamping necessary for each channel, simply use the first channel, and treat
1944
    * mixed signed/unsigned normalized formats specially.
1945
    * (Mixed non-normalized, which wouldn't work at all here, do not exist for a
1946
    * good reason.)
1947
    */
1948
   if (format_desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {
1949
      int chan;
1950
      /* d/s needs special handling because both present means just sampling depth */
1951
      if (util_format_is_depth_and_stencil(format_desc->format)) {
1952
         chan = format_desc->swizzle[0];
1953
      }
1954
      else {
1955
         chan = util_format_get_first_non_void_channel(format_desc->format);
1956
      }
1957
      if (chan >= 0 && chan <= UTIL_FORMAT_SWIZZLE_W) {
1958
         unsigned chan_type = format_desc->channel[chan].type;
1959
         unsigned chan_norm = format_desc->channel[chan].normalized;
1960
         unsigned chan_pure = format_desc->channel[chan].pure_integer;
1961
         if (chan_type == UTIL_FORMAT_TYPE_SIGNED) {
1962
            if (chan_norm) {
1963
               min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
1964
               max_clamp = vec4_bld.one;
1965
            }
1966
            else if (chan_pure) {
1967
               /*
1968
                * Border color was stored as int, hence need min/max clamp
1969
                * only if chan has less than 32 bits..
1970
                */
1971
               unsigned chan_size = format_desc->channel[chan].size;
1972
               if (chan_size < 32) {
1973
                  min_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1974
 
1975
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
1976
                                                     (1 << (chan_size - 1)) - 1);
1977
               }
1978
            }
1979
            /* TODO: no idea about non-pure, non-normalized! */
1980
         }
1981
         else if (chan_type == UTIL_FORMAT_TYPE_UNSIGNED) {
1982
            if (chan_norm) {
1983
               min_clamp = vec4_bld.zero;
1984
               max_clamp = vec4_bld.one;
1985
            }
1986
            /*
1987
             * Need a ugly hack here, because we don't have Z32_FLOAT_X8X24
1988
             * we use Z32_FLOAT_S8X24 to imply sampling depth component
1989
             * and ignoring stencil, which will blow up here if we try to
1990
             * do a uint clamp in a float texel build...
1991
             * And even if we had that format, mesa st also thinks using z24s8
1992
             * means depth sampling ignoring stencil.
1993
             */
1994
            else if (chan_pure) {
1995
               /*
1996
                * Border color was stored as uint, hence never need min
1997
                * clamp, and only need max clamp if chan has less than 32 bits.
1998
                */
1999
               unsigned chan_size = format_desc->channel[chan].size;
2000
               if (chan_size < 32) {
2001
                  max_clamp = lp_build_const_int_vec(gallivm, vec4_type,
2002
                                                     (1 << chan_size) - 1);
2003
               }
2004
               /* TODO: no idea about non-pure, non-normalized! */
2005
            }
2006
         }
2007
         else if (chan_type == UTIL_FORMAT_TYPE_FIXED) {
2008
            /* TODO: I have no idea what clamp this would need if any! */
2009
         }
2010
      }
2011
      /* mixed plain formats (or different pure size) */
2012
      switch (format_desc->format) {
2013
      case PIPE_FORMAT_B10G10R10A2_UINT:
2014
      case PIPE_FORMAT_R10G10B10A2_UINT:
2015
      {
2016
         unsigned max10 = (1 << 10) - 1;
2017
         max_clamp = lp_build_const_aos(gallivm, vec4_type, max10, max10,
2018
                                        max10, (1 << 2) - 1, NULL);
2019
      }
2020
         break;
2021
      case PIPE_FORMAT_R10SG10SB10SA2U_NORM:
2022
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
2023
                                        -1.0F, 0.0F, NULL);
2024
         max_clamp = vec4_bld.one;
2025
         break;
2026
      case PIPE_FORMAT_R8SG8SB8UX8U_NORM:
2027
      case PIPE_FORMAT_R5SG5SB6U_NORM:
2028
         min_clamp = lp_build_const_aos(gallivm, vec4_type, -1.0F, -1.0F,
2029
                                        0.0F, 0.0F, NULL);
2030
         max_clamp = vec4_bld.one;
2031
         break;
2032
      default:
2033
         break;
2034
      }
2035
   }
2036
   else {
2037
      /* cannot figure this out from format description */
2038
      if (format_desc->layout == UTIL_FORMAT_LAYOUT_S3TC) {
2039
         /* s3tc formats are always unorm */
2040
         min_clamp = vec4_bld.zero;
2041
         max_clamp = vec4_bld.one;
2042
      }
2043
      else if (format_desc->layout == UTIL_FORMAT_LAYOUT_RGTC ||
2044
               format_desc->layout == UTIL_FORMAT_LAYOUT_ETC) {
2045
         switch (format_desc->format) {
2046
         case PIPE_FORMAT_RGTC1_UNORM:
2047
         case PIPE_FORMAT_RGTC2_UNORM:
2048
         case PIPE_FORMAT_LATC1_UNORM:
2049
         case PIPE_FORMAT_LATC2_UNORM:
2050
         case PIPE_FORMAT_ETC1_RGB8:
2051
            min_clamp = vec4_bld.zero;
2052
            max_clamp = vec4_bld.one;
2053
            break;
2054
         case PIPE_FORMAT_RGTC1_SNORM:
2055
         case PIPE_FORMAT_RGTC2_SNORM:
2056
         case PIPE_FORMAT_LATC1_SNORM:
2057
         case PIPE_FORMAT_LATC2_SNORM:
2058
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2059
            max_clamp = vec4_bld.one;
2060
            break;
2061
         default:
2062
            assert(0);
2063
            break;
2064
         }
2065
      }
2066
      /*
2067
       * all others from subsampled/other group, though we don't care
2068
       * about yuv (and should not have any from zs here)
2069
       */
2070
      else if (format_desc->colorspace != UTIL_FORMAT_COLORSPACE_YUV){
2071
         switch (format_desc->format) {
2072
         case PIPE_FORMAT_R8G8_B8G8_UNORM:
2073
         case PIPE_FORMAT_G8R8_G8B8_UNORM:
2074
         case PIPE_FORMAT_G8R8_B8R8_UNORM:
2075
         case PIPE_FORMAT_R8G8_R8B8_UNORM:
2076
         case PIPE_FORMAT_R1_UNORM: /* doesn't make sense but ah well */
2077
            min_clamp = vec4_bld.zero;
2078
            max_clamp = vec4_bld.one;
2079
            break;
2080
         case PIPE_FORMAT_R8G8Bx_SNORM:
2081
            min_clamp = lp_build_const_vec(gallivm, vec4_type, -1.0F);
2082
            max_clamp = vec4_bld.one;
2083
            break;
2084
            /*
2085
             * Note smallfloat formats usually don't need clamping
2086
             * (they still have infinite range) however this is not
2087
             * true for r11g11b10 and r9g9b9e5, which can't represent
2088
             * negative numbers (and additionally r9g9b9e5 can't represent
2089
             * very large numbers). d3d10 seems happy without clamping in
2090
             * this case, but gl spec is pretty clear: "for floating
2091
             * point and integer formats, border values are clamped to
2092
             * the representable range of the format" so do that here.
2093
             */
2094
         case PIPE_FORMAT_R11G11B10_FLOAT:
2095
            min_clamp = vec4_bld.zero;
2096
            break;
2097
         case PIPE_FORMAT_R9G9B9E5_FLOAT:
2098
            min_clamp = vec4_bld.zero;
2099
            max_clamp = lp_build_const_vec(gallivm, vec4_type, MAX_RGB9E5);
2100
            break;
2101
         default:
2102
            assert(0);
2103
            break;
2104
         }
2105
      }
2106
   }
2107
 
2108
   if (min_clamp) {
2109
      border_color = lp_build_max(&vec4_bld, border_color, min_clamp);
2110
   }
2111
   if (max_clamp) {
2112
      border_color = lp_build_min(&vec4_bld, border_color, max_clamp);
2113
   }
2114
 
2115
   bld->border_color_clamped = border_color;
2116
}
2117
 
2118
 
2119
/**
2120
 * General texture sampling codegen.
2121
 * This function handles texture sampling for all texture targets (1D,
2122
 * 2D, 3D, cube) and all filtering modes.
2123
 */
2124
static void
2125
lp_build_sample_general(struct lp_build_sample_context *bld,
2126
                        unsigned sampler_unit,
2127
                        boolean is_gather,
2128
                        LLVMValueRef *coords,
2129
                        const LLVMValueRef *offsets,
2130
                        LLVMValueRef lod_positive,
2131
                        LLVMValueRef lod_fpart,
2132
                        LLVMValueRef ilevel0,
2133
                        LLVMValueRef ilevel1,
2134
                        LLVMValueRef *colors_out)
2135
{
2136
   LLVMBuilderRef builder = bld->gallivm->builder;
2137
   const struct lp_static_sampler_state *sampler_state = bld->static_sampler_state;
2138
   const unsigned mip_filter = sampler_state->min_mip_filter;
2139
   const unsigned min_filter = sampler_state->min_img_filter;
2140
   const unsigned mag_filter = sampler_state->mag_img_filter;
2141
   LLVMValueRef texels[4];
2142
   unsigned chan;
2143
 
2144
   /* if we need border color, (potentially) clamp it now */
2145
   if (lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_s,
2146
                                              min_filter,
2147
                                              mag_filter) ||
2148
       (bld->dims > 1 &&
2149
           lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_t,
2150
                                                  min_filter,
2151
                                                  mag_filter)) ||
2152
       (bld->dims > 2 &&
2153
           lp_sampler_wrap_mode_uses_border_color(sampler_state->wrap_r,
2154
                                                  min_filter,
2155
                                                  mag_filter))) {
2156
      lp_build_clamp_border_color(bld, sampler_unit);
2157
   }
2158
 
2159
 
2160
   /*
2161
    * Get/interpolate texture colors.
2162
    */
2163
 
2164
   for (chan = 0; chan < 4; ++chan) {
2165
     texels[chan] = lp_build_alloca(bld->gallivm, bld->texel_bld.vec_type, "");
2166
     lp_build_name(texels[chan], "sampler%u_texel_%c_var", sampler_unit, "xyzw"[chan]);
2167
   }
2168
 
2169
   if (min_filter == mag_filter) {
2170
      /* no need to distinguish between minification and magnification */
2171
      lp_build_sample_mipmap(bld, min_filter, mip_filter,
2172
                             is_gather,
2173
                             coords, offsets,
2174
                             ilevel0, ilevel1, lod_fpart,
2175
                             texels);
2176
   }
2177
   else {
2178
      /*
2179
       * Could also get rid of the if-logic and always use mipmap_both, both
2180
       * for the single lod and multi-lod case if nothing really uses this.
2181
       */
2182
      if (bld->num_lods == 1) {
2183
         /* Emit conditional to choose min image filter or mag image filter
2184
          * depending on the lod being > 0 or <= 0, respectively.
2185
          */
2186
         struct lp_build_if_state if_ctx;
2187
 
2188
         lod_positive = LLVMBuildTrunc(builder, lod_positive,
2189
                                       LLVMInt1TypeInContext(bld->gallivm->context), "");
2190
 
2191
         lp_build_if(&if_ctx, bld->gallivm, lod_positive);
2192
         {
2193
            /* Use the minification filter */
2194
            lp_build_sample_mipmap(bld, min_filter, mip_filter, FALSE,
2195
                                   coords, offsets,
2196
                                   ilevel0, ilevel1, lod_fpart,
2197
                                   texels);
2198
         }
2199
         lp_build_else(&if_ctx);
2200
         {
2201
            /* Use the magnification filter */
2202
            lp_build_sample_mipmap(bld, mag_filter, PIPE_TEX_MIPFILTER_NONE,
2203
                                   FALSE,
2204
                                   coords, offsets,
2205
                                   ilevel0, NULL, NULL,
2206
                                   texels);
2207
         }
2208
         lp_build_endif(&if_ctx);
2209
      }
2210
      else {
2211
         LLVMValueRef need_linear, linear_mask;
2212
         unsigned mip_filter_for_nearest;
2213
         struct lp_build_if_state if_ctx;
2214
 
2215
         if (min_filter == PIPE_TEX_FILTER_LINEAR) {
2216
            linear_mask = lod_positive;
2217
            mip_filter_for_nearest = PIPE_TEX_MIPFILTER_NONE;
2218
         }
2219
         else {
2220
            linear_mask = lp_build_not(&bld->lodi_bld, lod_positive);
2221
            mip_filter_for_nearest = mip_filter;
2222
         }
2223
         need_linear = lp_build_any_true_range(&bld->lodi_bld, bld->num_lods,
2224
                                               linear_mask);
2225
 
2226
         if (bld->num_lods != bld->coord_type.length) {
2227
            linear_mask = lp_build_unpack_broadcast_aos_scalars(bld->gallivm,
2228
                                                                bld->lodi_type,
2229
                                                                bld->int_coord_type,
2230
                                                                linear_mask);
2231
         }
2232
 
2233
         lp_build_if(&if_ctx, bld->gallivm, need_linear);
2234
         {
2235
            /*
2236
             * Do sampling with both filters simultaneously. This means using
2237
             * a linear filter and doing some tricks (with weights) for the pixels
2238
             * which need nearest filter.
2239
             * Note that it's probably rare some pixels need nearest and some
2240
             * linear filter but the fixups required for the nearest pixels
2241
             * aren't all that complicated so just always run a combined path
2242
             * if at least some pixels require linear.
2243
             */
2244
            lp_build_sample_mipmap_both(bld, linear_mask, mip_filter,
2245
                                        coords, offsets,
2246
                                        ilevel0, ilevel1,
2247
                                        lod_fpart, lod_positive,
2248
                                        texels);
2249
         }
2250
         lp_build_else(&if_ctx);
2251
         {
2252
            /*
2253
             * All pixels require just nearest filtering, which is way
2254
             * cheaper than linear, hence do a separate path for that.
2255
             */
2256
            lp_build_sample_mipmap(bld, PIPE_TEX_FILTER_NEAREST, FALSE,
2257
                                   mip_filter_for_nearest,
2258
                                   coords, offsets,
2259
                                   ilevel0, ilevel1, lod_fpart,
2260
                                   texels);
2261
         }
2262
         lp_build_endif(&if_ctx);
2263
      }
2264
   }
2265
 
2266
   for (chan = 0; chan < 4; ++chan) {
2267
     colors_out[chan] = LLVMBuildLoad(builder, texels[chan], "");
2268
     lp_build_name(colors_out[chan], "sampler%u_texel_%c", sampler_unit, "xyzw"[chan]);
2269
   }
2270
}
2271
 
2272
 
2273
/**
2274
 * Texel fetch function.
2275
 * In contrast to general sampling there is no filtering, no coord minification,
2276
 * lod (if any) is always explicit uint, coords are uints (in terms of texel units)
2277
 * directly to be applied to the selected mip level (after adding texel offsets).
2278
 * This function handles texel fetch for all targets where texel fetch is supported
2279
 * (no cube maps, but 1d, 2d, 3d are supported, arrays and buffers should be too).
2280
 */
2281
static void
2282
lp_build_fetch_texel(struct lp_build_sample_context *bld,
2283
                     unsigned texture_unit,
2284
                     const LLVMValueRef *coords,
2285
                     LLVMValueRef explicit_lod,
2286
                     const LLVMValueRef *offsets,
2287
                     LLVMValueRef *colors_out)
2288
{
2289
   struct lp_build_context *perquadi_bld = &bld->lodi_bld;
2290
   struct lp_build_context *int_coord_bld = &bld->int_coord_bld;
2291
   unsigned dims = bld->dims, chan;
2292
   unsigned target = bld->static_texture_state->target;
2293
   boolean out_of_bound_ret_zero = TRUE;
2294
   LLVMValueRef size, ilevel;
2295
   LLVMValueRef row_stride_vec = NULL, img_stride_vec = NULL;
2296
   LLVMValueRef x = coords[0], y = coords[1], z = coords[2];
2297
   LLVMValueRef width, height, depth, i, j;
2298
   LLVMValueRef offset, out_of_bounds, out1;
2299
 
2300
   out_of_bounds = int_coord_bld->zero;
2301
 
2302
   if (explicit_lod && bld->static_texture_state->target != PIPE_BUFFER) {
2303
      if (bld->num_mips != int_coord_bld->type.length) {
2304
         ilevel = lp_build_pack_aos_scalars(bld->gallivm, int_coord_bld->type,
2305
                                            perquadi_bld->type, explicit_lod, 0);
2306
      }
2307
      else {
2308
         ilevel = explicit_lod;
2309
      }
2310
      lp_build_nearest_mip_level(bld, texture_unit, ilevel, &ilevel,
2311
                                 out_of_bound_ret_zero ? &out_of_bounds : NULL);
2312
   }
2313
   else {
2314
      assert(bld->num_mips == 1);
2315
      if (bld->static_texture_state->target != PIPE_BUFFER) {
2316
         ilevel = bld->dynamic_state->first_level(bld->dynamic_state, bld->gallivm,
2317
                                                  bld->context_ptr, texture_unit);
2318
      }
2319
      else {
2320
         ilevel = lp_build_const_int32(bld->gallivm, 0);
2321
      }
2322
   }
2323
   lp_build_mipmap_level_sizes(bld, ilevel,
2324
                               &size,
2325
                               &row_stride_vec, &img_stride_vec);
2326
   lp_build_extract_image_sizes(bld, &bld->int_size_bld, int_coord_bld->type,
2327
                                size, &width, &height, &depth);
2328
 
2329
   if (target == PIPE_TEXTURE_1D_ARRAY ||
2330
       target == PIPE_TEXTURE_2D_ARRAY) {
2331
      if (out_of_bound_ret_zero) {
2332
         z = lp_build_layer_coord(bld, texture_unit, FALSE, z, &out1);
2333
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2334
      }
2335
      else {
2336
         z = lp_build_layer_coord(bld, texture_unit, FALSE, z, NULL);
2337
      }
2338
   }
2339
 
2340
   /* This is a lot like border sampling */
2341
   if (offsets[0]) {
2342
      /*
2343
       * coords are really unsigned, offsets are signed, but I don't think
2344
       * exceeding 31 bits is possible
2345
       */
2346
      x = lp_build_add(int_coord_bld, x, offsets[0]);
2347
   }
2348
   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, x, int_coord_bld->zero);
2349
   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2350
   out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, x, width);
2351
   out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2352
 
2353
   if (dims >= 2) {
2354
      if (offsets[1]) {
2355
         y = lp_build_add(int_coord_bld, y, offsets[1]);
2356
      }
2357
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, y, int_coord_bld->zero);
2358
      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2359
      out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, y, height);
2360
      out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2361
 
2362
      if (dims >= 3) {
2363
         if (offsets[2]) {
2364
            z = lp_build_add(int_coord_bld, z, offsets[2]);
2365
         }
2366
         out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_LESS, z, int_coord_bld->zero);
2367
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2368
         out1 = lp_build_cmp(int_coord_bld, PIPE_FUNC_GEQUAL, z, depth);
2369
         out_of_bounds = lp_build_or(int_coord_bld, out_of_bounds, out1);
2370
      }
2371
   }
2372
 
2373
   lp_build_sample_offset(int_coord_bld,
2374
                          bld->format_desc,
2375
                          x, y, z, row_stride_vec, img_stride_vec,
2376
                          &offset, &i, &j);
2377
 
2378
   if (bld->static_texture_state->target != PIPE_BUFFER) {
2379
      offset = lp_build_add(int_coord_bld, offset,
2380
                            lp_build_get_mip_offsets(bld, ilevel));
2381
   }
2382
 
2383
   offset = lp_build_andnot(int_coord_bld, offset, out_of_bounds);
2384
 
2385
   lp_build_fetch_rgba_soa(bld->gallivm,
2386
                           bld->format_desc,
2387
                           bld->texel_type,
2388
                           bld->base_ptr, offset,
2389
                           i, j,
2390
                           colors_out);
2391
 
2392
   if (out_of_bound_ret_zero) {
2393
      /*
2394
       * Only needed for ARB_robust_buffer_access_behavior and d3d10.
2395
       * Could use min/max above instead of out-of-bounds comparisons
2396
       * if we don't care about the result returned for out-of-bounds.
2397
       */
2398
      for (chan = 0; chan < 4; chan++) {
2399
         colors_out[chan] = lp_build_select(&bld->texel_bld, out_of_bounds,
2400
                                            bld->texel_bld.zero, colors_out[chan]);
2401
      }
2402
   }
2403
}
2404
 
2405
 
2406
/**
2407
 * Just set texels to white instead of actually sampling the texture.
2408
 * For debugging.
2409
 */
2410
void
2411
lp_build_sample_nop(struct gallivm_state *gallivm,
2412
                    struct lp_type type,
2413
                    const LLVMValueRef *coords,
2414
                    LLVMValueRef texel_out[4])
2415
{
2416
   LLVMValueRef one = lp_build_one(gallivm, type);
2417
   unsigned chan;
2418
 
2419
   for (chan = 0; chan < 4; chan++) {
2420
      texel_out[chan] = one;
2421
   }
2422
}
2423
 
2424
 
2425
/**
2426
 * Build the actual texture sampling code.
2427
 * 'texel' will return a vector of four LLVMValueRefs corresponding to
2428
 * R, G, B, A.
2429
 * \param type  vector float type to use for coords, etc.
2430
 * \param sample_key
2431
 * \param derivs  partial derivatives of (s,t,r,q) with respect to x and y
2432
 */
2433
static void
2434
lp_build_sample_soa_code(struct gallivm_state *gallivm,
2435
                         const struct lp_static_texture_state *static_texture_state,
2436
                         const struct lp_static_sampler_state *static_sampler_state,
2437
                         struct lp_sampler_dynamic_state *dynamic_state,
2438
                         struct lp_type type,
2439
                         unsigned sample_key,
2440
                         unsigned texture_index,
2441
                         unsigned sampler_index,
2442
                         LLVMValueRef context_ptr,
2443
                         const LLVMValueRef *coords,
2444
                         const LLVMValueRef *offsets,
2445
                         const struct lp_derivatives *derivs, /* optional */
2446
                         LLVMValueRef lod, /* optional */
2447
                         LLVMValueRef texel_out[4])
2448
{
2449
   unsigned target = static_texture_state->target;
2450
   unsigned dims = texture_dims(target);
2451
   unsigned num_quads = type.length / 4;
2452
   unsigned mip_filter, min_img_filter, mag_img_filter, i;
2453
   struct lp_build_sample_context bld;
2454
   struct lp_static_sampler_state derived_sampler_state = *static_sampler_state;
2455
   LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
2456
   LLVMBuilderRef builder = gallivm->builder;
2457
   LLVMValueRef tex_width, newcoords[5];
2458
   enum lp_sampler_lod_property lod_property;
2459
   enum lp_sampler_lod_control lod_control;
2460
   enum lp_sampler_op_type op_type;
2461
   LLVMValueRef lod_bias = NULL;
2462
   LLVMValueRef explicit_lod = NULL;
2463
   boolean op_is_tex;
2464
 
2465
   if (0) {
2466
      enum pipe_format fmt = static_texture_state->format;
2467
      debug_printf("Sample from %s\n", util_format_name(fmt));
2468
   }
2469
 
2470
   lod_property = (sample_key & LP_SAMPLER_LOD_PROPERTY_MASK) >>
2471
                     LP_SAMPLER_LOD_PROPERTY_SHIFT;
2472
   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
2473
                    LP_SAMPLER_LOD_CONTROL_SHIFT;
2474
   op_type = (sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
2475
                 LP_SAMPLER_OP_TYPE_SHIFT;
2476
 
2477
   op_is_tex = op_type == LP_SAMPLER_OP_TEXTURE;
2478
 
2479
   if (lod_control == LP_SAMPLER_LOD_BIAS) {
2480
      lod_bias = lod;
2481
      assert(lod);
2482
      assert(derivs == NULL);
2483
   }
2484
   else if (lod_control == LP_SAMPLER_LOD_EXPLICIT) {
2485
      explicit_lod = lod;
2486
      assert(lod);
2487
      assert(derivs == NULL);
2488
   }
2489
   else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
2490
      assert(derivs);
2491
      assert(lod == NULL);
2492
   }
2493
   else {
2494
      assert(derivs == NULL);
2495
      assert(lod == NULL);
2496
   }
2497
 
2498
   if (static_texture_state->format == PIPE_FORMAT_NONE) {
2499
      /*
2500
       * If there's nothing bound, format is NONE, and we must return
2501
       * all zero as mandated by d3d10 in this case.
2502
       */
2503
      unsigned chan;
2504
      LLVMValueRef zero = lp_build_const_vec(gallivm, type, 0.0F);
2505
      for (chan = 0; chan < 4; chan++) {
2506
         texel_out[chan] = zero;
2507
      }
2508
      return;
2509
   }
2510
 
2511
   assert(type.floating);
2512
 
2513
   /* Setup our build context */
2514
   memset(&bld, 0, sizeof bld);
2515
   bld.gallivm = gallivm;
2516
   bld.context_ptr = context_ptr;
2517
   bld.static_sampler_state = &derived_sampler_state;
2518
   bld.static_texture_state = static_texture_state;
2519
   bld.dynamic_state = dynamic_state;
2520
   bld.format_desc = util_format_description(static_texture_state->format);
2521
   bld.dims = dims;
2522
 
2523
   bld.vector_width = lp_type_width(type);
2524
 
2525
   bld.float_type = lp_type_float(32);
2526
   bld.int_type = lp_type_int(32);
2527
   bld.coord_type = type;
2528
   bld.int_coord_type = lp_int_type(type);
2529
   bld.float_size_in_type = lp_type_float(32);
2530
   bld.float_size_in_type.length = dims > 1 ? 4 : 1;
2531
   bld.int_size_in_type = lp_int_type(bld.float_size_in_type);
2532
   bld.texel_type = type;
2533
 
2534
   /* always using the first channel hopefully should be safe,
2535
    * if not things WILL break in other places anyway.
2536
    */
2537
   if (bld.format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB &&
2538
       bld.format_desc->channel[0].pure_integer) {
2539
      if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_SIGNED) {
2540
         bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2541
      }
2542
      else if (bld.format_desc->channel[0].type == UTIL_FORMAT_TYPE_UNSIGNED) {
2543
         bld.texel_type = lp_type_uint_vec(type.width, type.width * type.length);
2544
      }
2545
   }
2546
   else if (util_format_has_stencil(bld.format_desc) &&
2547
       !util_format_has_depth(bld.format_desc)) {
2548
      /* for stencil only formats, sample stencil (uint) */
2549
      bld.texel_type = lp_type_int_vec(type.width, type.width * type.length);
2550
   }
2551
 
2552
   if (!static_texture_state->level_zero_only) {
2553
      derived_sampler_state.min_mip_filter = static_sampler_state->min_mip_filter;
2554
   } else {
2555
      derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2556
   }
2557
   if (op_type == LP_SAMPLER_OP_GATHER) {
2558
      /*
2559
       * gather4 is exactly like GL_LINEAR filtering but in the end skipping
2560
       * the actual filtering. Using mostly the same paths, so cube face
2561
       * selection, coord wrapping etc. all naturally uses the same code.
2562
       */
2563
      derived_sampler_state.min_mip_filter = PIPE_TEX_MIPFILTER_NONE;
2564
      derived_sampler_state.min_img_filter = PIPE_TEX_FILTER_LINEAR;
2565
      derived_sampler_state.mag_img_filter = PIPE_TEX_FILTER_LINEAR;
2566
   }
2567
   mip_filter = derived_sampler_state.min_mip_filter;
2568
 
2569
   if (0) {
2570
      debug_printf("  .min_mip_filter = %u\n", derived_sampler_state.min_mip_filter);
2571
   }
2572
 
2573
   if (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2574
       static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY)
2575
   {
2576
      /*
2577
       * Seamless filtering ignores wrap modes.
2578
       * Setting to CLAMP_TO_EDGE is correct for nearest filtering, for
2579
       * bilinear it's not correct but way better than using for instance repeat.
2580
       * Note we even set this for non-seamless. Technically GL allows any wrap
2581
       * mode, which made sense when supporting true borders (can get seamless
2582
       * effect with border and CLAMP_TO_BORDER), but gallium doesn't support
2583
       * borders and d3d9 requires wrap modes to be ignored and it's a pain to fix
2584
       * up the sampler state (as it makes it texture dependent).
2585
       */
2586
      derived_sampler_state.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2587
      derived_sampler_state.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
2588
   }
2589
 
2590
   min_img_filter = derived_sampler_state.min_img_filter;
2591
   mag_img_filter = derived_sampler_state.mag_img_filter;
2592
 
2593
 
2594
   /*
2595
    * This is all a bit complicated different paths are chosen for performance
2596
    * reasons.
2597
    * Essentially, there can be 1 lod per element, 1 lod per quad or 1 lod for
2598
    * everything (the last two options are equivalent for 4-wide case).
2599
    * If there's per-quad lod but we split to 4-wide so we can use AoS, per-quad
2600
    * lod is calculated then the lod value extracted afterwards so making this
2601
    * case basically the same as far as lod handling is concerned for the
2602
    * further sample/filter code as the 1 lod for everything case.
2603
    * Different lod handling mostly shows up when building mipmap sizes
2604
    * (lp_build_mipmap_level_sizes() and friends) and also in filtering
2605
    * (getting the fractional part of the lod to the right texels).
2606
    */
2607
 
2608
   /*
2609
    * There are other situations where at least the multiple int lods could be
2610
    * avoided like min and max lod being equal.
2611
    */
2612
   bld.num_mips = bld.num_lods = 1;
2613
 
2614
   if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
2615
       (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
2616
       (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2617
        static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
2618
       (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2619
      /*
2620
       * special case for using per-pixel lod even for implicit lod,
2621
       * which is generally never required (ok by APIs) except to please
2622
       * some (somewhat broken imho) tests (because per-pixel face selection
2623
       * can cause derivatives to be different for pixels outside the primitive
2624
       * due to the major axis division even if pre-project derivatives are
2625
       * looking normal).
2626
       */
2627
      bld.num_mips = type.length;
2628
      bld.num_lods = type.length;
2629
   }
2630
   else if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT ||
2631
       (explicit_lod || lod_bias || derivs)) {
2632
      if ((!op_is_tex && target != PIPE_BUFFER) ||
2633
          (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2634
         bld.num_mips = type.length;
2635
         bld.num_lods = type.length;
2636
      }
2637
      else if (op_is_tex && min_img_filter != mag_img_filter) {
2638
         bld.num_mips = 1;
2639
         bld.num_lods = type.length;
2640
      }
2641
   }
2642
   /* TODO: for true scalar_lod should only use 1 lod value */
2643
   else if ((!op_is_tex && explicit_lod && target != PIPE_BUFFER) ||
2644
            (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2645
      bld.num_mips = num_quads;
2646
      bld.num_lods = num_quads;
2647
   }
2648
   else if (op_is_tex && min_img_filter != mag_img_filter) {
2649
      bld.num_mips = 1;
2650
      bld.num_lods = num_quads;
2651
   }
2652
 
2653
 
2654
   bld.lodf_type = type;
2655
   /* we want native vector size to be able to use our intrinsics */
2656
   if (bld.num_lods != type.length) {
2657
      /* TODO: this currently always has to be per-quad or per-element */
2658
      bld.lodf_type.length = type.length > 4 ? ((type.length + 15) / 16) * 4 : 1;
2659
   }
2660
   bld.lodi_type = lp_int_type(bld.lodf_type);
2661
   bld.levelf_type = bld.lodf_type;
2662
   if (bld.num_mips == 1) {
2663
      bld.levelf_type.length = 1;
2664
   }
2665
   bld.leveli_type = lp_int_type(bld.levelf_type);
2666
   bld.float_size_type = bld.float_size_in_type;
2667
   /* Note: size vectors may not be native. They contain minified w/h/d/_ values,
2668
    * with per-element lod that is w0/h0/d0/_/w1/h1/d1_/... so up to 8x4f32 */
2669
   if (bld.num_mips > 1) {
2670
      bld.float_size_type.length = bld.num_mips == type.length ?
2671
                                      bld.num_mips * bld.float_size_in_type.length :
2672
                                      type.length;
2673
   }
2674
   bld.int_size_type = lp_int_type(bld.float_size_type);
2675
 
2676
   lp_build_context_init(&bld.float_bld, gallivm, bld.float_type);
2677
   lp_build_context_init(&bld.float_vec_bld, gallivm, type);
2678
   lp_build_context_init(&bld.int_bld, gallivm, bld.int_type);
2679
   lp_build_context_init(&bld.coord_bld, gallivm, bld.coord_type);
2680
   lp_build_context_init(&bld.int_coord_bld, gallivm, bld.int_coord_type);
2681
   lp_build_context_init(&bld.int_size_in_bld, gallivm, bld.int_size_in_type);
2682
   lp_build_context_init(&bld.float_size_in_bld, gallivm, bld.float_size_in_type);
2683
   lp_build_context_init(&bld.int_size_bld, gallivm, bld.int_size_type);
2684
   lp_build_context_init(&bld.float_size_bld, gallivm, bld.float_size_type);
2685
   lp_build_context_init(&bld.texel_bld, gallivm, bld.texel_type);
2686
   lp_build_context_init(&bld.levelf_bld, gallivm, bld.levelf_type);
2687
   lp_build_context_init(&bld.leveli_bld, gallivm, bld.leveli_type);
2688
   lp_build_context_init(&bld.lodf_bld, gallivm, bld.lodf_type);
2689
   lp_build_context_init(&bld.lodi_bld, gallivm, bld.lodi_type);
2690
 
2691
   /* Get the dynamic state */
2692
   tex_width = dynamic_state->width(dynamic_state, gallivm,
2693
                                    context_ptr, texture_index);
2694
   bld.row_stride_array = dynamic_state->row_stride(dynamic_state, gallivm,
2695
                                                    context_ptr, texture_index);
2696
   bld.img_stride_array = dynamic_state->img_stride(dynamic_state, gallivm,
2697
                                                    context_ptr, texture_index);
2698
   bld.base_ptr = dynamic_state->base_ptr(dynamic_state, gallivm,
2699
                                          context_ptr, texture_index);
2700
   bld.mip_offsets = dynamic_state->mip_offsets(dynamic_state, gallivm,
2701
                                                context_ptr, texture_index);
2702
   /* Note that mip_offsets is an array[level] of offsets to texture images */
2703
 
2704
   /* width, height, depth as single int vector */
2705
   if (dims <= 1) {
2706
      bld.int_size = tex_width;
2707
   }
2708
   else {
2709
      bld.int_size = LLVMBuildInsertElement(builder, bld.int_size_in_bld.undef,
2710
                                            tex_width,
2711
                                            LLVMConstInt(i32t, 0, 0), "");
2712
      if (dims >= 2) {
2713
         LLVMValueRef tex_height =
2714
            dynamic_state->height(dynamic_state, gallivm,
2715
                                  context_ptr, texture_index);
2716
         bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2717
                                               tex_height,
2718
                                               LLVMConstInt(i32t, 1, 0), "");
2719
         if (dims >= 3) {
2720
            LLVMValueRef tex_depth =
2721
               dynamic_state->depth(dynamic_state, gallivm, context_ptr,
2722
                                    texture_index);
2723
            bld.int_size = LLVMBuildInsertElement(builder, bld.int_size,
2724
                                                  tex_depth,
2725
                                                  LLVMConstInt(i32t, 2, 0), "");
2726
         }
2727
      }
2728
   }
2729
 
2730
   for (i = 0; i < 5; i++) {
2731
      newcoords[i] = coords[i];
2732
   }
2733
 
2734
   if (0) {
2735
      /* For debug: no-op texture sampling */
2736
      lp_build_sample_nop(gallivm,
2737
                          bld.texel_type,
2738
                          newcoords,
2739
                          texel_out);
2740
   }
2741
 
2742
   else if (op_type == LP_SAMPLER_OP_FETCH) {
2743
      lp_build_fetch_texel(&bld, texture_index, newcoords,
2744
                           lod, offsets,
2745
                           texel_out);
2746
   }
2747
 
2748
   else {
2749
      LLVMValueRef lod_fpart = NULL, lod_positive = NULL;
2750
      LLVMValueRef ilevel0 = NULL, ilevel1 = NULL;
2751
      boolean use_aos = util_format_fits_8unorm(bld.format_desc) &&
2752
                        op_is_tex &&
2753
                        /* not sure this is strictly needed or simply impossible */
2754
                        derived_sampler_state.compare_mode == PIPE_TEX_COMPARE_NONE &&
2755
                        lp_is_simple_wrap_mode(derived_sampler_state.wrap_s);
2756
 
2757
      use_aos &= bld.num_lods <= num_quads ||
2758
                 derived_sampler_state.min_img_filter ==
2759
                    derived_sampler_state.mag_img_filter;
2760
      if (dims > 1) {
2761
         use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_t);
2762
         if (dims > 2) {
2763
            use_aos &= lp_is_simple_wrap_mode(derived_sampler_state.wrap_r);
2764
         }
2765
      }
2766
      if ((static_texture_state->target == PIPE_TEXTURE_CUBE ||
2767
           static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
2768
          derived_sampler_state.seamless_cube_map &&
2769
          (derived_sampler_state.min_img_filter == PIPE_TEX_FILTER_LINEAR ||
2770
           derived_sampler_state.mag_img_filter == PIPE_TEX_FILTER_LINEAR)) {
2771
         /* theoretically possible with AoS filtering but not implemented (complex!) */
2772
         use_aos = 0;
2773
      }
2774
 
2775
      if ((gallivm_debug & GALLIVM_DEBUG_PERF) &&
2776
          !use_aos && util_format_fits_8unorm(bld.format_desc)) {
2777
         debug_printf("%s: using floating point linear filtering for %s\n",
2778
                      __FUNCTION__, bld.format_desc->short_name);
2779
         debug_printf("  min_img %d  mag_img %d  mip %d  target %d  seamless %d"
2780
                      "  wraps %d  wrapt %d  wrapr %d\n",
2781
                      derived_sampler_state.min_img_filter,
2782
                      derived_sampler_state.mag_img_filter,
2783
                      derived_sampler_state.min_mip_filter,
2784
                      static_texture_state->target,
2785
                      derived_sampler_state.seamless_cube_map,
2786
                      derived_sampler_state.wrap_s,
2787
                      derived_sampler_state.wrap_t,
2788
                      derived_sampler_state.wrap_r);
2789
      }
2790
 
2791
      lp_build_sample_common(&bld, texture_index, sampler_index,
2792
                             newcoords,
2793
                             derivs, lod_bias, explicit_lod,
2794
                             &lod_positive, &lod_fpart,
2795
                             &ilevel0, &ilevel1);
2796
 
2797
      if (use_aos && static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) {
2798
         /* The aos path doesn't do seamless filtering so simply add cube layer
2799
          * to face now.
2800
          */
2801
         newcoords[2] = lp_build_add(&bld.int_coord_bld, newcoords[2], newcoords[3]);
2802
      }
2803
 
2804
      /*
2805
       * we only try 8-wide sampling with soa as it appears to
2806
       * be a loss with aos with AVX (but it should work, except
2807
       * for conformance if min_filter != mag_filter if num_lods > 1).
2808
       * (It should be faster if we'd support avx2)
2809
       */
2810
      if (num_quads == 1 || !use_aos) {
2811
         if (use_aos) {
2812
            /* do sampling/filtering with fixed pt arithmetic */
2813
            lp_build_sample_aos(&bld, sampler_index,
2814
                                newcoords[0], newcoords[1],
2815
                                newcoords[2],
2816
                                offsets, lod_positive, lod_fpart,
2817
                                ilevel0, ilevel1,
2818
                                texel_out);
2819
         }
2820
 
2821
         else {
2822
            lp_build_sample_general(&bld, sampler_index,
2823
                                    op_type == LP_SAMPLER_OP_GATHER,
2824
                                    newcoords, offsets,
2825
                                    lod_positive, lod_fpart,
2826
                                    ilevel0, ilevel1,
2827
                                    texel_out);
2828
         }
2829
      }
2830
      else {
2831
         unsigned j;
2832
         struct lp_build_sample_context bld4;
2833
         struct lp_type type4 = type;
2834
         unsigned i;
2835
         LLVMValueRef texelout4[4];
2836
         LLVMValueRef texelouttmp[4][LP_MAX_VECTOR_LENGTH/16];
2837
 
2838
         type4.length = 4;
2839
 
2840
         /* Setup our build context */
2841
         memset(&bld4, 0, sizeof bld4);
2842
         bld4.gallivm = bld.gallivm;
2843
         bld4.context_ptr = bld.context_ptr;
2844
         bld4.static_texture_state = bld.static_texture_state;
2845
         bld4.static_sampler_state = bld.static_sampler_state;
2846
         bld4.dynamic_state = bld.dynamic_state;
2847
         bld4.format_desc = bld.format_desc;
2848
         bld4.dims = bld.dims;
2849
         bld4.row_stride_array = bld.row_stride_array;
2850
         bld4.img_stride_array = bld.img_stride_array;
2851
         bld4.base_ptr = bld.base_ptr;
2852
         bld4.mip_offsets = bld.mip_offsets;
2853
         bld4.int_size = bld.int_size;
2854
 
2855
         bld4.vector_width = lp_type_width(type4);
2856
 
2857
         bld4.float_type = lp_type_float(32);
2858
         bld4.int_type = lp_type_int(32);
2859
         bld4.coord_type = type4;
2860
         bld4.int_coord_type = lp_int_type(type4);
2861
         bld4.float_size_in_type = lp_type_float(32);
2862
         bld4.float_size_in_type.length = dims > 1 ? 4 : 1;
2863
         bld4.int_size_in_type = lp_int_type(bld4.float_size_in_type);
2864
         bld4.texel_type = bld.texel_type;
2865
         bld4.texel_type.length = 4;
2866
 
2867
         bld4.num_mips = bld4.num_lods = 1;
2868
         if ((gallivm_debug & GALLIVM_DEBUG_NO_QUAD_LOD) &&
2869
             (gallivm_debug & GALLIVM_DEBUG_NO_RHO_APPROX) &&
2870
             (static_texture_state->target == PIPE_TEXTURE_CUBE ||
2871
              static_texture_state->target == PIPE_TEXTURE_CUBE_ARRAY) &&
2872
             (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2873
            bld4.num_mips = type4.length;
2874
            bld4.num_lods = type4.length;
2875
         }
2876
         if (lod_property == LP_SAMPLER_LOD_PER_ELEMENT &&
2877
             (explicit_lod || lod_bias || derivs)) {
2878
            if ((!op_is_tex && target != PIPE_BUFFER) ||
2879
                (op_is_tex && mip_filter != PIPE_TEX_MIPFILTER_NONE)) {
2880
               bld4.num_mips = type4.length;
2881
               bld4.num_lods = type4.length;
2882
            }
2883
            else if (op_is_tex && min_img_filter != mag_img_filter) {
2884
               bld4.num_mips = 1;
2885
               bld4.num_lods = type4.length;
2886
            }
2887
         }
2888
 
2889
         /* we want native vector size to be able to use our intrinsics */
2890
         bld4.lodf_type = type4;
2891
         if (bld4.num_lods != type4.length) {
2892
            bld4.lodf_type.length = 1;
2893
         }
2894
         bld4.lodi_type = lp_int_type(bld4.lodf_type);
2895
         bld4.levelf_type = type4;
2896
         if (bld4.num_mips != type4.length) {
2897
            bld4.levelf_type.length = 1;
2898
         }
2899
         bld4.leveli_type = lp_int_type(bld4.levelf_type);
2900
         bld4.float_size_type = bld4.float_size_in_type;
2901
         if (bld4.num_mips > 1) {
2902
            bld4.float_size_type.length = bld4.num_mips == type4.length ?
2903
                                            bld4.num_mips * bld4.float_size_in_type.length :
2904
                                            type4.length;
2905
         }
2906
         bld4.int_size_type = lp_int_type(bld4.float_size_type);
2907
 
2908
         lp_build_context_init(&bld4.float_bld, gallivm, bld4.float_type);
2909
         lp_build_context_init(&bld4.float_vec_bld, gallivm, type4);
2910
         lp_build_context_init(&bld4.int_bld, gallivm, bld4.int_type);
2911
         lp_build_context_init(&bld4.coord_bld, gallivm, bld4.coord_type);
2912
         lp_build_context_init(&bld4.int_coord_bld, gallivm, bld4.int_coord_type);
2913
         lp_build_context_init(&bld4.int_size_in_bld, gallivm, bld4.int_size_in_type);
2914
         lp_build_context_init(&bld4.float_size_in_bld, gallivm, bld4.float_size_in_type);
2915
         lp_build_context_init(&bld4.int_size_bld, gallivm, bld4.int_size_type);
2916
         lp_build_context_init(&bld4.float_size_bld, gallivm, bld4.float_size_type);
2917
         lp_build_context_init(&bld4.texel_bld, gallivm, bld4.texel_type);
2918
         lp_build_context_init(&bld4.levelf_bld, gallivm, bld4.levelf_type);
2919
         lp_build_context_init(&bld4.leveli_bld, gallivm, bld4.leveli_type);
2920
         lp_build_context_init(&bld4.lodf_bld, gallivm, bld4.lodf_type);
2921
         lp_build_context_init(&bld4.lodi_bld, gallivm, bld4.lodi_type);
2922
 
2923
         for (i = 0; i < num_quads; i++) {
2924
            LLVMValueRef s4, t4, r4;
2925
            LLVMValueRef lod_positive4, lod_fpart4 = NULL;
2926
            LLVMValueRef ilevel04, ilevel14 = NULL;
2927
            LLVMValueRef offsets4[4] = { NULL };
2928
            unsigned num_lods = bld4.num_lods;
2929
 
2930
            s4 = lp_build_extract_range(gallivm, newcoords[0], 4*i, 4);
2931
            t4 = lp_build_extract_range(gallivm, newcoords[1], 4*i, 4);
2932
            r4 = lp_build_extract_range(gallivm, newcoords[2], 4*i, 4);
2933
 
2934
            if (offsets[0]) {
2935
               offsets4[0] = lp_build_extract_range(gallivm, offsets[0], 4*i, 4);
2936
               if (dims > 1) {
2937
                  offsets4[1] = lp_build_extract_range(gallivm, offsets[1], 4*i, 4);
2938
                  if (dims > 2) {
2939
                     offsets4[2] = lp_build_extract_range(gallivm, offsets[2], 4*i, 4);
2940
                  }
2941
               }
2942
            }
2943
            lod_positive4 = lp_build_extract_range(gallivm, lod_positive, num_lods * i, num_lods);
2944
            ilevel04 = bld.num_mips == 1 ? ilevel0 :
2945
                          lp_build_extract_range(gallivm, ilevel0, num_lods * i, num_lods);
2946
            if (mip_filter == PIPE_TEX_MIPFILTER_LINEAR) {
2947
               ilevel14 = lp_build_extract_range(gallivm, ilevel1, num_lods * i, num_lods);
2948
               lod_fpart4 = lp_build_extract_range(gallivm, lod_fpart, num_lods * i, num_lods);
2949
            }
2950
 
2951
            if (use_aos) {
2952
               /* do sampling/filtering with fixed pt arithmetic */
2953
               lp_build_sample_aos(&bld4, sampler_index,
2954
                                   s4, t4, r4, offsets4,
2955
                                   lod_positive4, lod_fpart4,
2956
                                   ilevel04, ilevel14,
2957
                                   texelout4);
2958
            }
2959
 
2960
            else {
2961
               /* this path is currently unreachable and hence might break easily... */
2962
               LLVMValueRef newcoords4[5];
2963
               newcoords4[0] = s4;
2964
               newcoords4[1] = t4;
2965
               newcoords4[2] = r4;
2966
               newcoords4[3] = lp_build_extract_range(gallivm, newcoords[3], 4*i, 4);
2967
               newcoords4[4] = lp_build_extract_range(gallivm, newcoords[4], 4*i, 4);
2968
 
2969
               lp_build_sample_general(&bld4, sampler_index,
2970
                                       op_type == LP_SAMPLER_OP_GATHER,
2971
                                       newcoords4, offsets4,
2972
                                       lod_positive4, lod_fpart4,
2973
                                       ilevel04, ilevel14,
2974
                                       texelout4);
2975
            }
2976
            for (j = 0; j < 4; j++) {
2977
               texelouttmp[j][i] = texelout4[j];
2978
            }
2979
         }
2980
 
2981
         for (j = 0; j < 4; j++) {
2982
            texel_out[j] = lp_build_concat(gallivm, texelouttmp[j], type4, num_quads);
2983
         }
2984
      }
2985
   }
2986
 
2987
   if (target != PIPE_BUFFER && op_type != LP_SAMPLER_OP_GATHER) {
2988
      apply_sampler_swizzle(&bld, texel_out);
2989
   }
2990
 
2991
   /*
2992
    * texel type can be a (32bit) int/uint (for pure int formats only),
2993
    * however we are expected to always return floats (storage is untyped).
2994
    */
2995
   if (!bld.texel_type.floating) {
2996
      unsigned chan;
2997
      for (chan = 0; chan < 4; chan++) {
2998
         texel_out[chan] = LLVMBuildBitCast(builder, texel_out[chan],
2999
                                            lp_build_vec_type(gallivm, type), "");
3000
      }
3001
   }
3002
}
3003
 
3004
 
3005
/*
 * Compile-time switch tested by lp_build_sample_soa(): when non-zero,
 * sampling may be emitted as a call to a generated texture function
 * instead of being inlined.
 */
#define USE_TEX_FUNC_CALL 1
3006
 
3007
/*
 * Capacity of the argument and argument-type arrays that
 * lp_build_sample_soa_func() fills when calling a texture function.
 */
#define LP_MAX_TEX_FUNC_ARGS 32
3008
 
3009
static inline void
3010
get_target_info(enum pipe_texture_target target,
3011
                unsigned *num_coords, unsigned *num_derivs,
3012
                unsigned *num_offsets, unsigned *layer)
3013
{
3014
   unsigned dims = texture_dims(target);
3015
   *num_coords = dims;
3016
   *num_offsets = dims;
3017
   *num_derivs = (target == PIPE_TEXTURE_CUBE ||
3018
                  target == PIPE_TEXTURE_CUBE_ARRAY) ? 3 : dims;
3019
   *layer = has_layer_coord(target) ? 2: 0;
3020
   if (target == PIPE_TEXTURE_CUBE_ARRAY) {
3021
      /*
3022
       * dims doesn't include r coord for cubes - this is handled
3023
       * by layer instead, but need to fix up for cube arrays...
3024
       */
3025
      *layer = 3;
3026
      *num_coords = 3;
3027
   }
3028
}
3029
 
3030
 
3031
/**
3032
 * Generate the function body for a texture sampling function.
3033
 */
3034
static void
3035
lp_build_sample_gen_func(struct gallivm_state *gallivm,
3036
                         const struct lp_static_texture_state *static_texture_state,
3037
                         const struct lp_static_sampler_state *static_sampler_state,
3038
                         struct lp_sampler_dynamic_state *dynamic_state,
3039
                         struct lp_type type,
3040
                         unsigned texture_index,
3041
                         unsigned sampler_index,
3042
                         LLVMValueRef function,
3043
                         unsigned num_args,
3044
                         unsigned sample_key)
3045
{
3046
   LLVMBuilderRef old_builder;
3047
   LLVMBasicBlockRef block;
3048
   LLVMValueRef coords[5];
3049
   LLVMValueRef offsets[3] = { NULL };
3050
   LLVMValueRef lod = NULL;
3051
   LLVMValueRef context_ptr;
3052
   LLVMValueRef texel_out[4];
3053
   struct lp_derivatives derivs;
3054
   struct lp_derivatives *deriv_ptr = NULL;
3055
   unsigned num_param = 0;
3056
   unsigned i, num_coords, num_derivs, num_offsets, layer;
3057
   enum lp_sampler_lod_control lod_control;
3058
 
3059
   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
3060
                    LP_SAMPLER_LOD_CONTROL_SHIFT;
3061
 
3062
   get_target_info(static_texture_state->target,
3063
                   &num_coords, &num_derivs, &num_offsets, &layer);
3064
 
3065
   /* "unpack" arguments */
3066
   context_ptr = LLVMGetParam(function, num_param++);
3067
   for (i = 0; i < num_coords; i++) {
3068
      coords[i] = LLVMGetParam(function, num_param++);
3069
   }
3070
   for (i = num_coords; i < 5; i++) {
3071
      /* This is rather unfortunate... */
3072
      coords[i] = lp_build_undef(gallivm, type);
3073
   }
3074
   if (layer) {
3075
      coords[layer] = LLVMGetParam(function, num_param++);
3076
   }
3077
   if (sample_key & LP_SAMPLER_SHADOW) {
3078
      coords[4] = LLVMGetParam(function, num_param++);
3079
   }
3080
   if (sample_key & LP_SAMPLER_OFFSETS) {
3081
      for (i = 0; i < num_offsets; i++) {
3082
         offsets[i] = LLVMGetParam(function, num_param++);
3083
      }
3084
   }
3085
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
3086
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3087
      lod = LLVMGetParam(function, num_param++);
3088
   }
3089
   else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3090
      for (i = 0; i < num_derivs; i++) {
3091
         derivs.ddx[i] = LLVMGetParam(function, num_param++);
3092
         derivs.ddy[i] = LLVMGetParam(function, num_param++);
3093
      }
3094
      deriv_ptr = &derivs;
3095
   }
3096
 
3097
   assert(num_args == num_param);
3098
 
3099
   /*
3100
    * Function body
3101
    */
3102
 
3103
   old_builder = gallivm->builder;
3104
   block = LLVMAppendBasicBlockInContext(gallivm->context, function, "entry");
3105
   gallivm->builder = LLVMCreateBuilderInContext(gallivm->context);
3106
   LLVMPositionBuilderAtEnd(gallivm->builder, block);
3107
 
3108
   lp_build_sample_soa_code(gallivm,
3109
                            static_texture_state,
3110
                            static_sampler_state,
3111
                            dynamic_state,
3112
                            type,
3113
                            sample_key,
3114
                            texture_index,
3115
                            sampler_index,
3116
                            context_ptr,
3117
                            coords,
3118
                            offsets,
3119
                            deriv_ptr,
3120
                            lod,
3121
                            texel_out);
3122
 
3123
   LLVMBuildAggregateRet(gallivm->builder, texel_out, 4);
3124
 
3125
   LLVMDisposeBuilder(gallivm->builder);
3126
   gallivm->builder = old_builder;
3127
 
3128
   gallivm_verify_function(gallivm, function);
3129
}
3130
 
3131
 
3132
/**
3133
 * Call the matching function for texture sampling.
3134
 * If there's no match, generate a new one.
3135
 */
3136
static void
3137
lp_build_sample_soa_func(struct gallivm_state *gallivm,
3138
                         const struct lp_static_texture_state *static_texture_state,
3139
                         const struct lp_static_sampler_state *static_sampler_state,
3140
                         struct lp_sampler_dynamic_state *dynamic_state,
3141
                         const struct lp_sampler_params *params)
3142
{
3143
   LLVMBuilderRef builder = gallivm->builder;
3144
   LLVMModuleRef module = LLVMGetGlobalParent(LLVMGetBasicBlockParent(
3145
                             LLVMGetInsertBlock(builder)));
3146
   LLVMValueRef function, inst;
3147
   LLVMValueRef args[LP_MAX_TEX_FUNC_ARGS];
3148
   LLVMBasicBlockRef bb;
3149
   LLVMValueRef tex_ret;
3150
   unsigned num_args = 0;
3151
   char func_name[64];
3152
   unsigned i, num_coords, num_derivs, num_offsets, layer;
3153
   unsigned texture_index = params->texture_index;
3154
   unsigned sampler_index = params->sampler_index;
3155
   unsigned sample_key = params->sample_key;
3156
   const LLVMValueRef *coords = params->coords;
3157
   const LLVMValueRef *offsets = params->offsets;
3158
   const struct lp_derivatives *derivs = params->derivs;
3159
   enum lp_sampler_lod_control lod_control;
3160
 
3161
   lod_control = (sample_key & LP_SAMPLER_LOD_CONTROL_MASK) >>
3162
                    LP_SAMPLER_LOD_CONTROL_SHIFT;
3163
 
3164
   get_target_info(static_texture_state->target,
3165
                   &num_coords, &num_derivs, &num_offsets, &layer);
3166
 
3167
   /*
3168
    * texture function matches are found by name.
3169
    * Thus the name has to include both the texture and sampler unit
3170
    * (which covers all static state) plus the actual texture function
3171
    * (including things like offsets, shadow coord, lod control).
3172
    * Additionally lod_property has to be included too.
3173
    */
3174
 
3175
   util_snprintf(func_name, sizeof(func_name), "texfunc_res_%d_sam_%d_%x",
3176
                 texture_index, sampler_index, sample_key);
3177
 
3178
   function = LLVMGetNamedFunction(module, func_name);
3179
 
3180
   if(!function) {
3181
      LLVMTypeRef arg_types[LP_MAX_TEX_FUNC_ARGS];
3182
      LLVMTypeRef ret_type;
3183
      LLVMTypeRef function_type;
3184
      LLVMTypeRef val_type[4];
3185
      unsigned num_param = 0;
3186
 
3187
      /*
3188
       * Generate the function prototype.
3189
       */
3190
 
3191
      arg_types[num_param++] = LLVMTypeOf(params->context_ptr);
3192
      for (i = 0; i < num_coords; i++) {
3193
         arg_types[num_param++] = LLVMTypeOf(coords[0]);
3194
         assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[i]));
3195
      }
3196
      if (layer) {
3197
         arg_types[num_param++] = LLVMTypeOf(coords[layer]);
3198
         assert(LLVMTypeOf(coords[0]) == LLVMTypeOf(coords[layer]));
3199
      }
3200
      if (sample_key & LP_SAMPLER_SHADOW) {
3201
         arg_types[num_param++] = LLVMTypeOf(coords[0]);
3202
      }
3203
      if (sample_key & LP_SAMPLER_OFFSETS) {
3204
         for (i = 0; i < num_offsets; i++) {
3205
            arg_types[num_param++] = LLVMTypeOf(offsets[0]);
3206
            assert(LLVMTypeOf(offsets[0]) == LLVMTypeOf(offsets[i]));
3207
         }
3208
      }
3209
      if (lod_control == LP_SAMPLER_LOD_BIAS ||
3210
          lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3211
         arg_types[num_param++] = LLVMTypeOf(params->lod);
3212
      }
3213
      else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3214
         for (i = 0; i < num_derivs; i++) {
3215
            arg_types[num_param++] = LLVMTypeOf(derivs->ddx[i]);
3216
            arg_types[num_param++] = LLVMTypeOf(derivs->ddy[i]);
3217
            assert(LLVMTypeOf(derivs->ddx[0]) == LLVMTypeOf(derivs->ddx[i]));
3218
            assert(LLVMTypeOf(derivs->ddy[0]) == LLVMTypeOf(derivs->ddy[i]));
3219
         }
3220
      }
3221
 
3222
      val_type[0] = val_type[1] = val_type[2] = val_type[3] =
3223
         lp_build_vec_type(gallivm, params->type);
3224
      ret_type = LLVMStructTypeInContext(gallivm->context, val_type, 4, 0);
3225
      function_type = LLVMFunctionType(ret_type, arg_types, num_param, 0);
3226
      function = LLVMAddFunction(module, func_name, function_type);
3227
 
3228
      for (i = 0; i < num_param; ++i) {
3229
         if(LLVMGetTypeKind(arg_types[i]) == LLVMPointerTypeKind) {
3230
            LLVMAddAttribute(LLVMGetParam(function, i), LLVMNoAliasAttribute);
3231
         }
3232
      }
3233
 
3234
      LLVMSetFunctionCallConv(function, LLVMFastCallConv);
3235
      LLVMSetLinkage(function, LLVMPrivateLinkage);
3236
 
3237
      lp_build_sample_gen_func(gallivm,
3238
                               static_texture_state,
3239
                               static_sampler_state,
3240
                               dynamic_state,
3241
                               params->type,
3242
                               texture_index,
3243
                               sampler_index,
3244
                               function,
3245
                               num_param,
3246
                               sample_key);
3247
   }
3248
 
3249
   num_args = 0;
3250
   args[num_args++] = params->context_ptr;
3251
   for (i = 0; i < num_coords; i++) {
3252
      args[num_args++] = coords[i];
3253
   }
3254
   if (layer) {
3255
      args[num_args++] = coords[layer];
3256
   }
3257
   if (sample_key & LP_SAMPLER_SHADOW) {
3258
      args[num_args++] = coords[4];
3259
   }
3260
   if (sample_key & LP_SAMPLER_OFFSETS) {
3261
      for (i = 0; i < num_offsets; i++) {
3262
         args[num_args++] = offsets[i];
3263
      }
3264
   }
3265
   if (lod_control == LP_SAMPLER_LOD_BIAS ||
3266
       lod_control == LP_SAMPLER_LOD_EXPLICIT) {
3267
      args[num_args++] = params->lod;
3268
   }
3269
   else if (lod_control == LP_SAMPLER_LOD_DERIVATIVES) {
3270
      for (i = 0; i < num_derivs; i++) {
3271
         args[num_args++] = derivs->ddx[i];
3272
         args[num_args++] = derivs->ddy[i];
3273
      }
3274
   }
3275
 
3276
   assert(num_args <= LP_MAX_TEX_FUNC_ARGS);
3277
 
3278
   tex_ret = LLVMBuildCall(builder, function, args, num_args, "");
3279
   bb = LLVMGetInsertBlock(builder);
3280
   inst = LLVMGetLastInstruction(bb);
3281
   LLVMSetInstructionCallConv(inst, LLVMFastCallConv);
3282
 
3283
   for (i = 0; i < 4; i++) {
3284
      params->texel[i] = LLVMBuildExtractValue(gallivm->builder, tex_ret, i, "");
3285
   }
3286
}
3287
 
3288
 
3289
/**
3290
 * Build texture sampling code.
3291
 * Either via a function call or inline it directly.
3292
 */
3293
void
3294
lp_build_sample_soa(const struct lp_static_texture_state *static_texture_state,
3295
                    const struct lp_static_sampler_state *static_sampler_state,
3296
                    struct lp_sampler_dynamic_state *dynamic_state,
3297
                    struct gallivm_state *gallivm,
3298
                    const struct lp_sampler_params *params)
3299
{
3300
   boolean use_tex_func = FALSE;
3301
 
3302
   /*
3303
    * Do not use a function call if the sampling is "simple enough".
3304
    * We define this by
3305
    * a) format
3306
    * b) no mips (either one level only or no mip filter)
3307
    * No mips will definitely make the code smaller, though
3308
    * the format requirement is a bit iffy - there's some (SoA) formats
3309
    * which definitely generate less code. This does happen to catch
3310
    * some important cases though which are hurt quite a bit by using
3311
    * a call (though not really because of the call overhead but because
3312
    * they are reusing the same texture unit with some of the same
3313
    * parameters).
3314
    * Ideally we'd let llvm recognize this stuff by doing IPO passes.
3315
    */
3316
 
3317
   if (USE_TEX_FUNC_CALL) {
3318
      const struct util_format_description *format_desc;
3319
      boolean simple_format;
3320
      boolean simple_tex;
3321
      enum lp_sampler_op_type op_type;
3322
      format_desc = util_format_description(static_texture_state->format);
3323
      simple_format = !format_desc ||
3324
                         (util_format_is_rgba8_variant(format_desc) &&
3325
                          format_desc->colorspace == UTIL_FORMAT_COLORSPACE_RGB);
3326
 
3327
      op_type = (params->sample_key & LP_SAMPLER_OP_TYPE_MASK) >>
3328
                    LP_SAMPLER_OP_TYPE_SHIFT;
3329
      simple_tex =
3330
         op_type != LP_SAMPLER_OP_TEXTURE ||
3331
           ((static_sampler_state->min_mip_filter == PIPE_TEX_MIPFILTER_NONE ||
3332
             static_texture_state->level_zero_only == TRUE) &&
3333
            static_sampler_state->min_img_filter == static_sampler_state->mag_img_filter);
3334
 
3335
      use_tex_func = format_desc && !(simple_format && simple_tex);
3336
   }
3337
 
3338
   if (use_tex_func) {
3339
      lp_build_sample_soa_func(gallivm,
3340
                               static_texture_state,
3341
                               static_sampler_state,
3342
                               dynamic_state,
3343
                               params);
3344
   }
3345
   else {
3346
      lp_build_sample_soa_code(gallivm,
3347
                               static_texture_state,
3348
                               static_sampler_state,
3349
                               dynamic_state,
3350
                               params->type,
3351
                               params->sample_key,
3352
                               params->texture_index,
3353
                               params->sampler_index,
3354
                               params->context_ptr,
3355
                               params->coords,
3356
                               params->offsets,
3357
                               params->derivs,
3358
                               params->lod,
3359
                               params->texel);
3360
   }
3361
}
3362
 
3363
 
3364
void
3365
lp_build_size_query_soa(struct gallivm_state *gallivm,
3366
                        const struct lp_static_texture_state *static_state,
3367
                        struct lp_sampler_dynamic_state *dynamic_state,
3368
                        struct lp_type int_type,
3369
                        unsigned texture_unit,
3370
                        unsigned target,
3371
                        LLVMValueRef context_ptr,
3372
                        boolean is_sviewinfo,
3373
                        enum lp_sampler_lod_property lod_property,
3374
                        LLVMValueRef explicit_lod,
3375
                        LLVMValueRef *sizes_out)
3376
{
3377
   LLVMValueRef lod, level, size;
3378
   LLVMValueRef first_level = NULL;
3379
   int dims, i;
3380
   boolean has_array;
3381
   unsigned num_lods = 1;
3382
   struct lp_build_context bld_int_vec4;
3383
 
3384
   if (static_state->format == PIPE_FORMAT_NONE) {
3385
      /*
3386
       * If there's nothing bound, format is NONE, and we must return
3387
       * all zero as mandated by d3d10 in this case.
3388
       */
3389
      unsigned chan;
3390
      LLVMValueRef zero = lp_build_const_vec(gallivm, int_type, 0.0F);
3391
      for (chan = 0; chan < 4; chan++) {
3392
         sizes_out[chan] = zero;
3393
      }
3394
      return;
3395
   }
3396
 
3397
   /*
3398
    * Do some sanity verification about bound texture and shader dcl target.
3399
    * Not entirely sure what's possible but assume array/non-array
3400
    * always compatible (probably not ok for OpenGL but d3d10 has no
3401
    * distinction of arrays at the resource level).
3402
    * Everything else looks bogus (though not entirely sure about rect/2d).
3403
    * Currently disabled because it causes assertion failures if there's
3404
    * nothing bound (or rather a dummy texture, not that this case would
3405
    * return the right values).
3406
    */
3407
   if (0 && static_state->target != target) {
3408
      if (static_state->target == PIPE_TEXTURE_1D)
3409
         assert(target == PIPE_TEXTURE_1D_ARRAY);
3410
      else if (static_state->target == PIPE_TEXTURE_1D_ARRAY)
3411
         assert(target == PIPE_TEXTURE_1D);
3412
      else if (static_state->target == PIPE_TEXTURE_2D)
3413
         assert(target == PIPE_TEXTURE_2D_ARRAY);
3414
      else if (static_state->target == PIPE_TEXTURE_2D_ARRAY)
3415
         assert(target == PIPE_TEXTURE_2D);
3416
      else if (static_state->target == PIPE_TEXTURE_CUBE)
3417
         assert(target == PIPE_TEXTURE_CUBE_ARRAY);
3418
      else if (static_state->target == PIPE_TEXTURE_CUBE_ARRAY)
3419
         assert(target == PIPE_TEXTURE_CUBE);
3420
      else
3421
         assert(0);
3422
   }
3423
 
3424
   dims = texture_dims(target);
3425
 
3426
   switch (target) {
3427
   case PIPE_TEXTURE_1D_ARRAY:
3428
   case PIPE_TEXTURE_2D_ARRAY:
3429
   case PIPE_TEXTURE_CUBE_ARRAY:
3430
      has_array = TRUE;
3431
      break;
3432
   default:
3433
      has_array = FALSE;
3434
      break;
3435
   }
3436
 
3437
   assert(!int_type.floating);
3438
 
3439
   lp_build_context_init(&bld_int_vec4, gallivm, lp_type_int_vec(32, 128));
3440
 
3441
   if (explicit_lod) {
3442
      /* FIXME: this needs to honor per-element lod */
3443
      lod = LLVMBuildExtractElement(gallivm->builder, explicit_lod,
3444
                                    lp_build_const_int32(gallivm, 0), "");
3445
      first_level = dynamic_state->first_level(dynamic_state, gallivm,
3446
                                               context_ptr, texture_unit);
3447
      level = LLVMBuildAdd(gallivm->builder, lod, first_level, "level");
3448
      lod = lp_build_broadcast_scalar(&bld_int_vec4, level);
3449
   } else {
3450
      lod = bld_int_vec4.zero;
3451
   }
3452
 
3453
   size = bld_int_vec4.undef;
3454
 
3455
   size = LLVMBuildInsertElement(gallivm->builder, size,
3456
                                 dynamic_state->width(dynamic_state, gallivm,
3457
                                                      context_ptr, texture_unit),
3458
                                 lp_build_const_int32(gallivm, 0), "");
3459
 
3460
   if (dims >= 2) {
3461
      size = LLVMBuildInsertElement(gallivm->builder, size,
3462
                                    dynamic_state->height(dynamic_state, gallivm,
3463
                                                          context_ptr, texture_unit),
3464
                                    lp_build_const_int32(gallivm, 1), "");
3465
   }
3466
 
3467
   if (dims >= 3) {
3468
      size = LLVMBuildInsertElement(gallivm->builder, size,
3469
                                    dynamic_state->depth(dynamic_state, gallivm,
3470
                                                         context_ptr, texture_unit),
3471
                                    lp_build_const_int32(gallivm, 2), "");
3472
   }
3473
 
3474
   size = lp_build_minify(&bld_int_vec4, size, lod, TRUE);
3475
 
3476
   if (has_array) {
3477
      LLVMValueRef layers = dynamic_state->depth(dynamic_state, gallivm,
3478
                                                 context_ptr, texture_unit);
3479
      if (target == PIPE_TEXTURE_CUBE_ARRAY) {
3480
         /*
3481
          * It looks like GL wants number of cubes, d3d10.1 has it undefined?
3482
          * Could avoid this by passing in number of cubes instead of total
3483
          * number of layers (might make things easier elsewhere too).
3484
          */
3485
         LLVMValueRef six = lp_build_const_int32(gallivm, 6);
3486
         layers = LLVMBuildSDiv(gallivm->builder, layers, six, "");
3487
      }
3488
      size = LLVMBuildInsertElement(gallivm->builder, size, layers,
3489
                                    lp_build_const_int32(gallivm, dims), "");
3490
   }
3491
 
3492
   /*
3493
    * d3d10 requires zero for x/y/z values (but not w, i.e. mip levels)
3494
    * if level is out of bounds (note this can't cover unbound texture
3495
    * here, which also requires returning zero).
3496
    */
3497
   if (explicit_lod && is_sviewinfo) {
3498
      LLVMValueRef last_level, out, out1;
3499
      struct lp_build_context leveli_bld;
3500
 
3501
      /* everything is scalar for now */
3502
      lp_build_context_init(&leveli_bld, gallivm, lp_type_int_vec(32, 32));
3503
      last_level = dynamic_state->last_level(dynamic_state, gallivm,
3504
                                             context_ptr, texture_unit);
3505
 
3506
      out = lp_build_cmp(&leveli_bld, PIPE_FUNC_LESS, level, first_level);
3507
      out1 = lp_build_cmp(&leveli_bld, PIPE_FUNC_GREATER, level, last_level);
3508
      out = lp_build_or(&leveli_bld, out, out1);
3509
      if (num_lods == 1) {
3510
         out = lp_build_broadcast_scalar(&bld_int_vec4, out);
3511
      }
3512
      else {
3513
         /* TODO */
3514
         assert(0);
3515
      }
3516
      size = lp_build_andnot(&bld_int_vec4, size, out);
3517
   }
3518
   for (i = 0; i < dims + (has_array ? 1 : 0); i++) {
3519
      sizes_out[i] = lp_build_extract_broadcast(gallivm, bld_int_vec4.type, int_type,
3520
                                                size,
3521
                                                lp_build_const_int32(gallivm, i));
3522
   }
3523
   if (is_sviewinfo) {
3524
      for (; i < 4; i++) {
3525
         sizes_out[i] = lp_build_const_vec(gallivm, int_type, 0.0);
3526
      }
3527
   }
3528
 
3529
   /*
3530
    * if there's no explicit_lod (buffers, rects) queries requiring nr of
3531
    * mips would be illegal.
3532
    */
3533
   if (is_sviewinfo && explicit_lod) {
3534
      struct lp_build_context bld_int_scalar;
3535
      LLVMValueRef num_levels;
3536
      lp_build_context_init(&bld_int_scalar, gallivm, lp_type_int(32));
3537
 
3538
      if (static_state->level_zero_only) {
3539
         num_levels = bld_int_scalar.one;
3540
      }
3541
      else {
3542
         LLVMValueRef last_level;
3543
 
3544
         last_level = dynamic_state->last_level(dynamic_state, gallivm,
3545
                                                context_ptr, texture_unit);
3546
         num_levels = lp_build_sub(&bld_int_scalar, last_level, first_level);
3547
         num_levels = lp_build_add(&bld_int_scalar, num_levels, bld_int_scalar.one);
3548
      }
3549
      sizes_out[3] = lp_build_broadcast(gallivm, lp_build_vec_type(gallivm, int_type),
3550
                                        num_levels);
3551
   }
3552
}