Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * @file
  30.  * Helper functions for swizzling/shuffling.
  31.  *
  32.  * @author Jose Fonseca <jfonseca@vmware.com>
  33.  */
  34.  
  35. #include <inttypes.h>  /* for PRIx64 macro */
  36. #include "util/u_debug.h"
  37.  
  38. #include "lp_bld_type.h"
  39. #include "lp_bld_const.h"
  40. #include "lp_bld_init.h"
  41. #include "lp_bld_logic.h"
  42. #include "lp_bld_swizzle.h"
  43. #include "lp_bld_pack.h"
  44.  
  45.  
  46. LLVMValueRef
  47. lp_build_broadcast(struct gallivm_state *gallivm,
  48.                    LLVMTypeRef vec_type,
  49.                    LLVMValueRef scalar)
  50. {
  51.    LLVMValueRef res;
  52.  
  53.    if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
  54.       /* scalar */
  55.       assert(vec_type == LLVMTypeOf(scalar));
  56.       res = scalar;
  57.    } else {
  58.       LLVMBuilderRef builder = gallivm->builder;
  59.       const unsigned length = LLVMGetVectorSize(vec_type);
  60.       LLVMValueRef undef = LLVMGetUndef(vec_type);
  61.       LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
  62.  
  63.       assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
  64.  
  65.       if (HAVE_LLVM >= 0x207) {
  66.          /* The shuffle vector is always made of int32 elements */
  67.          LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
  68.          res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
  69.          res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
  70.       } else {
  71.          /* XXX: The above path provokes a bug in LLVM 2.6 */
  72.          unsigned i;
  73.          res = undef;
  74.          for(i = 0; i < length; ++i) {
  75.             LLVMValueRef index = lp_build_const_int32(gallivm, i);
  76.             res = LLVMBuildInsertElement(builder, res, scalar, index, "");
  77.          }
  78.       }
  79.    }
  80.  
  81.    return res;
  82. }
  83.  
  84.  
  85. /**
  86.  * Broadcast
  87.  */
  88. LLVMValueRef
  89. lp_build_broadcast_scalar(struct lp_build_context *bld,
  90.                           LLVMValueRef scalar)
  91. {
  92.    assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
  93.  
  94.    return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
  95. }
  96.  
  97.  
  98. /**
  99.  * Combined extract and broadcast (mere shuffle in most cases)
  100.  */
  101. LLVMValueRef
  102. lp_build_extract_broadcast(struct gallivm_state *gallivm,
  103.                            struct lp_type src_type,
  104.                            struct lp_type dst_type,
  105.                            LLVMValueRef vector,
  106.                            LLVMValueRef index)
  107. {
  108.    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  109.    LLVMValueRef res;
  110.  
  111.    assert(src_type.floating == dst_type.floating);
  112.    assert(src_type.width    == dst_type.width);
  113.  
  114.    assert(lp_check_value(src_type, vector));
  115.    assert(LLVMTypeOf(index) == i32t);
  116.  
  117.    if (src_type.length == 1) {
  118.       if (dst_type.length == 1) {
  119.          /*
  120.           * Trivial scalar -> scalar.
  121.           */
  122.  
  123.          res = vector;
  124.       }
  125.       else {
  126.          /*
  127.           * Broadcast scalar -> vector.
  128.           */
  129.  
  130.          res = lp_build_broadcast(gallivm,
  131.                                   lp_build_vec_type(gallivm, dst_type),
  132.                                   vector);
  133.       }
  134.    }
  135.    else {
  136.       if (dst_type.length > 1) {
  137.          /*
  138.           * shuffle - result can be of different length.
  139.           */
  140.  
  141.          LLVMValueRef shuffle;
  142.          shuffle = lp_build_broadcast(gallivm,
  143.                                       LLVMVectorType(i32t, dst_type.length),
  144.                                       index);
  145.          res = LLVMBuildShuffleVector(gallivm->builder, vector,
  146.                                       LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
  147.                                       shuffle, "");
  148.       }
  149.       else {
  150.          /*
  151.           * Trivial extract scalar from vector.
  152.           */
  153.           res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
  154.       }
  155.    }
  156.  
  157.    return res;
  158. }
  159.  
  160.  
  161. /**
  162.  * Swizzle one channel into other channels.
  163.  */
  164. LLVMValueRef
  165. lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
  166.                             LLVMValueRef a,
  167.                             unsigned channel,
  168.                             unsigned num_channels)
  169. {
  170.    LLVMBuilderRef builder = bld->gallivm->builder;
  171.    const struct lp_type type = bld->type;
  172.    const unsigned n = type.length;
  173.    unsigned i, j;
  174.  
  175.    if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
  176.       return a;
  177.  
  178.    assert(num_channels == 2 || num_channels == 4);
  179.  
  180.    /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
  181.     * using shuffles here actually causes worst results. More investigation is
  182.     * needed. */
  183.    if (type.width >= 16) {
  184.       /*
  185.        * Shuffle.
  186.        */
  187.       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
  188.       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  189.  
  190.       for(j = 0; j < n; j += num_channels)
  191.          for(i = 0; i < num_channels; ++i)
  192.             shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
  193.  
  194.       return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
  195.    }
  196.    else if (num_channels == 2) {
  197.       /*
  198.        * Bit mask and shifts
  199.        *
  200.        *   XY XY .... XY  <= input
  201.        *   0Y 0Y .... 0Y
  202.        *   YY YY .... YY
  203.        *   YY YY .... YY  <= output
  204.        */
  205.       struct lp_type type2;
  206.       LLVMValueRef tmp = NULL;
  207.       int shift;
  208.  
  209.       a = LLVMBuildAnd(builder, a,
  210.                        lp_build_const_mask_aos(bld->gallivm,
  211.                                                type, 1 << channel, num_channels), "");
  212.  
  213.       type2 = type;
  214.       type2.floating = FALSE;
  215.       type2.width *= 2;
  216.       type2.length /= 2;
  217.  
  218.       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
  219.  
  220.       /*
  221.        * Vector element 0 is always channel X.
  222.        *
  223.        *                        76 54 32 10 (array numbering)
  224.        * Little endian reg in:  YX YX YX YX
  225.        * Little endian reg out: YY YY YY YY if shift right (shift == -1)
  226.        *                        XX XX XX XX if shift left (shift == 1)
  227.        *
  228.        *                        01 23 45 67 (array numbering)
  229.        * Big endian reg in:     XY XY XY XY
  230.        * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
  231.        *                        XX XX XX XX if shift right (shift == -1)
  232.        *
  233.        */
  234. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  235.       shift = channel == 0 ? 1 : -1;
  236. #else
  237.       shift = channel == 0 ? -1 : 1;
  238. #endif
  239.  
  240.       if (shift > 0) {
  241.          tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
  242.       } else if (shift < 0) {
  243.          tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
  244.       }
  245.  
  246.       assert(tmp);
  247.       if (tmp) {
  248.          a = LLVMBuildOr(builder, a, tmp, "");
  249.       }
  250.  
  251.       return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
  252.    }
  253.    else {
  254.       /*
  255.        * Bit mask and recursive shifts
  256.        *
  257.        * Little-endian registers:
  258.        *
  259.        *   7654 3210
  260.        *   WZYX WZYX .... WZYX  <= input
  261.        *   00Y0 00Y0 .... 00Y0  <= mask
  262.        *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
  263.        *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
  264.        *
  265.        * Big-endian registers:
  266.        *
  267.        *   0123 4567
  268.        *   XYZW XYZW .... XYZW  <= input
  269.        *   0Y00 0Y00 .... 0Y00  <= mask
  270.        *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
  271.        *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
  272.        *
  273.        * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
  274.        */
  275.       struct lp_type type4;
  276.       const int shifts[4][2] = {
  277.          { 1,  2},
  278.          {-1,  2},
  279.          { 1, -2},
  280.          {-1, -2}
  281.       };
  282.       unsigned i;
  283.  
  284.       a = LLVMBuildAnd(builder, a,
  285.                        lp_build_const_mask_aos(bld->gallivm,
  286.                                                type, 1 << channel, 4), "");
  287.  
  288.       /*
  289.        * Build a type where each element is an integer that cover the four
  290.        * channels.
  291.        */
  292.  
  293.       type4 = type;
  294.       type4.floating = FALSE;
  295.       type4.width *= 4;
  296.       type4.length /= 4;
  297.  
  298.       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
  299.  
  300.       for(i = 0; i < 2; ++i) {
  301.          LLVMValueRef tmp = NULL;
  302.          int shift = shifts[channel][i];
  303.  
  304.          /* See endianness diagram above */
  305. #ifdef PIPE_ARCH_BIG_ENDIAN
  306.          shift = -shift;
  307. #endif
  308.  
  309.          if(shift > 0)
  310.             tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
  311.          if(shift < 0)
  312.             tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
  313.  
  314.          assert(tmp);
  315.          if(tmp)
  316.             a = LLVMBuildOr(builder, a, tmp, "");
  317.       }
  318.  
  319.       return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
  320.    }
  321. }
  322.  
  323.  
  324. /**
  325.  * Swizzle a vector consisting of an array of XYZW structs.
  326.  *
  327.  * This fills a vector of dst_len length with the swizzled channels from src.
  328.  *
  329.  * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
  330.  *      RGBA RGBA = BGR BGR BG
  331.  *
  332.  * @param swizzles        the swizzle array
  333.  * @param num_swizzles    the number of elements in swizzles
  334.  * @param dst_len         the length of the result
  335.  */
  336. LLVMValueRef
  337. lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
  338.                        LLVMValueRef src,
  339.                        const unsigned char* swizzles,
  340.                        unsigned num_swizzles,
  341.                        unsigned dst_len)
  342. {
  343.    LLVMBuilderRef builder = gallivm->builder;
  344.    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
  345.    unsigned i;
  346.  
  347.    assert(dst_len < LP_MAX_VECTOR_WIDTH);
  348.  
  349.    for (i = 0; i < dst_len; ++i) {
  350.       int swizzle = swizzles[i % num_swizzles];
  351.  
  352.       if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
  353.          shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  354.       } else {
  355.          shuffles[i] = lp_build_const_int32(gallivm, swizzle);
  356.       }
  357.    }
  358.  
  359.    return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
  360. }
  361.  
  362.  
  363. LLVMValueRef
  364. lp_build_swizzle_aos(struct lp_build_context *bld,
  365.                      LLVMValueRef a,
  366.                      const unsigned char swizzles[4])
  367. {
  368.    LLVMBuilderRef builder = bld->gallivm->builder;
  369.    const struct lp_type type = bld->type;
  370.    const unsigned n = type.length;
  371.    unsigned i, j;
  372.  
  373.    if (swizzles[0] == PIPE_SWIZZLE_RED &&
  374.        swizzles[1] == PIPE_SWIZZLE_GREEN &&
  375.        swizzles[2] == PIPE_SWIZZLE_BLUE &&
  376.        swizzles[3] == PIPE_SWIZZLE_ALPHA) {
  377.       return a;
  378.    }
  379.  
  380.    if (swizzles[0] == swizzles[1] &&
  381.        swizzles[1] == swizzles[2] &&
  382.        swizzles[2] == swizzles[3]) {
  383.       switch (swizzles[0]) {
  384.       case PIPE_SWIZZLE_RED:
  385.       case PIPE_SWIZZLE_GREEN:
  386.       case PIPE_SWIZZLE_BLUE:
  387.       case PIPE_SWIZZLE_ALPHA:
  388.          return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
  389.       case PIPE_SWIZZLE_ZERO:
  390.          return bld->zero;
  391.       case PIPE_SWIZZLE_ONE:
  392.          return bld->one;
  393.       case LP_BLD_SWIZZLE_DONTCARE:
  394.          return bld->undef;
  395.       default:
  396.          assert(0);
  397.          return bld->undef;
  398.       }
  399.    }
  400.  
  401.    if (type.width >= 16) {
  402.       /*
  403.        * Shuffle.
  404.        */
  405.       LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
  406.       LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
  407.       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  408.       LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
  409.  
  410.       memset(aux, 0, sizeof aux);
  411.  
  412.       for(j = 0; j < n; j += 4) {
  413.          for(i = 0; i < 4; ++i) {
  414.             unsigned shuffle;
  415.             switch (swizzles[i]) {
  416.             default:
  417.                assert(0);
  418.                /* fall through */
  419.             case PIPE_SWIZZLE_RED:
  420.             case PIPE_SWIZZLE_GREEN:
  421.             case PIPE_SWIZZLE_BLUE:
  422.             case PIPE_SWIZZLE_ALPHA:
  423.                shuffle = j + swizzles[i];
  424.                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
  425.                break;
  426.             case PIPE_SWIZZLE_ZERO:
  427.                shuffle = type.length + 0;
  428.                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
  429.                if (!aux[0]) {
  430.                   aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
  431.                }
  432.                break;
  433.             case PIPE_SWIZZLE_ONE:
  434.                shuffle = type.length + 1;
  435.                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
  436.                if (!aux[1]) {
  437.                   aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
  438.                }
  439.                break;
  440.             case LP_BLD_SWIZZLE_DONTCARE:
  441.                shuffles[j + i] = LLVMGetUndef(i32t);
  442.                break;
  443.             }
  444.          }
  445.       }
  446.  
  447.       for (i = 0; i < n; ++i) {
  448.          if (!aux[i]) {
  449.             aux[i] = undef;
  450.          }
  451.       }
  452.  
  453.       return LLVMBuildShuffleVector(builder, a,
  454.                                     LLVMConstVector(aux, n),
  455.                                     LLVMConstVector(shuffles, n), "");
  456.    } else {
  457.       /*
  458.        * Bit mask and shifts.
  459.        *
  460.        * For example, this will convert BGRA to RGBA by doing
  461.        *
  462.        * Little endian:
  463.        *   rgba = (bgra & 0x00ff0000) >> 16
  464.        *        | (bgra & 0xff00ff00)
  465.        *        | (bgra & 0x000000ff) << 16
  466.        *
  467.        * Big endian:A
  468.        *   rgba = (bgra & 0x0000ff00) << 16
  469.        *        | (bgra & 0x00ff00ff)
  470.        *        | (bgra & 0xff000000) >> 16
  471.        *
  472.        * This is necessary not only for faster cause, but because X86 backend
  473.        * will refuse shuffles of <4 x i8> vectors
  474.        */
  475.       LLVMValueRef res;
  476.       struct lp_type type4;
  477.       unsigned cond = 0;
  478.       unsigned chan;
  479.       int shift;
  480.  
  481.       /*
  482.        * Start with a mixture of 1 and 0.
  483.        */
  484.       for (chan = 0; chan < 4; ++chan) {
  485.          if (swizzles[chan] == PIPE_SWIZZLE_ONE) {
  486.             cond |= 1 << chan;
  487.          }
  488.       }
  489.       res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
  490.  
  491.       /*
  492.        * Build a type where each element is an integer that cover the four
  493.        * channels.
  494.        */
  495.       type4 = type;
  496.       type4.floating = FALSE;
  497.       type4.width *= 4;
  498.       type4.length /= 4;
  499.  
  500.       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
  501.       res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
  502.  
  503.       /*
  504.        * Mask and shift the channels, trying to group as many channels in the
  505.        * same shift as possible.  The shift amount is positive for shifts left
  506.        * and negative for shifts right.
  507.        */
  508.       for (shift = -3; shift <= 3; ++shift) {
  509.          uint64_t mask = 0;
  510.  
  511.          assert(type4.width <= sizeof(mask)*8);
  512.  
  513.          /*
  514.           * Vector element numbers follow the XYZW order, so 0 is always X, etc.
  515.           * After widening 4 times we have:
  516.           *
  517.           *                                3210
  518.           * Little-endian register layout: WZYX
  519.           *
  520.           *                                0123
  521.           * Big-endian register layout:    XYZW
  522.           *
  523.           * For little-endian, higher-numbered channels are obtained by a shift right
  524.           * (negative shift amount) and lower-numbered channels by a shift left
  525.           * (positive shift amount).  The opposite is true for big-endian.
  526.           */
  527.          for (chan = 0; chan < 4; ++chan) {
  528.             if (swizzles[chan] < 4) {
  529.                /* We need to move channel swizzles[chan] into channel chan */
  530. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  531.                if (swizzles[chan] - chan == -shift) {
  532.                   mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
  533.                }
  534. #else
  535.                if (swizzles[chan] - chan == shift) {
  536.                   mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
  537.                }
  538. #endif
  539.             }
  540.          }
  541.  
  542.          if (mask) {
  543.             LLVMValueRef masked;
  544.             LLVMValueRef shifted;
  545.             if (0)
  546.                debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
  547.  
  548.             masked = LLVMBuildAnd(builder, a,
  549.                                   lp_build_const_int_vec(bld->gallivm, type4, mask), "");
  550.             if (shift > 0) {
  551.                shifted = LLVMBuildShl(builder, masked,
  552.                                       lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
  553.             } else if (shift < 0) {
  554.                shifted = LLVMBuildLShr(builder, masked,
  555.                                        lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
  556.             } else {
  557.                shifted = masked;
  558.             }
  559.  
  560.             res = LLVMBuildOr(builder, res, shifted, "");
  561.          }
  562.       }
  563.  
  564.       return LLVMBuildBitCast(builder, res,
  565.                               lp_build_vec_type(bld->gallivm, type), "");
  566.    }
  567. }
  568.  
  569.  
  570. /**
  571.  * Extended swizzle of a single channel of a SoA vector.
  572.  *
  573.  * @param bld         building context
  574.  * @param unswizzled  array with the 4 unswizzled values
  575.  * @param swizzle     one of the PIPE_SWIZZLE_*
  576.  *
  577.  * @return  the swizzled value.
  578.  */
  579. LLVMValueRef
  580. lp_build_swizzle_soa_channel(struct lp_build_context *bld,
  581.                              const LLVMValueRef *unswizzled,
  582.                              unsigned swizzle)
  583. {
  584.    switch (swizzle) {
  585.    case PIPE_SWIZZLE_RED:
  586.    case PIPE_SWIZZLE_GREEN:
  587.    case PIPE_SWIZZLE_BLUE:
  588.    case PIPE_SWIZZLE_ALPHA:
  589.       return unswizzled[swizzle];
  590.    case PIPE_SWIZZLE_ZERO:
  591.       return bld->zero;
  592.    case PIPE_SWIZZLE_ONE:
  593.       return bld->one;
  594.    default:
  595.       assert(0);
  596.       return bld->undef;
  597.    }
  598. }
  599.  
  600.  
  601. /**
  602.  * Extended swizzle of a SoA vector.
  603.  *
  604.  * @param bld         building context
  605.  * @param unswizzled  array with the 4 unswizzled values
  606.  * @param swizzles    array of PIPE_SWIZZLE_*
  607.  * @param swizzled    output swizzled values
  608.  */
  609. void
  610. lp_build_swizzle_soa(struct lp_build_context *bld,
  611.                      const LLVMValueRef *unswizzled,
  612.                      const unsigned char swizzles[4],
  613.                      LLVMValueRef *swizzled)
  614. {
  615.    unsigned chan;
  616.  
  617.    for (chan = 0; chan < 4; ++chan) {
  618.       swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
  619.                                                     swizzles[chan]);
  620.    }
  621. }
  622.  
  623.  
  624. /**
  625.  * Do an extended swizzle of a SoA vector inplace.
  626.  *
  627.  * @param bld         building context
  628.  * @param values      intput/output array with the 4 values
  629.  * @param swizzles    array of PIPE_SWIZZLE_*
  630.  */
  631. void
  632. lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
  633.                              LLVMValueRef *values,
  634.                              const unsigned char swizzles[4])
  635. {
  636.    LLVMValueRef unswizzled[4];
  637.    unsigned chan;
  638.  
  639.    for (chan = 0; chan < 4; ++chan) {
  640.       unswizzled[chan] = values[chan];
  641.    }
  642.  
  643.    lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
  644. }
  645.  
  646.  
  647. /**
  648.  * Transpose from AOS <-> SOA
  649.  *
  650.  * @param single_type_lp   type of pixels
  651.  * @param src              the 4 * n pixel input
  652.  * @param dst              the 4 * n pixel output
  653.  */
  654. void
  655. lp_build_transpose_aos(struct gallivm_state *gallivm,
  656.                        struct lp_type single_type_lp,
  657.                        const LLVMValueRef src[4],
  658.                        LLVMValueRef dst[4])
  659. {
  660.    struct lp_type double_type_lp = single_type_lp;
  661.    LLVMTypeRef single_type;
  662.    LLVMTypeRef double_type;
  663.    LLVMValueRef t0, t1, t2, t3;
  664.  
  665.    double_type_lp.length >>= 1;
  666.    double_type_lp.width  <<= 1;
  667.  
  668.    double_type = lp_build_vec_type(gallivm, double_type_lp);
  669.    single_type = lp_build_vec_type(gallivm, single_type_lp);
  670.  
  671.    /* Interleave x, y, z, w -> xy and zw */
  672.    t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0);
  673.    t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0);
  674.    t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1);
  675.    t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1);
  676.  
  677.    /* Cast to double width type for second interleave */
  678.    t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
  679.    t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
  680.    t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
  681.    t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
  682.  
  683.    /* Interleave xy, zw -> xyzw */
  684.    dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
  685.    dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
  686.    dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
  687.    dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
  688.  
  689.    /* Cast back to original single width type */
  690.    dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
  691.    dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
  692.    dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
  693.    dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
  694. }
  695.  
  696.  
  697. /**
  698.  * Transpose from AOS <-> SOA for num_srcs
  699.  */
  700. void
  701. lp_build_transpose_aos_n(struct gallivm_state *gallivm,
  702.                          struct lp_type type,
  703.                          const LLVMValueRef* src,
  704.                          unsigned num_srcs,
  705.                          LLVMValueRef* dst)
  706. {
  707.    switch (num_srcs) {
  708.       case 1:
  709.          dst[0] = src[0];
  710.          break;
  711.  
  712.       case 2:
  713.       {
  714.          /* Note: we must use a temporary incase src == dst */
  715.          LLVMValueRef lo, hi;
  716.  
  717.          lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
  718.          hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
  719.  
  720.          dst[0] = lo;
  721.          dst[1] = hi;
  722.          break;
  723.       }
  724.  
  725.       case 4:
  726.          lp_build_transpose_aos(gallivm, type, src, dst);
  727.          break;
  728.  
  729.       default:
  730.          assert(0);
  731.    };
  732. }
  733.  
  734.  
  735. /**
  736.  * Pack n-th element of aos values,
  737.  * pad out to destination size.
  738.  * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
  739.  */
  740. LLVMValueRef
  741. lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
  742.                           struct lp_type src_type,
  743.                           struct lp_type dst_type,
  744.                           const LLVMValueRef src,
  745.                           unsigned channel)
  746. {
  747.    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  748.    LLVMValueRef undef = LLVMGetUndef(i32t);
  749.    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  750.    unsigned num_src = src_type.length / 4;
  751.    unsigned num_dst = dst_type.length;
  752.    unsigned i;
  753.  
  754.    assert(num_src <= num_dst);
  755.  
  756.    for (i = 0; i < num_src; i++) {
  757.       shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
  758.    }
  759.    for (i = num_src; i < num_dst; i++) {
  760.       shuffles[i] = undef;
  761.    }
  762.  
  763.    if (num_dst == 1) {
  764.       return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
  765.    }
  766.    else {
  767.       return LLVMBuildShuffleVector(gallivm->builder, src, src,
  768.                                     LLVMConstVector(shuffles, num_dst), "");
  769.    }
  770. }
  771.  
  772.  
  773. /**
  774.  * Unpack and broadcast packed aos values consisting of only the
  775.  * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
  776.  */
  777. LLVMValueRef
  778. lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
  779.                                       struct lp_type src_type,
  780.                                       struct lp_type dst_type,
  781.                                       const LLVMValueRef src)
  782. {
  783.    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  784.    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  785.    unsigned num_dst = dst_type.length;
  786.    unsigned num_src = dst_type.length / 4;
  787.    unsigned i;
  788.  
  789.    assert(num_dst / 4 <= src_type.length);
  790.  
  791.    for (i = 0; i < num_src; i++) {
  792.       shuffles[i*4] = LLVMConstInt(i32t, i, 0);
  793.       shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
  794.       shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
  795.       shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
  796.    }
  797.  
  798.    if (num_src == 1) {
  799.       return lp_build_extract_broadcast(gallivm, src_type, dst_type,
  800.                                         src, shuffles[0]);
  801.    }
  802.    else {
  803.       return LLVMBuildShuffleVector(gallivm->builder, src, src,
  804.                                     LLVMConstVector(shuffles, num_dst), "");
  805.    }
  806. }
  807.  
  808.