Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2009 VMware, Inc.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * @file
  30.  * Helper functions for swizzling/shuffling.
  31.  *
  32.  * @author Jose Fonseca <jfonseca@vmware.com>
  33.  */
  34.  
  35. #include <inttypes.h>  /* for PRIx64 macro */
  36. #include "util/u_debug.h"
  37.  
  38. #include "lp_bld_type.h"
  39. #include "lp_bld_const.h"
  40. #include "lp_bld_init.h"
  41. #include "lp_bld_logic.h"
  42. #include "lp_bld_swizzle.h"
  43. #include "lp_bld_pack.h"
  44.  
  45.  
  46. LLVMValueRef
  47. lp_build_broadcast(struct gallivm_state *gallivm,
  48.                    LLVMTypeRef vec_type,
  49.                    LLVMValueRef scalar)
  50. {
  51.    LLVMValueRef res;
  52.  
  53.    if (LLVMGetTypeKind(vec_type) != LLVMVectorTypeKind) {
  54.       /* scalar */
  55.       assert(vec_type == LLVMTypeOf(scalar));
  56.       res = scalar;
  57.    } else {
  58.       LLVMBuilderRef builder = gallivm->builder;
  59.       const unsigned length = LLVMGetVectorSize(vec_type);
  60.       LLVMValueRef undef = LLVMGetUndef(vec_type);
  61.       /* The shuffle vector is always made of int32 elements */
  62.       LLVMTypeRef i32_type = LLVMInt32TypeInContext(gallivm->context);
  63.       LLVMTypeRef i32_vec_type = LLVMVectorType(i32_type, length);
  64.  
  65.       assert(LLVMGetElementType(vec_type) == LLVMTypeOf(scalar));
  66.  
  67.       res = LLVMBuildInsertElement(builder, undef, scalar, LLVMConstNull(i32_type), "");
  68.       res = LLVMBuildShuffleVector(builder, res, undef, LLVMConstNull(i32_vec_type), "");
  69.    }
  70.  
  71.    return res;
  72. }
  73.  
  74.  
  75. /**
  76.  * Broadcast
  77.  */
  78. LLVMValueRef
  79. lp_build_broadcast_scalar(struct lp_build_context *bld,
  80.                           LLVMValueRef scalar)
  81. {
  82.    assert(lp_check_elem_type(bld->type, LLVMTypeOf(scalar)));
  83.  
  84.    return lp_build_broadcast(bld->gallivm, bld->vec_type, scalar);
  85. }
  86.  
  87.  
  88. /**
  89.  * Combined extract and broadcast (mere shuffle in most cases)
  90.  */
  91. LLVMValueRef
  92. lp_build_extract_broadcast(struct gallivm_state *gallivm,
  93.                            struct lp_type src_type,
  94.                            struct lp_type dst_type,
  95.                            LLVMValueRef vector,
  96.                            LLVMValueRef index)
  97. {
  98.    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  99.    LLVMValueRef res;
  100.  
  101.    assert(src_type.floating == dst_type.floating);
  102.    assert(src_type.width    == dst_type.width);
  103.  
  104.    assert(lp_check_value(src_type, vector));
  105.    assert(LLVMTypeOf(index) == i32t);
  106.  
  107.    if (src_type.length == 1) {
  108.       if (dst_type.length == 1) {
  109.          /*
  110.           * Trivial scalar -> scalar.
  111.           */
  112.  
  113.          res = vector;
  114.       }
  115.       else {
  116.          /*
  117.           * Broadcast scalar -> vector.
  118.           */
  119.  
  120.          res = lp_build_broadcast(gallivm,
  121.                                   lp_build_vec_type(gallivm, dst_type),
  122.                                   vector);
  123.       }
  124.    }
  125.    else {
  126.       if (dst_type.length > 1) {
  127.          /*
  128.           * shuffle - result can be of different length.
  129.           */
  130.  
  131.          LLVMValueRef shuffle;
  132.          shuffle = lp_build_broadcast(gallivm,
  133.                                       LLVMVectorType(i32t, dst_type.length),
  134.                                       index);
  135.          res = LLVMBuildShuffleVector(gallivm->builder, vector,
  136.                                       LLVMGetUndef(lp_build_vec_type(gallivm, src_type)),
  137.                                       shuffle, "");
  138.       }
  139.       else {
  140.          /*
  141.           * Trivial extract scalar from vector.
  142.           */
  143.           res = LLVMBuildExtractElement(gallivm->builder, vector, index, "");
  144.       }
  145.    }
  146.  
  147.    return res;
  148. }
  149.  
  150.  
  151. /**
  152.  * Swizzle one channel into other channels.
  153.  */
  154. LLVMValueRef
  155. lp_build_swizzle_scalar_aos(struct lp_build_context *bld,
  156.                             LLVMValueRef a,
  157.                             unsigned channel,
  158.                             unsigned num_channels)
  159. {
  160.    LLVMBuilderRef builder = bld->gallivm->builder;
  161.    const struct lp_type type = bld->type;
  162.    const unsigned n = type.length;
  163.    unsigned i, j;
  164.  
  165.    if(a == bld->undef || a == bld->zero || a == bld->one || num_channels == 1)
  166.       return a;
  167.  
  168.    assert(num_channels == 2 || num_channels == 4);
  169.  
  170.    /* XXX: SSE3 has PSHUFB which should be better than bitmasks, but forcing
  171.     * using shuffles here actually causes worst results. More investigation is
  172.     * needed. */
  173.    if (LLVMIsConstant(a) ||
  174.        type.width >= 16) {
  175.       /*
  176.        * Shuffle.
  177.        */
  178.       LLVMTypeRef elem_type = LLVMInt32TypeInContext(bld->gallivm->context);
  179.       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  180.  
  181.       for(j = 0; j < n; j += num_channels)
  182.          for(i = 0; i < num_channels; ++i)
  183.             shuffles[j + i] = LLVMConstInt(elem_type, j + channel, 0);
  184.  
  185.       return LLVMBuildShuffleVector(builder, a, bld->undef, LLVMConstVector(shuffles, n), "");
  186.    }
  187.    else if (num_channels == 2) {
  188.       /*
  189.        * Bit mask and shifts
  190.        *
  191.        *   XY XY .... XY  <= input
  192.        *   0Y 0Y .... 0Y
  193.        *   YY YY .... YY
  194.        *   YY YY .... YY  <= output
  195.        */
  196.       struct lp_type type2;
  197.       LLVMValueRef tmp = NULL;
  198.       int shift;
  199.  
  200.       a = LLVMBuildAnd(builder, a,
  201.                        lp_build_const_mask_aos(bld->gallivm,
  202.                                                type, 1 << channel, num_channels), "");
  203.  
  204.       type2 = type;
  205.       type2.floating = FALSE;
  206.       type2.width *= 2;
  207.       type2.length /= 2;
  208.  
  209.       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type2), "");
  210.  
  211.       /*
  212.        * Vector element 0 is always channel X.
  213.        *
  214.        *                        76 54 32 10 (array numbering)
  215.        * Little endian reg in:  YX YX YX YX
  216.        * Little endian reg out: YY YY YY YY if shift right (shift == -1)
  217.        *                        XX XX XX XX if shift left (shift == 1)
  218.        *
  219.        *                        01 23 45 67 (array numbering)
  220.        * Big endian reg in:     XY XY XY XY
  221.        * Big endian reg out:    YY YY YY YY if shift left (shift == 1)
  222.        *                        XX XX XX XX if shift right (shift == -1)
  223.        *
  224.        */
  225. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  226.       shift = channel == 0 ? 1 : -1;
  227. #else
  228.       shift = channel == 0 ? -1 : 1;
  229. #endif
  230.  
  231.       if (shift > 0) {
  232.          tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type2, shift * type.width), "");
  233.       } else if (shift < 0) {
  234.          tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type2, -shift * type.width), "");
  235.       }
  236.  
  237.       assert(tmp);
  238.       if (tmp) {
  239.          a = LLVMBuildOr(builder, a, tmp, "");
  240.       }
  241.  
  242.       return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
  243.    }
  244.    else {
  245.       /*
  246.        * Bit mask and recursive shifts
  247.        *
  248.        * Little-endian registers:
  249.        *
  250.        *   7654 3210
  251.        *   WZYX WZYX .... WZYX  <= input
  252.        *   00Y0 00Y0 .... 00Y0  <= mask
  253.        *   00YY 00YY .... 00YY  <= shift right 1 (shift amount -1)
  254.        *   YYYY YYYY .... YYYY  <= shift left 2 (shift amount 2)
  255.        *
  256.        * Big-endian registers:
  257.        *
  258.        *   0123 4567
  259.        *   XYZW XYZW .... XYZW  <= input
  260.        *   0Y00 0Y00 .... 0Y00  <= mask
  261.        *   YY00 YY00 .... YY00  <= shift left 1 (shift amount 1)
  262.        *   YYYY YYYY .... YYYY  <= shift right 2 (shift amount -2)
  263.        *
  264.        * shifts[] gives little-endian shift amounts; we need to negate for big-endian.
  265.        */
  266.       struct lp_type type4;
  267.       const int shifts[4][2] = {
  268.          { 1,  2},
  269.          {-1,  2},
  270.          { 1, -2},
  271.          {-1, -2}
  272.       };
  273.       unsigned i;
  274.  
  275.       a = LLVMBuildAnd(builder, a,
  276.                        lp_build_const_mask_aos(bld->gallivm,
  277.                                                type, 1 << channel, 4), "");
  278.  
  279.       /*
  280.        * Build a type where each element is an integer that cover the four
  281.        * channels.
  282.        */
  283.  
  284.       type4 = type;
  285.       type4.floating = FALSE;
  286.       type4.width *= 4;
  287.       type4.length /= 4;
  288.  
  289.       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
  290.  
  291.       for(i = 0; i < 2; ++i) {
  292.          LLVMValueRef tmp = NULL;
  293.          int shift = shifts[channel][i];
  294.  
  295.          /* See endianness diagram above */
  296. #ifdef PIPE_ARCH_BIG_ENDIAN
  297.          shift = -shift;
  298. #endif
  299.  
  300.          if(shift > 0)
  301.             tmp = LLVMBuildShl(builder, a, lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
  302.          if(shift < 0)
  303.             tmp = LLVMBuildLShr(builder, a, lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
  304.  
  305.          assert(tmp);
  306.          if(tmp)
  307.             a = LLVMBuildOr(builder, a, tmp, "");
  308.       }
  309.  
  310.       return LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type), "");
  311.    }
  312. }
  313.  
  314.  
  315. /**
  316.  * Swizzle a vector consisting of an array of XYZW structs.
  317.  *
  318.  * This fills a vector of dst_len length with the swizzled channels from src.
  319.  *
  320.  * e.g. with swizzles = { 2, 1, 0 } and swizzle_count = 6 results in
  321.  *      RGBA RGBA = BGR BGR BG
  322.  *
  323.  * @param swizzles        the swizzle array
  324.  * @param num_swizzles    the number of elements in swizzles
  325.  * @param dst_len         the length of the result
  326.  */
  327. LLVMValueRef
  328. lp_build_swizzle_aos_n(struct gallivm_state* gallivm,
  329.                        LLVMValueRef src,
  330.                        const unsigned char* swizzles,
  331.                        unsigned num_swizzles,
  332.                        unsigned dst_len)
  333. {
  334.    LLVMBuilderRef builder = gallivm->builder;
  335.    LLVMValueRef shuffles[LP_MAX_VECTOR_WIDTH];
  336.    unsigned i;
  337.  
  338.    assert(dst_len < LP_MAX_VECTOR_WIDTH);
  339.  
  340.    for (i = 0; i < dst_len; ++i) {
  341.       int swizzle = swizzles[i % num_swizzles];
  342.  
  343.       if (swizzle == LP_BLD_SWIZZLE_DONTCARE) {
  344.          shuffles[i] = LLVMGetUndef(LLVMInt32TypeInContext(gallivm->context));
  345.       } else {
  346.          shuffles[i] = lp_build_const_int32(gallivm, swizzle);
  347.       }
  348.    }
  349.  
  350.    return LLVMBuildShuffleVector(builder, src, LLVMGetUndef(LLVMTypeOf(src)), LLVMConstVector(shuffles, dst_len), "");
  351. }
  352.  
  353.  
  354. LLVMValueRef
  355. lp_build_swizzle_aos(struct lp_build_context *bld,
  356.                      LLVMValueRef a,
  357.                      const unsigned char swizzles[4])
  358. {
  359.    LLVMBuilderRef builder = bld->gallivm->builder;
  360.    const struct lp_type type = bld->type;
  361.    const unsigned n = type.length;
  362.    unsigned i, j;
  363.  
  364.    if (swizzles[0] == PIPE_SWIZZLE_RED &&
  365.        swizzles[1] == PIPE_SWIZZLE_GREEN &&
  366.        swizzles[2] == PIPE_SWIZZLE_BLUE &&
  367.        swizzles[3] == PIPE_SWIZZLE_ALPHA) {
  368.       return a;
  369.    }
  370.  
  371.    if (swizzles[0] == swizzles[1] &&
  372.        swizzles[1] == swizzles[2] &&
  373.        swizzles[2] == swizzles[3]) {
  374.       switch (swizzles[0]) {
  375.       case PIPE_SWIZZLE_RED:
  376.       case PIPE_SWIZZLE_GREEN:
  377.       case PIPE_SWIZZLE_BLUE:
  378.       case PIPE_SWIZZLE_ALPHA:
  379.          return lp_build_swizzle_scalar_aos(bld, a, swizzles[0], 4);
  380.       case PIPE_SWIZZLE_ZERO:
  381.          return bld->zero;
  382.       case PIPE_SWIZZLE_ONE:
  383.          return bld->one;
  384.       case LP_BLD_SWIZZLE_DONTCARE:
  385.          return bld->undef;
  386.       default:
  387.          assert(0);
  388.          return bld->undef;
  389.       }
  390.    }
  391.  
  392.    if (LLVMIsConstant(a) ||
  393.        type.width >= 16) {
  394.       /*
  395.        * Shuffle.
  396.        */
  397.       LLVMValueRef undef = LLVMGetUndef(lp_build_elem_type(bld->gallivm, type));
  398.       LLVMTypeRef i32t = LLVMInt32TypeInContext(bld->gallivm->context);
  399.       LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  400.       LLVMValueRef aux[LP_MAX_VECTOR_LENGTH];
  401.  
  402.       memset(aux, 0, sizeof aux);
  403.  
  404.       for(j = 0; j < n; j += 4) {
  405.          for(i = 0; i < 4; ++i) {
  406.             unsigned shuffle;
  407.             switch (swizzles[i]) {
  408.             default:
  409.                assert(0);
  410.                /* fall through */
  411.             case PIPE_SWIZZLE_RED:
  412.             case PIPE_SWIZZLE_GREEN:
  413.             case PIPE_SWIZZLE_BLUE:
  414.             case PIPE_SWIZZLE_ALPHA:
  415.                shuffle = j + swizzles[i];
  416.                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
  417.                break;
  418.             case PIPE_SWIZZLE_ZERO:
  419.                shuffle = type.length + 0;
  420.                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
  421.                if (!aux[0]) {
  422.                   aux[0] = lp_build_const_elem(bld->gallivm, type, 0.0);
  423.                }
  424.                break;
  425.             case PIPE_SWIZZLE_ONE:
  426.                shuffle = type.length + 1;
  427.                shuffles[j + i] = LLVMConstInt(i32t, shuffle, 0);
  428.                if (!aux[1]) {
  429.                   aux[1] = lp_build_const_elem(bld->gallivm, type, 1.0);
  430.                }
  431.                break;
  432.             case LP_BLD_SWIZZLE_DONTCARE:
  433.                shuffles[j + i] = LLVMGetUndef(i32t);
  434.                break;
  435.             }
  436.          }
  437.       }
  438.  
  439.       for (i = 0; i < n; ++i) {
  440.          if (!aux[i]) {
  441.             aux[i] = undef;
  442.          }
  443.       }
  444.  
  445.       return LLVMBuildShuffleVector(builder, a,
  446.                                     LLVMConstVector(aux, n),
  447.                                     LLVMConstVector(shuffles, n), "");
  448.    } else {
  449.       /*
  450.        * Bit mask and shifts.
  451.        *
  452.        * For example, this will convert BGRA to RGBA by doing
  453.        *
  454.        * Little endian:
  455.        *   rgba = (bgra & 0x00ff0000) >> 16
  456.        *        | (bgra & 0xff00ff00)
  457.        *        | (bgra & 0x000000ff) << 16
  458.        *
  459.        * Big endian:A
  460.        *   rgba = (bgra & 0x0000ff00) << 16
  461.        *        | (bgra & 0x00ff00ff)
  462.        *        | (bgra & 0xff000000) >> 16
  463.        *
  464.        * This is necessary not only for faster cause, but because X86 backend
  465.        * will refuse shuffles of <4 x i8> vectors
  466.        */
  467.       LLVMValueRef res;
  468.       struct lp_type type4;
  469.       unsigned cond = 0;
  470.       unsigned chan;
  471.       int shift;
  472.  
  473.       /*
  474.        * Start with a mixture of 1 and 0.
  475.        */
  476.       for (chan = 0; chan < 4; ++chan) {
  477.          if (swizzles[chan] == PIPE_SWIZZLE_ONE) {
  478.             cond |= 1 << chan;
  479.          }
  480.       }
  481.       res = lp_build_select_aos(bld, cond, bld->one, bld->zero, 4);
  482.  
  483.       /*
  484.        * Build a type where each element is an integer that cover the four
  485.        * channels.
  486.        */
  487.       type4 = type;
  488.       type4.floating = FALSE;
  489.       type4.width *= 4;
  490.       type4.length /= 4;
  491.  
  492.       a = LLVMBuildBitCast(builder, a, lp_build_vec_type(bld->gallivm, type4), "");
  493.       res = LLVMBuildBitCast(builder, res, lp_build_vec_type(bld->gallivm, type4), "");
  494.  
  495.       /*
  496.        * Mask and shift the channels, trying to group as many channels in the
  497.        * same shift as possible.  The shift amount is positive for shifts left
  498.        * and negative for shifts right.
  499.        */
  500.       for (shift = -3; shift <= 3; ++shift) {
  501.          uint64_t mask = 0;
  502.  
  503.          assert(type4.width <= sizeof(mask)*8);
  504.  
  505.          /*
  506.           * Vector element numbers follow the XYZW order, so 0 is always X, etc.
  507.           * After widening 4 times we have:
  508.           *
  509.           *                                3210
  510.           * Little-endian register layout: WZYX
  511.           *
  512.           *                                0123
  513.           * Big-endian register layout:    XYZW
  514.           *
  515.           * For little-endian, higher-numbered channels are obtained by a shift right
  516.           * (negative shift amount) and lower-numbered channels by a shift left
  517.           * (positive shift amount).  The opposite is true for big-endian.
  518.           */
  519.          for (chan = 0; chan < 4; ++chan) {
  520.             if (swizzles[chan] < 4) {
  521.                /* We need to move channel swizzles[chan] into channel chan */
  522. #ifdef PIPE_ARCH_LITTLE_ENDIAN
  523.                if (swizzles[chan] - chan == -shift) {
  524.                   mask |= ((1ULL << type.width) - 1) << (swizzles[chan] * type.width);
  525.                }
  526. #else
  527.                if (swizzles[chan] - chan == shift) {
  528.                   mask |= ((1ULL << type.width) - 1) << (type4.width - type.width) >> (swizzles[chan] * type.width);
  529.                }
  530. #endif
  531.             }
  532.          }
  533.  
  534.          if (mask) {
  535.             LLVMValueRef masked;
  536.             LLVMValueRef shifted;
  537.             if (0)
  538.                debug_printf("shift = %i, mask = %" PRIx64 "\n", shift, mask);
  539.  
  540.             masked = LLVMBuildAnd(builder, a,
  541.                                   lp_build_const_int_vec(bld->gallivm, type4, mask), "");
  542.             if (shift > 0) {
  543.                shifted = LLVMBuildShl(builder, masked,
  544.                                       lp_build_const_int_vec(bld->gallivm, type4, shift*type.width), "");
  545.             } else if (shift < 0) {
  546.                shifted = LLVMBuildLShr(builder, masked,
  547.                                        lp_build_const_int_vec(bld->gallivm, type4, -shift*type.width), "");
  548.             } else {
  549.                shifted = masked;
  550.             }
  551.  
  552.             res = LLVMBuildOr(builder, res, shifted, "");
  553.          }
  554.       }
  555.  
  556.       return LLVMBuildBitCast(builder, res,
  557.                               lp_build_vec_type(bld->gallivm, type), "");
  558.    }
  559. }
  560.  
  561.  
  562. /**
  563.  * Extended swizzle of a single channel of a SoA vector.
  564.  *
  565.  * @param bld         building context
  566.  * @param unswizzled  array with the 4 unswizzled values
  567.  * @param swizzle     one of the PIPE_SWIZZLE_*
  568.  *
  569.  * @return  the swizzled value.
  570.  */
  571. LLVMValueRef
  572. lp_build_swizzle_soa_channel(struct lp_build_context *bld,
  573.                              const LLVMValueRef *unswizzled,
  574.                              unsigned swizzle)
  575. {
  576.    switch (swizzle) {
  577.    case PIPE_SWIZZLE_RED:
  578.    case PIPE_SWIZZLE_GREEN:
  579.    case PIPE_SWIZZLE_BLUE:
  580.    case PIPE_SWIZZLE_ALPHA:
  581.       return unswizzled[swizzle];
  582.    case PIPE_SWIZZLE_ZERO:
  583.       return bld->zero;
  584.    case PIPE_SWIZZLE_ONE:
  585.       return bld->one;
  586.    default:
  587.       assert(0);
  588.       return bld->undef;
  589.    }
  590. }
  591.  
  592.  
  593. /**
  594.  * Extended swizzle of a SoA vector.
  595.  *
  596.  * @param bld         building context
  597.  * @param unswizzled  array with the 4 unswizzled values
  598.  * @param swizzles    array of PIPE_SWIZZLE_*
  599.  * @param swizzled    output swizzled values
  600.  */
  601. void
  602. lp_build_swizzle_soa(struct lp_build_context *bld,
  603.                      const LLVMValueRef *unswizzled,
  604.                      const unsigned char swizzles[4],
  605.                      LLVMValueRef *swizzled)
  606. {
  607.    unsigned chan;
  608.  
  609.    for (chan = 0; chan < 4; ++chan) {
  610.       swizzled[chan] = lp_build_swizzle_soa_channel(bld, unswizzled,
  611.                                                     swizzles[chan]);
  612.    }
  613. }
  614.  
  615.  
  616. /**
  617.  * Do an extended swizzle of a SoA vector inplace.
  618.  *
  619.  * @param bld         building context
  620.  * @param values      intput/output array with the 4 values
  621.  * @param swizzles    array of PIPE_SWIZZLE_*
  622.  */
  623. void
  624. lp_build_swizzle_soa_inplace(struct lp_build_context *bld,
  625.                              LLVMValueRef *values,
  626.                              const unsigned char swizzles[4])
  627. {
  628.    LLVMValueRef unswizzled[4];
  629.    unsigned chan;
  630.  
  631.    for (chan = 0; chan < 4; ++chan) {
  632.       unswizzled[chan] = values[chan];
  633.    }
  634.  
  635.    lp_build_swizzle_soa(bld, unswizzled, swizzles, values);
  636. }
  637.  
  638.  
  639. /**
  640.  * Transpose from AOS <-> SOA
  641.  *
  642.  * @param single_type_lp   type of pixels
  643.  * @param src              the 4 * n pixel input
  644.  * @param dst              the 4 * n pixel output
  645.  */
  646. void
  647. lp_build_transpose_aos(struct gallivm_state *gallivm,
  648.                        struct lp_type single_type_lp,
  649.                        const LLVMValueRef src[4],
  650.                        LLVMValueRef dst[4])
  651. {
  652.    struct lp_type double_type_lp = single_type_lp;
  653.    LLVMTypeRef single_type;
  654.    LLVMTypeRef double_type;
  655.    LLVMValueRef t0, t1, t2, t3;
  656.  
  657.    double_type_lp.length >>= 1;
  658.    double_type_lp.width  <<= 1;
  659.  
  660.    double_type = lp_build_vec_type(gallivm, double_type_lp);
  661.    single_type = lp_build_vec_type(gallivm, single_type_lp);
  662.  
  663.    /* Interleave x, y, z, w -> xy and zw */
  664.    t0 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 0);
  665.    t1 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 0);
  666.    t2 = lp_build_interleave2_half(gallivm, single_type_lp, src[0], src[1], 1);
  667.    t3 = lp_build_interleave2_half(gallivm, single_type_lp, src[2], src[3], 1);
  668.  
  669.    /* Cast to double width type for second interleave */
  670.    t0 = LLVMBuildBitCast(gallivm->builder, t0, double_type, "t0");
  671.    t1 = LLVMBuildBitCast(gallivm->builder, t1, double_type, "t1");
  672.    t2 = LLVMBuildBitCast(gallivm->builder, t2, double_type, "t2");
  673.    t3 = LLVMBuildBitCast(gallivm->builder, t3, double_type, "t3");
  674.  
  675.    /* Interleave xy, zw -> xyzw */
  676.    dst[0] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 0);
  677.    dst[1] = lp_build_interleave2_half(gallivm, double_type_lp, t0, t1, 1);
  678.    dst[2] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 0);
  679.    dst[3] = lp_build_interleave2_half(gallivm, double_type_lp, t2, t3, 1);
  680.  
  681.    /* Cast back to original single width type */
  682.    dst[0] = LLVMBuildBitCast(gallivm->builder, dst[0], single_type, "dst0");
  683.    dst[1] = LLVMBuildBitCast(gallivm->builder, dst[1], single_type, "dst1");
  684.    dst[2] = LLVMBuildBitCast(gallivm->builder, dst[2], single_type, "dst2");
  685.    dst[3] = LLVMBuildBitCast(gallivm->builder, dst[3], single_type, "dst3");
  686. }
  687.  
  688.  
  689. /**
  690.  * Transpose from AOS <-> SOA for num_srcs
  691.  */
  692. void
  693. lp_build_transpose_aos_n(struct gallivm_state *gallivm,
  694.                          struct lp_type type,
  695.                          const LLVMValueRef* src,
  696.                          unsigned num_srcs,
  697.                          LLVMValueRef* dst)
  698. {
  699.    switch (num_srcs) {
  700.       case 1:
  701.          dst[0] = src[0];
  702.          break;
  703.  
  704.       case 2:
  705.       {
  706.          /* Note: we must use a temporary incase src == dst */
  707.          LLVMValueRef lo, hi;
  708.  
  709.          lo = lp_build_interleave2_half(gallivm, type, src[0], src[1], 0);
  710.          hi = lp_build_interleave2_half(gallivm, type, src[0], src[1], 1);
  711.  
  712.          dst[0] = lo;
  713.          dst[1] = hi;
  714.          break;
  715.       }
  716.  
  717.       case 4:
  718.          lp_build_transpose_aos(gallivm, type, src, dst);
  719.          break;
  720.  
  721.       default:
  722.          assert(0);
  723.    };
  724. }
  725.  
  726.  
  727. /**
  728.  * Pack n-th element of aos values,
  729.  * pad out to destination size.
  730.  * i.e. x1 y1 _ _ x2 y2 _ _ will become x1 x2 _ _
  731.  */
  732. LLVMValueRef
  733. lp_build_pack_aos_scalars(struct gallivm_state *gallivm,
  734.                           struct lp_type src_type,
  735.                           struct lp_type dst_type,
  736.                           const LLVMValueRef src,
  737.                           unsigned channel)
  738. {
  739.    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  740.    LLVMValueRef undef = LLVMGetUndef(i32t);
  741.    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  742.    unsigned num_src = src_type.length / 4;
  743.    unsigned num_dst = dst_type.length;
  744.    unsigned i;
  745.  
  746.    assert(num_src <= num_dst);
  747.  
  748.    for (i = 0; i < num_src; i++) {
  749.       shuffles[i] = LLVMConstInt(i32t, i * 4 + channel, 0);
  750.    }
  751.    for (i = num_src; i < num_dst; i++) {
  752.       shuffles[i] = undef;
  753.    }
  754.  
  755.    if (num_dst == 1) {
  756.       return LLVMBuildExtractElement(gallivm->builder, src, shuffles[0], "");
  757.    }
  758.    else {
  759.       return LLVMBuildShuffleVector(gallivm->builder, src, src,
  760.                                     LLVMConstVector(shuffles, num_dst), "");
  761.    }
  762. }
  763.  
  764.  
  765. /**
  766.  * Unpack and broadcast packed aos values consisting of only the
  767.  * first value, i.e. x1 x2 _ _ will become x1 x1 x1 x1 x2 x2 x2 x2
  768.  */
  769. LLVMValueRef
  770. lp_build_unpack_broadcast_aos_scalars(struct gallivm_state *gallivm,
  771.                                       struct lp_type src_type,
  772.                                       struct lp_type dst_type,
  773.                                       const LLVMValueRef src)
  774. {
  775.    LLVMTypeRef i32t = LLVMInt32TypeInContext(gallivm->context);
  776.    LLVMValueRef shuffles[LP_MAX_VECTOR_LENGTH];
  777.    unsigned num_dst = dst_type.length;
  778.    unsigned num_src = dst_type.length / 4;
  779.    unsigned i;
  780.  
  781.    assert(num_dst / 4 <= src_type.length);
  782.  
  783.    for (i = 0; i < num_src; i++) {
  784.       shuffles[i*4] = LLVMConstInt(i32t, i, 0);
  785.       shuffles[i*4+1] = LLVMConstInt(i32t, i, 0);
  786.       shuffles[i*4+2] = LLVMConstInt(i32t, i, 0);
  787.       shuffles[i*4+3] = LLVMConstInt(i32t, i, 0);
  788.    }
  789.  
  790.    if (num_src == 1) {
  791.       return lp_build_extract_broadcast(gallivm, src_type, dst_type,
  792.                                         src, shuffles[0]);
  793.    }
  794.    else {
  795.       return LLVMBuildShuffleVector(gallivm->builder, src, src,
  796.                                     LLVMConstVector(shuffles, num_dst), "");
  797.    }
  798. }
  799.  
  800.