Subversion Repositories Kolibri OS

Rev

Go to most recent revision | Blame | Last modification | View Log | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
  4.  * All Rights Reserved.
  5.  *
  6.  * Permission is hereby granted, free of charge, to any person obtaining a
  7.  * copy of this software and associated documentation files (the
  8.  * "Software"), to deal in the Software without restriction, including
  9.  * without limitation the rights to use, copy, modify, merge, publish,
  10.  * distribute, sub license, and/or sell copies of the Software, and to
  11.  * permit persons to whom the Software is furnished to do so, subject to
  12.  * the following conditions:
  13.  *
  14.  * The above copyright notice and this permission notice (including the
  15.  * next paragraph) shall be included in all copies or substantial portions
  16.  * of the Software.
  17.  *
  18.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  19.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  20.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  21.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  22.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  23.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  24.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  25.  *
  26.  **************************************************************************/
  27.  
  28. /**
  29.  * quad blending
  30.  * \author Brian Paul
  31.  */
  32.  
  33. #include "pipe/p_defines.h"
  34. #include "util/u_math.h"
  35. #include "util/u_memory.h"
  36. #include "util/u_format.h"
  37. #include "util/u_dual_blend.h"
  38. #include "sp_context.h"
  39. #include "sp_state.h"
  40. #include "sp_quad.h"
  41. #include "sp_tile_cache.h"
  42. #include "sp_quad_pipe.h"
  43.  
  44.  
  45. enum format
  46. {
  47.    RGBA,
  48.    RGB,
  49.    LUMINANCE,
  50.    LUMINANCE_ALPHA,
  51.    INTENSITY
  52. };
  53.  
  54.  
  55. /** Subclass of quad_stage */
  56. struct blend_quad_stage
  57. {
  58.    struct quad_stage base;
  59.    boolean clamp[PIPE_MAX_COLOR_BUFS];  /**< clamp colors to [0,1]? */
  60.    enum format base_format[PIPE_MAX_COLOR_BUFS];
  61.    enum util_format_type format_type[PIPE_MAX_COLOR_BUFS];
  62. };
  63.  
  64.  
  65. /** cast wrapper */
  66. static INLINE struct blend_quad_stage *
  67. blend_quad_stage(struct quad_stage *stage)
  68. {
  69.    return (struct blend_quad_stage *) stage;
  70. }
  71.  
  72.  
  73. #define VEC4_COPY(DST, SRC) \
  74. do { \
  75.     DST[0] = SRC[0]; \
  76.     DST[1] = SRC[1]; \
  77.     DST[2] = SRC[2]; \
  78.     DST[3] = SRC[3]; \
  79. } while(0)
  80.  
  81. #define VEC4_SCALAR(DST, SRC) \
  82. do { \
  83.     DST[0] = SRC; \
  84.     DST[1] = SRC; \
  85.     DST[2] = SRC; \
  86.     DST[3] = SRC; \
  87. } while(0)
  88.  
  89. #define VEC4_ADD(R, A, B) \
  90. do { \
  91.    R[0] = A[0] + B[0]; \
  92.    R[1] = A[1] + B[1]; \
  93.    R[2] = A[2] + B[2]; \
  94.    R[3] = A[3] + B[3]; \
  95. } while (0)
  96.  
  97. #define VEC4_SUB(R, A, B) \
  98. do { \
  99.    R[0] = A[0] - B[0]; \
  100.    R[1] = A[1] - B[1]; \
  101.    R[2] = A[2] - B[2]; \
  102.    R[3] = A[3] - B[3]; \
  103. } while (0)
  104.  
  105. /** Add and limit result to ceiling of 1.0 */
  106. #define VEC4_ADD_SAT(R, A, B) \
  107. do { \
  108.    R[0] = A[0] + B[0];  if (R[0] > 1.0f) R[0] = 1.0f; \
  109.    R[1] = A[1] + B[1];  if (R[1] > 1.0f) R[1] = 1.0f; \
  110.    R[2] = A[2] + B[2];  if (R[2] > 1.0f) R[2] = 1.0f; \
  111.    R[3] = A[3] + B[3];  if (R[3] > 1.0f) R[3] = 1.0f; \
  112. } while (0)
  113.  
  114. /** Subtract and limit result to floor of 0.0 */
  115. #define VEC4_SUB_SAT(R, A, B) \
  116. do { \
  117.    R[0] = A[0] - B[0];  if (R[0] < 0.0f) R[0] = 0.0f; \
  118.    R[1] = A[1] - B[1];  if (R[1] < 0.0f) R[1] = 0.0f; \
  119.    R[2] = A[2] - B[2];  if (R[2] < 0.0f) R[2] = 0.0f; \
  120.    R[3] = A[3] - B[3];  if (R[3] < 0.0f) R[3] = 0.0f; \
  121. } while (0)
  122.  
  123. #define VEC4_MUL(R, A, B) \
  124. do { \
  125.    R[0] = A[0] * B[0]; \
  126.    R[1] = A[1] * B[1]; \
  127.    R[2] = A[2] * B[2]; \
  128.    R[3] = A[3] * B[3]; \
  129. } while (0)
  130.  
  131. #define VEC4_MIN(R, A, B) \
  132. do { \
  133.    R[0] = (A[0] < B[0]) ? A[0] : B[0]; \
  134.    R[1] = (A[1] < B[1]) ? A[1] : B[1]; \
  135.    R[2] = (A[2] < B[2]) ? A[2] : B[2]; \
  136.    R[3] = (A[3] < B[3]) ? A[3] : B[3]; \
  137. } while (0)
  138.  
  139. #define VEC4_MAX(R, A, B) \
  140. do { \
  141.    R[0] = (A[0] > B[0]) ? A[0] : B[0]; \
  142.    R[1] = (A[1] > B[1]) ? A[1] : B[1]; \
  143.    R[2] = (A[2] > B[2]) ? A[2] : B[2]; \
  144.    R[3] = (A[3] > B[3]) ? A[3] : B[3]; \
  145. } while (0)
  146.  
  147.  
  148.  
  149. static void
  150. logicop_quad(struct quad_stage *qs,
  151.              float (*quadColor)[4],
  152.              float (*dest)[4])
  153. {
  154.    struct softpipe_context *softpipe = qs->softpipe;
  155.    ubyte src[4][4], dst[4][4], res[4][4];
  156.    uint *src4 = (uint *) src;
  157.    uint *dst4 = (uint *) dst;
  158.    uint *res4 = (uint *) res;
  159.    uint j;
  160.  
  161.  
  162.    /* convert to ubyte */
  163.    for (j = 0; j < 4; j++) { /* loop over R,G,B,A channels */
  164.       dst[j][0] = float_to_ubyte(dest[j][0]); /* P0 */
  165.       dst[j][1] = float_to_ubyte(dest[j][1]); /* P1 */
  166.       dst[j][2] = float_to_ubyte(dest[j][2]); /* P2 */
  167.       dst[j][3] = float_to_ubyte(dest[j][3]); /* P3 */
  168.  
  169.       src[j][0] = float_to_ubyte(quadColor[j][0]); /* P0 */
  170.       src[j][1] = float_to_ubyte(quadColor[j][1]); /* P1 */
  171.       src[j][2] = float_to_ubyte(quadColor[j][2]); /* P2 */
  172.       src[j][3] = float_to_ubyte(quadColor[j][3]); /* P3 */
  173.    }
  174.  
  175.    switch (softpipe->blend->logicop_func) {
  176.    case PIPE_LOGICOP_CLEAR:
  177.       for (j = 0; j < 4; j++)
  178.          res4[j] = 0;
  179.       break;
  180.    case PIPE_LOGICOP_NOR:
  181.       for (j = 0; j < 4; j++)
  182.          res4[j] = ~(src4[j] | dst4[j]);
  183.       break;
  184.    case PIPE_LOGICOP_AND_INVERTED:
  185.       for (j = 0; j < 4; j++)
  186.          res4[j] = ~src4[j] & dst4[j];
  187.       break;
  188.    case PIPE_LOGICOP_COPY_INVERTED:
  189.       for (j = 0; j < 4; j++)
  190.          res4[j] = ~src4[j];
  191.       break;
  192.    case PIPE_LOGICOP_AND_REVERSE:
  193.       for (j = 0; j < 4; j++)
  194.          res4[j] = src4[j] & ~dst4[j];
  195.       break;
  196.    case PIPE_LOGICOP_INVERT:
  197.       for (j = 0; j < 4; j++)
  198.          res4[j] = ~dst4[j];
  199.       break;
  200.    case PIPE_LOGICOP_XOR:
  201.       for (j = 0; j < 4; j++)
  202.          res4[j] = dst4[j] ^ src4[j];
  203.       break;
  204.    case PIPE_LOGICOP_NAND:
  205.       for (j = 0; j < 4; j++)
  206.          res4[j] = ~(src4[j] & dst4[j]);
  207.       break;
  208.    case PIPE_LOGICOP_AND:
  209.       for (j = 0; j < 4; j++)
  210.          res4[j] = src4[j] & dst4[j];
  211.       break;
  212.    case PIPE_LOGICOP_EQUIV:
  213.       for (j = 0; j < 4; j++)
  214.          res4[j] = ~(src4[j] ^ dst4[j]);
  215.       break;
  216.    case PIPE_LOGICOP_NOOP:
  217.       for (j = 0; j < 4; j++)
  218.          res4[j] = dst4[j];
  219.       break;
  220.    case PIPE_LOGICOP_OR_INVERTED:
  221.       for (j = 0; j < 4; j++)
  222.          res4[j] = ~src4[j] | dst4[j];
  223.       break;
  224.    case PIPE_LOGICOP_COPY:
  225.       for (j = 0; j < 4; j++)
  226.          res4[j] = src4[j];
  227.       break;
  228.    case PIPE_LOGICOP_OR_REVERSE:
  229.       for (j = 0; j < 4; j++)
  230.          res4[j] = src4[j] | ~dst4[j];
  231.       break;
  232.    case PIPE_LOGICOP_OR:
  233.       for (j = 0; j < 4; j++)
  234.          res4[j] = src4[j] | dst4[j];
  235.       break;
  236.    case PIPE_LOGICOP_SET:
  237.       for (j = 0; j < 4; j++)
  238.          res4[j] = ~0;
  239.       break;
  240.    default:
  241.       assert(0 && "invalid logicop mode");
  242.    }
  243.  
  244.    for (j = 0; j < 4; j++) {
  245.       quadColor[j][0] = ubyte_to_float(res[j][0]);
  246.       quadColor[j][1] = ubyte_to_float(res[j][1]);
  247.       quadColor[j][2] = ubyte_to_float(res[j][2]);
  248.       quadColor[j][3] = ubyte_to_float(res[j][3]);
  249.    }
  250. }
  251.  
  252.  
  253.  
  254. /**
  255.  * Do blending for a 2x2 quad for one color buffer.
  256.  * \param quadColor  the incoming quad colors
  257.  * \param dest  the destination/framebuffer quad colors
  258.  * \param const_blend_color  the constant blend color
  259.  * \param blend_index  which set of blending terms to use
  260.  */
  261. static void
  262. blend_quad(struct quad_stage *qs,
  263.            float (*quadColor)[4],
  264.            float (*quadColor2)[4],
  265.            float (*dest)[4],
  266.            const float const_blend_color[4],
  267.            unsigned blend_index)
  268. {
  269.    static const float zero[4] = { 0, 0, 0, 0 };
  270.    static const float one[4] = { 1, 1, 1, 1 };
  271.    struct softpipe_context *softpipe = qs->softpipe;
  272.    float source[4][TGSI_QUAD_SIZE] = { { 0 } };
  273.    float blend_dest[4][TGSI_QUAD_SIZE];
  274.  
  275.    /*
  276.     * Compute src/first term RGB
  277.     */
  278.    switch (softpipe->blend->rt[blend_index].rgb_src_factor) {
  279.    case PIPE_BLENDFACTOR_ONE:
  280.       VEC4_COPY(source[0], quadColor[0]); /* R */
  281.       VEC4_COPY(source[1], quadColor[1]); /* G */
  282.       VEC4_COPY(source[2], quadColor[2]); /* B */
  283.       break;
  284.    case PIPE_BLENDFACTOR_SRC_COLOR:
  285.       VEC4_MUL(source[0], quadColor[0], quadColor[0]); /* R */
  286.       VEC4_MUL(source[1], quadColor[1], quadColor[1]); /* G */
  287.       VEC4_MUL(source[2], quadColor[2], quadColor[2]); /* B */
  288.       break;
  289.    case PIPE_BLENDFACTOR_SRC_ALPHA:
  290.       {
  291.          const float *alpha = quadColor[3];
  292.          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
  293.          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
  294.          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
  295.       }
  296.       break;
  297.    case PIPE_BLENDFACTOR_DST_COLOR:
  298.       VEC4_MUL(source[0], quadColor[0], dest[0]); /* R */
  299.       VEC4_MUL(source[1], quadColor[1], dest[1]); /* G */
  300.       VEC4_MUL(source[2], quadColor[2], dest[2]); /* B */
  301.       break;
  302.    case PIPE_BLENDFACTOR_DST_ALPHA:
  303.       {
  304.          const float *alpha = dest[3];
  305.          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
  306.          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
  307.          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
  308.       }
  309.       break;
  310.    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  311.       {
  312.          const float *alpha = quadColor[3];
  313.          float diff[4], temp[4];
  314.          VEC4_SUB(diff, one, dest[3]);
  315.          VEC4_MIN(temp, alpha, diff);
  316.          VEC4_MUL(source[0], quadColor[0], temp); /* R */
  317.          VEC4_MUL(source[1], quadColor[1], temp); /* G */
  318.          VEC4_MUL(source[2], quadColor[2], temp); /* B */
  319.       }
  320.       break;
  321.    case PIPE_BLENDFACTOR_CONST_COLOR:
  322.       {
  323.          float comp[4];
  324.          VEC4_SCALAR(comp, const_blend_color[0]); /* R */
  325.          VEC4_MUL(source[0], quadColor[0], comp); /* R */
  326.          VEC4_SCALAR(comp, const_blend_color[1]); /* G */
  327.          VEC4_MUL(source[1], quadColor[1], comp); /* G */
  328.          VEC4_SCALAR(comp, const_blend_color[2]); /* B */
  329.          VEC4_MUL(source[2], quadColor[2], comp); /* B */
  330.       }
  331.       break;
  332.    case PIPE_BLENDFACTOR_CONST_ALPHA:
  333.       {
  334.          float alpha[4];
  335.          VEC4_SCALAR(alpha, const_blend_color[3]);
  336.          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
  337.          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
  338.          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
  339.       }
  340.       break;
  341.    case PIPE_BLENDFACTOR_SRC1_COLOR:
  342.       VEC4_MUL(source[0], quadColor[0], quadColor2[0]); /* R */
  343.       VEC4_MUL(source[1], quadColor[1], quadColor2[1]); /* G */
  344.       VEC4_MUL(source[2], quadColor[2], quadColor2[2]); /* B */  
  345.       break;
  346.    case PIPE_BLENDFACTOR_SRC1_ALPHA:
  347.       {
  348.          const float *alpha = quadColor2[3];
  349.          VEC4_MUL(source[0], quadColor[0], alpha); /* R */
  350.          VEC4_MUL(source[1], quadColor[1], alpha); /* G */
  351.          VEC4_MUL(source[2], quadColor[2], alpha); /* B */
  352.       }
  353.       break;
  354.    case PIPE_BLENDFACTOR_ZERO:
  355.       VEC4_COPY(source[0], zero); /* R */
  356.       VEC4_COPY(source[1], zero); /* G */
  357.       VEC4_COPY(source[2], zero); /* B */
  358.       break;
  359.    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  360.       {
  361.          float inv_comp[4];
  362.          VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
  363.          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
  364.          VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
  365.          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
  366.          VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
  367.          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
  368.       }
  369.       break;
  370.    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  371.       {
  372.          float inv_alpha[4];
  373.          VEC4_SUB(inv_alpha, one, quadColor[3]);
  374.          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
  375.          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
  376.          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
  377.       }
  378.       break;
  379.    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  380.       {
  381.          float inv_alpha[4];
  382.          VEC4_SUB(inv_alpha, one, dest[3]);
  383.          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
  384.          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
  385.          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
  386.       }
  387.       break;
  388.    case PIPE_BLENDFACTOR_INV_DST_COLOR:
  389.       {
  390.          float inv_comp[4];
  391.          VEC4_SUB(inv_comp, one, dest[0]); /* R */
  392.          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
  393.          VEC4_SUB(inv_comp, one, dest[1]); /* G */
  394.          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
  395.          VEC4_SUB(inv_comp, one, dest[2]); /* B */
  396.          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
  397.       }
  398.       break;
  399.    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  400.       {
  401.          float inv_comp[4];
  402.          /* R */
  403.          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
  404.          VEC4_MUL(source[0], quadColor[0], inv_comp);
  405.          /* G */
  406.          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
  407.          VEC4_MUL(source[1], quadColor[1], inv_comp);
  408.          /* B */
  409.          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
  410.          VEC4_MUL(source[2], quadColor[2], inv_comp);
  411.       }
  412.       break;
  413.    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  414.       {
  415.          float inv_alpha[4];
  416.          VEC4_SCALAR(inv_alpha, 1.0f - const_blend_color[3]);
  417.          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
  418.          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
  419.          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
  420.       }
  421.       break;
  422.    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
  423.       {
  424.          float inv_comp[4];
  425.          VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
  426.          VEC4_MUL(source[0], quadColor[0], inv_comp); /* R */
  427.          VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
  428.          VEC4_MUL(source[1], quadColor[1], inv_comp); /* G */
  429.          VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
  430.          VEC4_MUL(source[2], quadColor[2], inv_comp); /* B */
  431.       }
  432.       break;
  433.    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
  434.       {
  435.          float inv_alpha[4];
  436.          VEC4_SUB(inv_alpha, one, quadColor2[3]);
  437.          VEC4_MUL(source[0], quadColor[0], inv_alpha); /* R */
  438.          VEC4_MUL(source[1], quadColor[1], inv_alpha); /* G */
  439.          VEC4_MUL(source[2], quadColor[2], inv_alpha); /* B */
  440.       }
  441.       break;
  442.    default:
  443.       assert(0 && "invalid rgb src factor");
  444.    }
  445.  
  446.    /*
  447.     * Compute src/first term A
  448.     */
  449.    switch (softpipe->blend->rt[blend_index].alpha_src_factor) {
  450.    case PIPE_BLENDFACTOR_ONE:
  451.       VEC4_COPY(source[3], quadColor[3]); /* A */
  452.       break;
  453.    case PIPE_BLENDFACTOR_SRC_COLOR:
  454.       /* fall-through */
  455.    case PIPE_BLENDFACTOR_SRC_ALPHA:
  456.       {
  457.          const float *alpha = quadColor[3];
  458.          VEC4_MUL(source[3], quadColor[3], alpha); /* A */
  459.       }
  460.       break;
  461.    case PIPE_BLENDFACTOR_DST_COLOR:
  462.       /* fall-through */
  463.    case PIPE_BLENDFACTOR_DST_ALPHA:
  464.       VEC4_MUL(source[3], quadColor[3], dest[3]); /* A */
  465.       break;
  466.    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  467.       /* multiply alpha by 1.0 */
  468.       VEC4_COPY(source[3], quadColor[3]); /* A */
  469.       break;
  470.    case PIPE_BLENDFACTOR_CONST_COLOR:
  471.       /* fall-through */
  472.    case PIPE_BLENDFACTOR_CONST_ALPHA:
  473.       {
  474.          float comp[4];
  475.          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
  476.          VEC4_MUL(source[3], quadColor[3], comp); /* A */
  477.       }
  478.       break;
  479.    case PIPE_BLENDFACTOR_ZERO:
  480.       VEC4_COPY(source[3], zero); /* A */
  481.       break;
  482.    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  483.       /* fall-through */
  484.    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  485.       {
  486.          float inv_alpha[4];
  487.          VEC4_SUB(inv_alpha, one, quadColor[3]);
  488.          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
  489.       }
  490.       break;
  491.    case PIPE_BLENDFACTOR_INV_DST_COLOR:
  492.       /* fall-through */
  493.    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  494.       {
  495.          float inv_alpha[4];
  496.          VEC4_SUB(inv_alpha, one, dest[3]);
  497.          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
  498.       }
  499.       break;
  500.    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  501.       /* fall-through */
  502.    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  503.       {
  504.          float inv_comp[4];
  505.          /* A */
  506.          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
  507.          VEC4_MUL(source[3], quadColor[3], inv_comp);
  508.       }
  509.       break;
  510.    case PIPE_BLENDFACTOR_SRC1_COLOR:
  511.       /* fall-through */
  512.    case PIPE_BLENDFACTOR_SRC1_ALPHA:
  513.       {
  514.          const float *alpha = quadColor2[3];
  515.          VEC4_MUL(source[3], quadColor[3], alpha); /* A */
  516.       }
  517.       break;
  518.    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
  519.       /* fall-through */
  520.    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
  521.       {
  522.          float inv_alpha[4];
  523.          VEC4_SUB(inv_alpha, one, quadColor2[3]);
  524.          VEC4_MUL(source[3], quadColor[3], inv_alpha); /* A */
  525.       }
  526.       break;
  527.    default:
  528.       assert(0 && "invalid alpha src factor");
  529.    }
  530.  
  531.    /* Save the original dest for use in masking */
  532.    VEC4_COPY(blend_dest[0], dest[0]);
  533.    VEC4_COPY(blend_dest[1], dest[1]);
  534.    VEC4_COPY(blend_dest[2], dest[2]);
  535.    VEC4_COPY(blend_dest[3], dest[3]);
  536.  
  537.  
  538.    /*
  539.     * Compute blend_dest/second term RGB
  540.     */
  541.    switch (softpipe->blend->rt[blend_index].rgb_dst_factor) {
  542.    case PIPE_BLENDFACTOR_ONE:
  543.       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
  544.       break;
  545.    case PIPE_BLENDFACTOR_SRC_COLOR:
  546.       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[0]); /* R */
  547.       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[1]); /* G */
  548.       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[2]); /* B */
  549.       break;
  550.    case PIPE_BLENDFACTOR_SRC_ALPHA:
  551.       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor[3]); /* R * A */
  552.       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor[3]); /* G * A */
  553.       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor[3]); /* B * A */
  554.       break;
  555.    case PIPE_BLENDFACTOR_DST_ALPHA:
  556.       VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[3]); /* R * A */
  557.       VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[3]); /* G * A */
  558.       VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[3]); /* B * A */
  559.       break;
  560.    case PIPE_BLENDFACTOR_DST_COLOR:
  561.       VEC4_MUL(blend_dest[0], blend_dest[0], blend_dest[0]); /* R */
  562.       VEC4_MUL(blend_dest[1], blend_dest[1], blend_dest[1]); /* G */
  563.       VEC4_MUL(blend_dest[2], blend_dest[2], blend_dest[2]); /* B */
  564.       break;
  565.    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  566.       {
  567.          const float *alpha = quadColor[3];
  568.          float diff[4], temp[4];
  569.          VEC4_SUB(diff, one, blend_dest[3]);
  570.          VEC4_MIN(temp, alpha, diff);
  571.          VEC4_MUL(blend_dest[0], blend_dest[0], temp); /* R */
  572.          VEC4_MUL(blend_dest[1], blend_dest[1], temp); /* G */
  573.          VEC4_MUL(blend_dest[2], blend_dest[2], temp); /* B */
  574.       }
  575.       break;
  576.    case PIPE_BLENDFACTOR_CONST_COLOR:
  577.       {
  578.          float comp[4];
  579.          VEC4_SCALAR(comp, const_blend_color[0]); /* R */
  580.          VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
  581.          VEC4_SCALAR(comp, const_blend_color[1]); /* G */
  582.          VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
  583.          VEC4_SCALAR(comp, const_blend_color[2]); /* B */
  584.          VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
  585.       }
  586.       break;
  587.    case PIPE_BLENDFACTOR_CONST_ALPHA:
  588.       {
  589.          float comp[4];
  590.          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
  591.          VEC4_MUL(blend_dest[0], blend_dest[0], comp); /* R */
  592.          VEC4_MUL(blend_dest[1], blend_dest[1], comp); /* G */
  593.          VEC4_MUL(blend_dest[2], blend_dest[2], comp); /* B */
  594.       }
  595.       break;
  596.    case PIPE_BLENDFACTOR_ZERO:
  597.       VEC4_COPY(blend_dest[0], zero); /* R */
  598.       VEC4_COPY(blend_dest[1], zero); /* G */
  599.       VEC4_COPY(blend_dest[2], zero); /* B */
  600.       break;
  601.    case PIPE_BLENDFACTOR_SRC1_COLOR:
  602.       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[0]); /* R */
  603.       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[1]); /* G */
  604.       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[2]); /* B */
  605.       break;
  606.    case PIPE_BLENDFACTOR_SRC1_ALPHA:
  607.       VEC4_MUL(blend_dest[0], blend_dest[0], quadColor2[3]); /* R * A */
  608.       VEC4_MUL(blend_dest[1], blend_dest[1], quadColor2[3]); /* G * A */
  609.       VEC4_MUL(blend_dest[2], blend_dest[2], quadColor2[3]); /* B * A */
  610.       break;
  611.    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  612.       {
  613.          float inv_comp[4];
  614.          VEC4_SUB(inv_comp, one, quadColor[0]); /* R */
  615.          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
  616.          VEC4_SUB(inv_comp, one, quadColor[1]); /* G */
  617.          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
  618.          VEC4_SUB(inv_comp, one, quadColor[2]); /* B */
  619.          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
  620.       }
  621.       break;
  622.    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  623.       {
  624.          float one_minus_alpha[TGSI_QUAD_SIZE];
  625.          VEC4_SUB(one_minus_alpha, one, quadColor[3]);
  626.          VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
  627.          VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
  628.          VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
  629.       }
  630.       break;
  631.    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  632.       {
  633.          float inv_comp[4];
  634.          VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
  635.          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
  636.          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
  637.          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
  638.       }
  639.       break;
  640.    case PIPE_BLENDFACTOR_INV_DST_COLOR:
  641.       {
  642.          float inv_comp[4];
  643.          VEC4_SUB(inv_comp, one, blend_dest[0]); /* R */
  644.          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp); /* R */
  645.          VEC4_SUB(inv_comp, one, blend_dest[1]); /* G */
  646.          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp); /* G */
  647.          VEC4_SUB(inv_comp, one, blend_dest[2]); /* B */
  648.          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp); /* B */
  649.       }
  650.       break;
  651.    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  652.       {
  653.          float inv_comp[4];
  654.          /* R */
  655.          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[0]);
  656.          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
  657.          /* G */
  658.          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[1]);
  659.          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
  660.          /* B */
  661.          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[2]);
  662.          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
  663.       }
  664.       break;
  665.    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  666.       {
  667.          float inv_comp[4];
  668.          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
  669.          VEC4_MUL(blend_dest[0], blend_dest[0], inv_comp);
  670.          VEC4_MUL(blend_dest[1], blend_dest[1], inv_comp);
  671.          VEC4_MUL(blend_dest[2], blend_dest[2], inv_comp);
  672.       }
  673.       break;
  674.    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
  675.       {
  676.          float inv_comp[4];
  677.          VEC4_SUB(inv_comp, one, quadColor2[0]); /* R */
  678.          VEC4_MUL(blend_dest[0], inv_comp, blend_dest[0]); /* R */
  679.          VEC4_SUB(inv_comp, one, quadColor2[1]); /* G */
  680.          VEC4_MUL(blend_dest[1], inv_comp, blend_dest[1]); /* G */
  681.          VEC4_SUB(inv_comp, one, quadColor2[2]); /* B */
  682.          VEC4_MUL(blend_dest[2], inv_comp, blend_dest[2]); /* B */
  683.       }
  684.       break;
  685.    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
  686.       {
  687.          float one_minus_alpha[TGSI_QUAD_SIZE];
  688.          VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
  689.          VEC4_MUL(blend_dest[0], blend_dest[0], one_minus_alpha); /* R */
  690.          VEC4_MUL(blend_dest[1], blend_dest[1], one_minus_alpha); /* G */
  691.          VEC4_MUL(blend_dest[2], blend_dest[2], one_minus_alpha); /* B */
  692.       }
  693.       break;
  694.    default:
  695.       assert(0 && "invalid rgb dst factor");
  696.    }
  697.  
  698.    /*
  699.     * Compute blend_dest/second term A
  700.     */
  701.    switch (softpipe->blend->rt[blend_index].alpha_dst_factor) {
  702.    case PIPE_BLENDFACTOR_ONE:
  703.       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
  704.       break;
  705.    case PIPE_BLENDFACTOR_SRC_COLOR:
  706.       /* fall-through */
  707.    case PIPE_BLENDFACTOR_SRC_ALPHA:
  708.       VEC4_MUL(blend_dest[3], blend_dest[3], quadColor[3]); /* A * A */
  709.       break;
  710.    case PIPE_BLENDFACTOR_DST_COLOR:
  711.       /* fall-through */
  712.    case PIPE_BLENDFACTOR_DST_ALPHA:
  713.       VEC4_MUL(blend_dest[3], blend_dest[3], blend_dest[3]); /* A */
  714.       break;
  715.    case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE:
  716.       /* blend_dest = blend_dest * 1   NO-OP, leave blend_dest as-is */
  717.       break;
  718.    case PIPE_BLENDFACTOR_CONST_COLOR:
  719.       /* fall-through */
  720.    case PIPE_BLENDFACTOR_CONST_ALPHA:
  721.       {
  722.          float comp[4];
  723.          VEC4_SCALAR(comp, const_blend_color[3]); /* A */
  724.          VEC4_MUL(blend_dest[3], blend_dest[3], comp); /* A */
  725.       }
  726.       break;
  727.    case PIPE_BLENDFACTOR_ZERO:
  728.       VEC4_COPY(blend_dest[3], zero); /* A */
  729.       break;
  730.    case PIPE_BLENDFACTOR_INV_SRC_COLOR:
  731.       /* fall-through */
  732.    case PIPE_BLENDFACTOR_INV_SRC_ALPHA:
  733.       {
  734.          float one_minus_alpha[TGSI_QUAD_SIZE];
  735.          VEC4_SUB(one_minus_alpha, one, quadColor[3]);
  736.          VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
  737.       }
  738.       break;
  739.    case PIPE_BLENDFACTOR_INV_DST_COLOR:
  740.       /* fall-through */
  741.    case PIPE_BLENDFACTOR_INV_DST_ALPHA:
  742.       {
  743.          float inv_comp[4];
  744.          VEC4_SUB(inv_comp, one, blend_dest[3]); /* A */
  745.          VEC4_MUL(blend_dest[3], inv_comp, blend_dest[3]); /* A */
  746.       }
  747.       break;
  748.    case PIPE_BLENDFACTOR_INV_CONST_COLOR:
  749.       /* fall-through */
  750.    case PIPE_BLENDFACTOR_INV_CONST_ALPHA:
  751.       {
  752.          float inv_comp[4];
  753.          VEC4_SCALAR(inv_comp, 1.0f - const_blend_color[3]);
  754.          VEC4_MUL(blend_dest[3], blend_dest[3], inv_comp);
  755.       }
  756.       break;
  757.    case PIPE_BLENDFACTOR_SRC1_COLOR:
  758.       /* fall-through */
  759.    case PIPE_BLENDFACTOR_SRC1_ALPHA:
  760.       VEC4_MUL(blend_dest[3], blend_dest[3], quadColor2[3]); /* A * A */
  761.       break;
  762.    case PIPE_BLENDFACTOR_INV_SRC1_COLOR:
  763.       /* fall-through */
  764.    case PIPE_BLENDFACTOR_INV_SRC1_ALPHA:
  765.       {
  766.          float one_minus_alpha[TGSI_QUAD_SIZE];
  767.          VEC4_SUB(one_minus_alpha, one, quadColor2[3]);
  768.          VEC4_MUL(blend_dest[3], blend_dest[3], one_minus_alpha); /* A */
  769.       }
  770.       break;
  771.    default:
  772.       assert(0 && "invalid alpha dst factor");
  773.    }
  774.  
  775.    /*
  776.     * Combine RGB terms
  777.     */
  778.    switch (softpipe->blend->rt[blend_index].rgb_func) {
  779.    case PIPE_BLEND_ADD:
  780.       VEC4_ADD(quadColor[0], source[0], blend_dest[0]); /* R */
  781.       VEC4_ADD(quadColor[1], source[1], blend_dest[1]); /* G */
  782.       VEC4_ADD(quadColor[2], source[2], blend_dest[2]); /* B */
  783.       break;
  784.    case PIPE_BLEND_SUBTRACT:
  785.       VEC4_SUB(quadColor[0], source[0], blend_dest[0]); /* R */
  786.       VEC4_SUB(quadColor[1], source[1], blend_dest[1]); /* G */
  787.       VEC4_SUB(quadColor[2], source[2], blend_dest[2]); /* B */
  788.       break;
  789.    case PIPE_BLEND_REVERSE_SUBTRACT:
  790.       VEC4_SUB(quadColor[0], blend_dest[0], source[0]); /* R */
  791.       VEC4_SUB(quadColor[1], blend_dest[1], source[1]); /* G */
  792.       VEC4_SUB(quadColor[2], blend_dest[2], source[2]); /* B */
  793.       break;
  794.    case PIPE_BLEND_MIN:
  795.       VEC4_MIN(quadColor[0], source[0], blend_dest[0]); /* R */
  796.       VEC4_MIN(quadColor[1], source[1], blend_dest[1]); /* G */
  797.       VEC4_MIN(quadColor[2], source[2], blend_dest[2]); /* B */
  798.       break;
  799.    case PIPE_BLEND_MAX:
  800.       VEC4_MAX(quadColor[0], source[0], blend_dest[0]); /* R */
  801.       VEC4_MAX(quadColor[1], source[1], blend_dest[1]); /* G */
  802.       VEC4_MAX(quadColor[2], source[2], blend_dest[2]); /* B */
  803.       break;
  804.    default:
  805.       assert(0 && "invalid rgb blend func");
  806.    }
  807.  
  808.    /*
  809.     * Combine A terms
  810.     */
  811.    switch (softpipe->blend->rt[blend_index].alpha_func) {
  812.    case PIPE_BLEND_ADD:
  813.       VEC4_ADD(quadColor[3], source[3], blend_dest[3]); /* A */
  814.       break;
  815.    case PIPE_BLEND_SUBTRACT:
  816.       VEC4_SUB(quadColor[3], source[3], blend_dest[3]); /* A */
  817.       break;
  818.    case PIPE_BLEND_REVERSE_SUBTRACT:
  819.       VEC4_SUB(quadColor[3], blend_dest[3], source[3]); /* A */
  820.       break;
  821.    case PIPE_BLEND_MIN:
  822.       VEC4_MIN(quadColor[3], source[3], blend_dest[3]); /* A */
  823.       break;
  824.    case PIPE_BLEND_MAX:
  825.       VEC4_MAX(quadColor[3], source[3], blend_dest[3]); /* A */
  826.       break;
  827.    default:
  828.       assert(0 && "invalid alpha blend func");
  829.    }
  830. }
  831.  
  832. static void
  833. colormask_quad(unsigned colormask,
  834.                float (*quadColor)[4],
  835.                float (*dest)[4])
  836. {
  837.    /* R */
  838.    if (!(colormask & PIPE_MASK_R))
  839.       COPY_4V(quadColor[0], dest[0]);
  840.  
  841.    /* G */
  842.    if (!(colormask & PIPE_MASK_G))
  843.       COPY_4V(quadColor[1], dest[1]);
  844.  
  845.    /* B */
  846.    if (!(colormask & PIPE_MASK_B))
  847.       COPY_4V(quadColor[2], dest[2]);
  848.  
  849.    /* A */
  850.    if (!(colormask & PIPE_MASK_A))
  851.       COPY_4V(quadColor[3], dest[3]);
  852. }
  853.  
  854.  
  855. /**
  856.  * Clamp all colors in a quad to [0, 1]
  857.  */
  858. static void
  859. clamp_colors(float (*quadColor)[4])
  860. {
  861.    unsigned i, j;
  862.  
  863.    for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  864.       for (i = 0; i < 4; i++) {
  865.          quadColor[i][j] = CLAMP(quadColor[i][j], 0.0F, 1.0F);
  866.       }
  867.    }
  868. }
  869.  
  870.  
  871. /**
  872.  * If we're drawing to a luminance, luminance/alpha or intensity surface
  873.  * we have to adjust (rebase) the fragment/quad colors before writing them
  874.  * to the tile cache.  The tile cache always stores RGBA colors but if
  875.  * we're caching a L/A surface (for example) we need to be sure that R=G=B
  876.  * so that subsequent reads from the surface cache appear to return L/A
  877.  * values.
  878.  * The piglit fbo-blending-formats test will exercise this.
  879.  */
  880. static void
  881. rebase_colors(enum format base_format, float (*quadColor)[4])
  882. {
  883.    unsigned i;
  884.  
  885.    switch (base_format) {
  886.    case RGB:
  887.       for (i = 0; i < 4; i++) {
  888.          /* A = 1 */
  889.          quadColor[3][i] = 1.0F;
  890.       }
  891.       break;
  892.    case LUMINANCE:
  893.       for (i = 0; i < 4; i++) {
  894.          /* B = G = R */
  895.          quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
  896.          /* A = 1 */
  897.          quadColor[3][i] = 1.0F;
  898.       }
  899.       break;
  900.    case LUMINANCE_ALPHA:
  901.       for (i = 0; i < 4; i++) {
  902.          /* B = G = R */
  903.          quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
  904.       }
  905.       break;
  906.    case INTENSITY:
  907.       for (i = 0; i < 4; i++) {
  908.          /* A = B = G = R */
  909.          quadColor[3][i] = quadColor[2][i] = quadColor[1][i] = quadColor[0][i];
  910.       }
  911.       break;
  912.    default:
  913.       ; /* nothing */
  914.    }
  915. }
  916.  
  917. static void
  918. blend_fallback(struct quad_stage *qs,
  919.                struct quad_header *quads[],
  920.                unsigned nr)
  921. {
  922.    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
  923.    struct softpipe_context *softpipe = qs->softpipe;
  924.    const struct pipe_blend_state *blend = softpipe->blend;
  925.    unsigned cbuf;
  926.    boolean write_all;
  927.  
  928.    write_all = softpipe->fs_variant->info.color0_writes_all_cbufs;
  929.  
  930.    for (cbuf = 0; cbuf < softpipe->framebuffer.nr_cbufs; cbuf++)
  931.    {
  932.       /* which blend/mask state index to use: */
  933.       const uint blend_buf = blend->independent_blend_enable ? cbuf : 0;
  934.       float dest[4][TGSI_QUAD_SIZE];
  935.       struct softpipe_cached_tile *tile
  936.          = sp_get_cached_tile(softpipe->cbuf_cache[cbuf],
  937.                               quads[0]->input.x0,
  938.                               quads[0]->input.y0);
  939.       const boolean clamp = bqs->clamp[cbuf];
  940.       const float *blend_color;
  941.       const boolean dual_source_blend = util_blend_state_is_dual(blend, cbuf);
  942.       uint q, i, j;
  943.  
  944.       if (clamp)
  945.          blend_color = softpipe->blend_color_clamped.color;
  946.       else
  947.          blend_color = softpipe->blend_color.color;
  948.  
  949.       for (q = 0; q < nr; q++) {
  950.          struct quad_header *quad = quads[q];
  951.          float (*quadColor)[4];
  952.          float (*quadColor2)[4] = NULL;
  953.          float temp_quad_color[TGSI_QUAD_SIZE][4];
  954.          const int itx = (quad->input.x0 & (TILE_SIZE-1));
  955.          const int ity = (quad->input.y0 & (TILE_SIZE-1));
  956.  
  957.          if (write_all) {
  958.             for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  959.                for (i = 0; i < 4; i++) {
  960.                   temp_quad_color[i][j] = quad->output.color[0][i][j];
  961.                }
  962.             }
  963.             quadColor = temp_quad_color;
  964.          } else {
  965.             quadColor = quad->output.color[cbuf];
  966.             if (dual_source_blend)
  967.                quadColor2 = quad->output.color[cbuf + 1];
  968.          }
  969.  
  970.          /* If fixed-point dest color buffer, need to clamp the incoming
  971.           * fragment colors now.
  972.           */
  973.          if (clamp || softpipe->rasterizer->clamp_fragment_color) {
  974.             clamp_colors(quadColor);
  975.          }
  976.  
  977.          /* get/swizzle dest colors
  978.           */
  979.          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  980.             int x = itx + (j & 1);
  981.             int y = ity + (j >> 1);
  982.             for (i = 0; i < 4; i++) {
  983.                dest[i][j] = tile->data.color[y][x][i];
  984.             }
  985.          }
  986.  
  987.  
  988.          if (blend->logicop_enable) {
  989.             if (bqs->format_type[cbuf] != UTIL_FORMAT_TYPE_FLOAT) {
  990.                logicop_quad( qs, quadColor, dest );
  991.             }
  992.          }
  993.          else if (blend->rt[blend_buf].blend_enable) {
  994.             blend_quad(qs, quadColor, quadColor2, dest, blend_color, blend_buf);
  995.  
  996.             /* If fixed-point dest color buffer, need to clamp the outgoing
  997.              * fragment colors now.
  998.              */
  999.             if (clamp) {
  1000.                clamp_colors(quadColor);
  1001.             }
  1002.          }
  1003.  
  1004.          rebase_colors(bqs->base_format[cbuf], quadColor);
  1005.  
  1006.          if (blend->rt[blend_buf].colormask != 0xf)
  1007.             colormask_quad( blend->rt[cbuf].colormask, quadColor, dest);
  1008.    
  1009.          /* Output color values
  1010.           */
  1011.          for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  1012.             if (quad->inout.mask & (1 << j)) {
  1013.                int x = itx + (j & 1);
  1014.                int y = ity + (j >> 1);
  1015.                for (i = 0; i < 4; i++) { /* loop over color chans */
  1016.                   tile->data.color[y][x][i] = quadColor[i][j];
  1017.                }
  1018.             }
  1019.          }
  1020.       }
  1021.    }
  1022. }
  1023.  
  1024.  
  1025. static void
  1026. blend_single_add_src_alpha_inv_src_alpha(struct quad_stage *qs,
  1027.                                          struct quad_header *quads[],
  1028.                                          unsigned nr)
  1029. {
  1030.    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
  1031.    static const float one[4] = { 1, 1, 1, 1 };
  1032.    float one_minus_alpha[TGSI_QUAD_SIZE];
  1033.    float dest[4][TGSI_QUAD_SIZE];
  1034.    float source[4][TGSI_QUAD_SIZE];
  1035.    uint i, j, q;
  1036.  
  1037.    struct softpipe_cached_tile *tile
  1038.       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
  1039.                            quads[0]->input.x0,
  1040.                            quads[0]->input.y0);
  1041.  
  1042.    for (q = 0; q < nr; q++) {
  1043.       struct quad_header *quad = quads[q];
  1044.       float (*quadColor)[4] = quad->output.color[0];
  1045.       const float *alpha = quadColor[3];
  1046.       const int itx = (quad->input.x0 & (TILE_SIZE-1));
  1047.       const int ity = (quad->input.y0 & (TILE_SIZE-1));
  1048.      
  1049.       /* get/swizzle dest colors */
  1050.       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  1051.          int x = itx + (j & 1);
  1052.          int y = ity + (j >> 1);
  1053.          for (i = 0; i < 4; i++) {
  1054.             dest[i][j] = tile->data.color[y][x][i];
  1055.          }
  1056.       }
  1057.  
  1058.       /* If fixed-point dest color buffer, need to clamp the incoming
  1059.        * fragment colors now.
  1060.        */
  1061.       if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
  1062.          clamp_colors(quadColor);
  1063.       }
  1064.  
  1065.       VEC4_MUL(source[0], quadColor[0], alpha); /* R */
  1066.       VEC4_MUL(source[1], quadColor[1], alpha); /* G */
  1067.       VEC4_MUL(source[2], quadColor[2], alpha); /* B */
  1068.       VEC4_MUL(source[3], quadColor[3], alpha); /* A */
  1069.  
  1070.       VEC4_SUB(one_minus_alpha, one, alpha);
  1071.       VEC4_MUL(dest[0], dest[0], one_minus_alpha); /* R */
  1072.       VEC4_MUL(dest[1], dest[1], one_minus_alpha); /* G */
  1073.       VEC4_MUL(dest[2], dest[2], one_minus_alpha); /* B */
  1074.       VEC4_MUL(dest[3], dest[3], one_minus_alpha); /* A */
  1075.  
  1076.       VEC4_ADD(quadColor[0], source[0], dest[0]); /* R */
  1077.       VEC4_ADD(quadColor[1], source[1], dest[1]); /* G */
  1078.       VEC4_ADD(quadColor[2], source[2], dest[2]); /* B */
  1079.       VEC4_ADD(quadColor[3], source[3], dest[3]); /* A */
  1080.  
  1081.       /* If fixed-point dest color buffer, need to clamp the outgoing
  1082.        * fragment colors now.
  1083.        */
  1084.       if (bqs->clamp[0]) {
  1085.          clamp_colors(quadColor);
  1086.       }
  1087.  
  1088.       rebase_colors(bqs->base_format[0], quadColor);
  1089.  
  1090.       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  1091.          if (quad->inout.mask & (1 << j)) {
  1092.             int x = itx + (j & 1);
  1093.             int y = ity + (j >> 1);
  1094.             for (i = 0; i < 4; i++) { /* loop over color chans */
  1095.                tile->data.color[y][x][i] = quadColor[i][j];
  1096.             }
  1097.          }
  1098.       }
  1099.    }
  1100. }
  1101.  
  1102. static void
  1103. blend_single_add_one_one(struct quad_stage *qs,
  1104.                          struct quad_header *quads[],
  1105.                          unsigned nr)
  1106. {
  1107.    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
  1108.    float dest[4][TGSI_QUAD_SIZE];
  1109.    uint i, j, q;
  1110.  
  1111.    struct softpipe_cached_tile *tile
  1112.       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
  1113.                            quads[0]->input.x0,
  1114.                            quads[0]->input.y0);
  1115.  
  1116.    for (q = 0; q < nr; q++) {
  1117.       struct quad_header *quad = quads[q];
  1118.       float (*quadColor)[4] = quad->output.color[0];
  1119.       const int itx = (quad->input.x0 & (TILE_SIZE-1));
  1120.       const int ity = (quad->input.y0 & (TILE_SIZE-1));
  1121.      
  1122.       /* get/swizzle dest colors */
  1123.       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  1124.          int x = itx + (j & 1);
  1125.          int y = ity + (j >> 1);
  1126.          for (i = 0; i < 4; i++) {
  1127.             dest[i][j] = tile->data.color[y][x][i];
  1128.          }
  1129.       }
  1130.      
  1131.       /* If fixed-point dest color buffer, need to clamp the incoming
  1132.        * fragment colors now.
  1133.        */
  1134.       if (bqs->clamp[0] || qs->softpipe->rasterizer->clamp_fragment_color) {
  1135.          clamp_colors(quadColor);
  1136.       }
  1137.  
  1138.       VEC4_ADD(quadColor[0], quadColor[0], dest[0]); /* R */
  1139.       VEC4_ADD(quadColor[1], quadColor[1], dest[1]); /* G */
  1140.       VEC4_ADD(quadColor[2], quadColor[2], dest[2]); /* B */
  1141.       VEC4_ADD(quadColor[3], quadColor[3], dest[3]); /* A */
  1142.  
  1143.       /* If fixed-point dest color buffer, need to clamp the outgoing
  1144.        * fragment colors now.
  1145.        */
  1146.       if (bqs->clamp[0]) {
  1147.          clamp_colors(quadColor);
  1148.       }
  1149.  
  1150.       rebase_colors(bqs->base_format[0], quadColor);
  1151.  
  1152.       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  1153.          if (quad->inout.mask & (1 << j)) {
  1154.             int x = itx + (j & 1);
  1155.             int y = ity + (j >> 1);
  1156.             for (i = 0; i < 4; i++) { /* loop over color chans */
  1157.                tile->data.color[y][x][i] = quadColor[i][j];
  1158.             }
  1159.          }
  1160.       }
  1161.    }
  1162. }
  1163.  
  1164.  
  1165. /**
  1166.  * Just copy the quad color to the framebuffer tile (respecting the writemask),
  1167.  * for one color buffer.
  1168.  * Clamping will be done, if needed (depending on the color buffer's
  1169.  * datatype) when we write/pack the colors later.
  1170.  */
  1171. static void
  1172. single_output_color(struct quad_stage *qs,
  1173.                     struct quad_header *quads[],
  1174.                     unsigned nr)
  1175. {
  1176.    const struct blend_quad_stage *bqs = blend_quad_stage(qs);
  1177.    uint i, j, q;
  1178.  
  1179.    struct softpipe_cached_tile *tile
  1180.       = sp_get_cached_tile(qs->softpipe->cbuf_cache[0],
  1181.                            quads[0]->input.x0,
  1182.                            quads[0]->input.y0);
  1183.  
  1184.    for (q = 0; q < nr; q++) {
  1185.       struct quad_header *quad = quads[q];
  1186.       float (*quadColor)[4] = quad->output.color[0];
  1187.       const int itx = (quad->input.x0 & (TILE_SIZE-1));
  1188.       const int ity = (quad->input.y0 & (TILE_SIZE-1));
  1189.  
  1190.       if (qs->softpipe->rasterizer->clamp_fragment_color)
  1191.          clamp_colors(quadColor);
  1192.  
  1193.       rebase_colors(bqs->base_format[0], quadColor);
  1194.  
  1195.       for (j = 0; j < TGSI_QUAD_SIZE; j++) {
  1196.          if (quad->inout.mask & (1 << j)) {
  1197.             int x = itx + (j & 1);
  1198.             int y = ity + (j >> 1);
  1199.             for (i = 0; i < 4; i++) { /* loop over color chans */
  1200.                tile->data.color[y][x][i] = quadColor[i][j];
  1201.             }
  1202.          }
  1203.       }
  1204.    }
  1205. }
  1206.  
  1207. static void
  1208. blend_noop(struct quad_stage *qs,
  1209.            struct quad_header *quads[],
  1210.            unsigned nr)
  1211. {
  1212. }
  1213.  
  1214.  
  1215. static void
  1216. choose_blend_quad(struct quad_stage *qs,
  1217.                   struct quad_header *quads[],
  1218.                   unsigned nr)
  1219. {
  1220.    struct blend_quad_stage *bqs = blend_quad_stage(qs);
  1221.    struct softpipe_context *softpipe = qs->softpipe;
  1222.    const struct pipe_blend_state *blend = softpipe->blend;
  1223.    unsigned i;
  1224.  
  1225.    qs->run = blend_fallback;
  1226.    
  1227.    if (softpipe->framebuffer.nr_cbufs == 0) {
  1228.       qs->run = blend_noop;
  1229.    }
  1230.    else if (!softpipe->blend->logicop_enable &&
  1231.             softpipe->blend->rt[0].colormask == 0xf &&
  1232.             softpipe->framebuffer.nr_cbufs == 1)
  1233.    {
  1234.       if (!blend->rt[0].blend_enable) {
  1235.          qs->run = single_output_color;
  1236.       }
  1237.       else if (blend->rt[0].rgb_src_factor == blend->rt[0].alpha_src_factor &&
  1238.                blend->rt[0].rgb_dst_factor == blend->rt[0].alpha_dst_factor &&
  1239.                blend->rt[0].rgb_func == blend->rt[0].alpha_func)
  1240.       {
  1241.          if (blend->rt[0].alpha_func == PIPE_BLEND_ADD) {
  1242.             if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_ONE &&
  1243.                 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_ONE) {
  1244.                qs->run = blend_single_add_one_one;
  1245.             }
  1246.             else if (blend->rt[0].rgb_src_factor == PIPE_BLENDFACTOR_SRC_ALPHA &&
  1247.                 blend->rt[0].rgb_dst_factor == PIPE_BLENDFACTOR_INV_SRC_ALPHA)
  1248.                qs->run = blend_single_add_src_alpha_inv_src_alpha;
  1249.  
  1250.          }
  1251.       }
  1252.    }
  1253.  
  1254.    /* For each color buffer, determine if the buffer has destination alpha and
  1255.     * whether color clamping is needed.
  1256.     */
  1257.    for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
  1258.       const enum pipe_format format = softpipe->framebuffer.cbufs[i]->format;
  1259.       const struct util_format_description *desc =
  1260.          util_format_description(format);
  1261.       /* assuming all or no color channels are normalized: */
  1262.       bqs->clamp[i] = desc->channel[0].normalized;
  1263.       bqs->format_type[i] = desc->channel[0].type;
  1264.  
  1265.       if (util_format_is_intensity(format))
  1266.          bqs->base_format[i] = INTENSITY;
  1267.       else if (util_format_is_luminance(format))
  1268.          bqs->base_format[i] = LUMINANCE;
  1269.       else if (util_format_is_luminance_alpha(format))
  1270.          bqs->base_format[i] = LUMINANCE_ALPHA;
  1271.       else if (!util_format_has_alpha(format))
  1272.          bqs->base_format[i] = RGB;
  1273.       else
  1274.          bqs->base_format[i] = RGBA;
  1275.    }
  1276.  
  1277.    qs->run(qs, quads, nr);
  1278. }
  1279.  
  1280.  
  1281. static void blend_begin(struct quad_stage *qs)
  1282. {
  1283.    qs->run = choose_blend_quad;
  1284. }
  1285.  
  1286.  
  1287. static void blend_destroy(struct quad_stage *qs)
  1288. {
  1289.    FREE( qs );
  1290. }
  1291.  
  1292.  
  1293. struct quad_stage *sp_quad_blend_stage( struct softpipe_context *softpipe )
  1294. {
  1295.    struct blend_quad_stage *stage = CALLOC_STRUCT(blend_quad_stage);
  1296.  
  1297.    if (!stage)
  1298.       return NULL;
  1299.  
  1300.    stage->base.softpipe = softpipe;
  1301.    stage->base.begin = blend_begin;
  1302.    stage->base.run = choose_blend_quad;
  1303.    stage->base.destroy = blend_destroy;
  1304.  
  1305.    return &stage->base;
  1306. }
  1307.