Subversion Repositories Kolibri OS

Rev

Blame | Last modification | View Log | Download | RSS feed

  1. /**************************************************************************
  2.  *
  3.  * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
  4.  * All Rights Reserved.
  5.  * Copyright 2008  VMware, Inc.  All rights reserved.
  6.  *
  7.  * Permission is hereby granted, free of charge, to any person obtaining a
  8.  * copy of this software and associated documentation files (the
  9.  * "Software"), to deal in the Software without restriction, including
  10.  * without limitation the rights to use, copy, modify, merge, publish,
  11.  * distribute, sub license, and/or sell copies of the Software, and to
  12.  * permit persons to whom the Software is furnished to do so, subject to
  13.  * the following conditions:
  14.  *
  15.  * The above copyright notice and this permission notice (including the
  16.  * next paragraph) shall be included in all copies or substantial portions
  17.  * of the Software.
  18.  *
  19.  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  20.  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  21.  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
  22.  * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
  23.  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  24.  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  25.  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
  26.  *
  27.  **************************************************************************/
  28.  
  29. /**
  30.  * @file
  31.  * Mipmap generation utility
  32.  *  
  33.  * @author Brian Paul
  34.  */
  35.  
  36.  
  37. #include "pipe/p_context.h"
  38. #include "util/u_debug.h"
  39. #include "pipe/p_defines.h"
  40. #include "util/u_inlines.h"
  41. #include "pipe/p_shader_tokens.h"
  42. #include "pipe/p_state.h"
  43.  
  44. #include "util/u_format.h"
  45. #include "util/u_memory.h"
  46. #include "util/u_draw_quad.h"
  47. #include "util/u_gen_mipmap.h"
  48. #include "util/u_simple_shaders.h"
  49. #include "util/u_math.h"
  50. #include "util/u_texture.h"
  51. #include "util/u_half.h"
  52. #include "util/u_surface.h"
  53.  
  54. #include "cso_cache/cso_context.h"
  55.  
  56.  
  57. struct gen_mipmap_state
  58. {
  59.    struct pipe_context *pipe;
  60.    struct cso_context *cso;
  61.  
  62.    struct pipe_blend_state blend_keep_color, blend_write_color;
  63.    struct pipe_depth_stencil_alpha_state dsa_keep_depth, dsa_write_depth;
  64.    struct pipe_rasterizer_state rasterizer;
  65.    struct pipe_sampler_state sampler;
  66.    struct pipe_vertex_element velem[2];
  67.  
  68.    void *vs;
  69.  
  70.    /** Not all are used, but simplifies code */
  71.    void *fs_color[TGSI_TEXTURE_COUNT];
  72.    void *fs_depth[TGSI_TEXTURE_COUNT];
  73.  
  74.    struct pipe_resource *vbuf;  /**< quad vertices */
  75.    unsigned vbuf_slot;
  76.  
  77.    float vertices[4][2][4];   /**< vertex/texcoords for quad */
  78. };
  79.  
  80.  
  81.  
  82. enum dtype
  83. {
  84.    DTYPE_UBYTE,
  85.    DTYPE_UBYTE_3_3_2,
  86.    DTYPE_USHORT,
  87.    DTYPE_USHORT_4_4_4_4,
  88.    DTYPE_USHORT_5_6_5,
  89.    DTYPE_USHORT_1_5_5_5_REV,
  90.    DTYPE_UINT,
  91.    DTYPE_FLOAT,
  92.    DTYPE_HALF_FLOAT
  93. };
  94.  
  95.  
  96. typedef uint16_t half_float;
  97.  
  98.  
  99. /**
  100.  * \name Support macros for do_row and do_row_3d
  101.  *
  102.  * The macro madness is here for two reasons.  First, it compacts the code
  103.  * slightly.  Second, it makes it much easier to adjust the specifics of the
  104.  * filter to tune the rounding characteristics.
  105.  */
  106. /*@{*/
  107. #define DECLARE_ROW_POINTERS(t, e) \
  108.       const t(*rowA)[e] = (const t(*)[e]) srcRowA; \
  109.       const t(*rowB)[e] = (const t(*)[e]) srcRowB; \
  110.       const t(*rowC)[e] = (const t(*)[e]) srcRowC; \
  111.       const t(*rowD)[e] = (const t(*)[e]) srcRowD; \
  112.       t(*dst)[e] = (t(*)[e]) dstRow
  113.  
  114. #define DECLARE_ROW_POINTERS0(t) \
  115.       const t *rowA = (const t *) srcRowA; \
  116.       const t *rowB = (const t *) srcRowB; \
  117.       const t *rowC = (const t *) srcRowC; \
  118.       const t *rowD = (const t *) srcRowD; \
  119.       t *dst = (t *) dstRow
  120.  
  121. #define FILTER_SUM_3D(Aj, Ak, Bj, Bk, Cj, Ck, Dj, Dk) \
  122.    ((unsigned) Aj + (unsigned) Ak \
  123.     + (unsigned) Bj + (unsigned) Bk \
  124.     + (unsigned) Cj + (unsigned) Ck \
  125.     + (unsigned) Dj + (unsigned) Dk \
  126.     + 4) >> 3
  127.  
  128. #define FILTER_3D(e) \
  129.    do { \
  130.       dst[i][e] = FILTER_SUM_3D(rowA[j][e], rowA[k][e], \
  131.                                 rowB[j][e], rowB[k][e], \
  132.                                 rowC[j][e], rowC[k][e], \
  133.                                 rowD[j][e], rowD[k][e]); \
  134.    } while(0)
  135.  
  136. #define FILTER_F_3D(e) \
  137.    do { \
  138.       dst[i][e] = (rowA[j][e] + rowA[k][e] \
  139.                    + rowB[j][e] + rowB[k][e] \
  140.                    + rowC[j][e] + rowC[k][e] \
  141.                    + rowD[j][e] + rowD[k][e]) * 0.125F; \
  142.    } while(0)
  143.  
  144. #define FILTER_HF_3D(e) \
  145.    do { \
  146.       const float aj = util_half_to_float(rowA[j][e]); \
  147.       const float ak = util_half_to_float(rowA[k][e]); \
  148.       const float bj = util_half_to_float(rowB[j][e]); \
  149.       const float bk = util_half_to_float(rowB[k][e]); \
  150.       const float cj = util_half_to_float(rowC[j][e]); \
  151.       const float ck = util_half_to_float(rowC[k][e]); \
  152.       const float dj = util_half_to_float(rowD[j][e]); \
  153.       const float dk = util_half_to_float(rowD[k][e]); \
  154.       dst[i][e] = util_float_to_half((aj + ak + bj + bk + cj + ck + dj + dk) \
  155.                                       * 0.125F); \
  156.    } while(0)
  157. /*@}*/
  158.  
  159.  
  160. /**
  161.  * Average together two rows of a source image to produce a single new
  162.  * row in the dest image.  It's legal for the two source rows to point
  163.  * to the same data.  The source width must be equal to either the
  164.  * dest width or two times the dest width.
  165.  * \param datatype  GL_UNSIGNED_BYTE, GL_UNSIGNED_SHORT, GL_FLOAT, etc.
  166.  * \param comps  number of components per pixel (1..4)
  167.  */
  168. static void
  169. do_row(enum dtype datatype, uint comps, int srcWidth,
  170.        const void *srcRowA, const void *srcRowB,
  171.        int dstWidth, void *dstRow)
  172. {
  173.    const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
  174.    const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
  175.  
  176.    assert(comps >= 1);
  177.    assert(comps <= 4);
  178.  
  179.    /* This assertion is no longer valid with non-power-of-2 textures
  180.    assert(srcWidth == dstWidth || srcWidth == 2 * dstWidth);
  181.    */
  182.  
  183.    if (datatype == DTYPE_UBYTE && comps == 4) {
  184.       uint i, j, k;
  185.       const ubyte(*rowA)[4] = (const ubyte(*)[4]) srcRowA;
  186.       const ubyte(*rowB)[4] = (const ubyte(*)[4]) srcRowB;
  187.       ubyte(*dst)[4] = (ubyte(*)[4]) dstRow;
  188.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  189.            i++, j += colStride, k += colStride) {
  190.          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
  191.          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
  192.          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
  193.          dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4;
  194.       }
  195.    }
  196.    else if (datatype == DTYPE_UBYTE && comps == 3) {
  197.       uint i, j, k;
  198.       const ubyte(*rowA)[3] = (const ubyte(*)[3]) srcRowA;
  199.       const ubyte(*rowB)[3] = (const ubyte(*)[3]) srcRowB;
  200.       ubyte(*dst)[3] = (ubyte(*)[3]) dstRow;
  201.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  202.            i++, j += colStride, k += colStride) {
  203.          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
  204.          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
  205.          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
  206.       }
  207.    }
  208.    else if (datatype == DTYPE_UBYTE && comps == 2) {
  209.       uint i, j, k;
  210.       const ubyte(*rowA)[2] = (const ubyte(*)[2]) srcRowA;
  211.       const ubyte(*rowB)[2] = (const ubyte(*)[2]) srcRowB;
  212.       ubyte(*dst)[2] = (ubyte(*)[2]) dstRow;
  213.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  214.            i++, j += colStride, k += colStride) {
  215.          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) >> 2;
  216.          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) >> 2;
  217.       }
  218.    }
  219.    else if (datatype == DTYPE_UBYTE && comps == 1) {
  220.       uint i, j, k;
  221.       const ubyte *rowA = (const ubyte *) srcRowA;
  222.       const ubyte *rowB = (const ubyte *) srcRowB;
  223.       ubyte *dst = (ubyte *) dstRow;
  224.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  225.            i++, j += colStride, k += colStride) {
  226.          dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) >> 2;
  227.       }
  228.    }
  229.  
  230.    else if (datatype == DTYPE_USHORT && comps == 4) {
  231.       uint i, j, k;
  232.       const ushort(*rowA)[4] = (const ushort(*)[4]) srcRowA;
  233.       const ushort(*rowB)[4] = (const ushort(*)[4]) srcRowB;
  234.       ushort(*dst)[4] = (ushort(*)[4]) dstRow;
  235.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  236.            i++, j += colStride, k += colStride) {
  237.          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
  238.          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
  239.          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
  240.          dst[i][3] = (rowA[j][3] + rowA[k][3] + rowB[j][3] + rowB[k][3]) / 4;
  241.       }
  242.    }
  243.    else if (datatype == DTYPE_USHORT && comps == 3) {
  244.       uint i, j, k;
  245.       const ushort(*rowA)[3] = (const ushort(*)[3]) srcRowA;
  246.       const ushort(*rowB)[3] = (const ushort(*)[3]) srcRowB;
  247.       ushort(*dst)[3] = (ushort(*)[3]) dstRow;
  248.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  249.            i++, j += colStride, k += colStride) {
  250.          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
  251.          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
  252.          dst[i][2] = (rowA[j][2] + rowA[k][2] + rowB[j][2] + rowB[k][2]) / 4;
  253.       }
  254.    }
  255.    else if (datatype == DTYPE_USHORT && comps == 2) {
  256.       uint i, j, k;
  257.       const ushort(*rowA)[2] = (const ushort(*)[2]) srcRowA;
  258.       const ushort(*rowB)[2] = (const ushort(*)[2]) srcRowB;
  259.       ushort(*dst)[2] = (ushort(*)[2]) dstRow;
  260.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  261.            i++, j += colStride, k += colStride) {
  262.          dst[i][0] = (rowA[j][0] + rowA[k][0] + rowB[j][0] + rowB[k][0]) / 4;
  263.          dst[i][1] = (rowA[j][1] + rowA[k][1] + rowB[j][1] + rowB[k][1]) / 4;
  264.       }
  265.    }
  266.    else if (datatype == DTYPE_USHORT && comps == 1) {
  267.       uint i, j, k;
  268.       const ushort *rowA = (const ushort *) srcRowA;
  269.       const ushort *rowB = (const ushort *) srcRowB;
  270.       ushort *dst = (ushort *) dstRow;
  271.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  272.            i++, j += colStride, k += colStride) {
  273.          dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) / 4;
  274.       }
  275.    }
  276.  
  277.    else if (datatype == DTYPE_FLOAT && comps == 4) {
  278.       uint i, j, k;
  279.       const float(*rowA)[4] = (const float(*)[4]) srcRowA;
  280.       const float(*rowB)[4] = (const float(*)[4]) srcRowB;
  281.       float(*dst)[4] = (float(*)[4]) dstRow;
  282.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  283.            i++, j += colStride, k += colStride) {
  284.          dst[i][0] = (rowA[j][0] + rowA[k][0] +
  285.                       rowB[j][0] + rowB[k][0]) * 0.25F;
  286.          dst[i][1] = (rowA[j][1] + rowA[k][1] +
  287.                       rowB[j][1] + rowB[k][1]) * 0.25F;
  288.          dst[i][2] = (rowA[j][2] + rowA[k][2] +
  289.                       rowB[j][2] + rowB[k][2]) * 0.25F;
  290.          dst[i][3] = (rowA[j][3] + rowA[k][3] +
  291.                       rowB[j][3] + rowB[k][3]) * 0.25F;
  292.       }
  293.    }
  294.    else if (datatype == DTYPE_FLOAT && comps == 3) {
  295.       uint i, j, k;
  296.       const float(*rowA)[3] = (const float(*)[3]) srcRowA;
  297.       const float(*rowB)[3] = (const float(*)[3]) srcRowB;
  298.       float(*dst)[3] = (float(*)[3]) dstRow;
  299.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  300.            i++, j += colStride, k += colStride) {
  301.          dst[i][0] = (rowA[j][0] + rowA[k][0] +
  302.                       rowB[j][0] + rowB[k][0]) * 0.25F;
  303.          dst[i][1] = (rowA[j][1] + rowA[k][1] +
  304.                       rowB[j][1] + rowB[k][1]) * 0.25F;
  305.          dst[i][2] = (rowA[j][2] + rowA[k][2] +
  306.                       rowB[j][2] + rowB[k][2]) * 0.25F;
  307.       }
  308.    }
  309.    else if (datatype == DTYPE_FLOAT && comps == 2) {
  310.       uint i, j, k;
  311.       const float(*rowA)[2] = (const float(*)[2]) srcRowA;
  312.       const float(*rowB)[2] = (const float(*)[2]) srcRowB;
  313.       float(*dst)[2] = (float(*)[2]) dstRow;
  314.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  315.            i++, j += colStride, k += colStride) {
  316.          dst[i][0] = (rowA[j][0] + rowA[k][0] +
  317.                       rowB[j][0] + rowB[k][0]) * 0.25F;
  318.          dst[i][1] = (rowA[j][1] + rowA[k][1] +
  319.                       rowB[j][1] + rowB[k][1]) * 0.25F;
  320.       }
  321.    }
  322.    else if (datatype == DTYPE_FLOAT && comps == 1) {
  323.       uint i, j, k;
  324.       const float *rowA = (const float *) srcRowA;
  325.       const float *rowB = (const float *) srcRowB;
  326.       float *dst = (float *) dstRow;
  327.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  328.            i++, j += colStride, k += colStride) {
  329.          dst[i] = (rowA[j] + rowA[k] + rowB[j] + rowB[k]) * 0.25F;
  330.       }
  331.    }
  332.  
  333.    else if (datatype == DTYPE_HALF_FLOAT && comps == 4) {
  334.       uint i, j, k, comp;
  335.       const half_float(*rowA)[4] = (const half_float(*)[4]) srcRowA;
  336.       const half_float(*rowB)[4] = (const half_float(*)[4]) srcRowB;
  337.       half_float(*dst)[4] = (half_float(*)[4]) dstRow;
  338.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  339.            i++, j += colStride, k += colStride) {
  340.          for (comp = 0; comp < 4; comp++) {
  341.             float aj, ak, bj, bk;
  342.             aj = util_half_to_float(rowA[j][comp]);
  343.             ak = util_half_to_float(rowA[k][comp]);
  344.             bj = util_half_to_float(rowB[j][comp]);
  345.             bk = util_half_to_float(rowB[k][comp]);
  346.             dst[i][comp] = util_float_to_half((aj + ak + bj + bk) * 0.25F);
  347.          }
  348.       }
  349.    }
  350.    else if (datatype == DTYPE_HALF_FLOAT && comps == 3) {
  351.       uint i, j, k, comp;
  352.       const half_float(*rowA)[3] = (const half_float(*)[3]) srcRowA;
  353.       const half_float(*rowB)[3] = (const half_float(*)[3]) srcRowB;
  354.       half_float(*dst)[3] = (half_float(*)[3]) dstRow;
  355.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  356.            i++, j += colStride, k += colStride) {
  357.          for (comp = 0; comp < 3; comp++) {
  358.             float aj, ak, bj, bk;
  359.             aj = util_half_to_float(rowA[j][comp]);
  360.             ak = util_half_to_float(rowA[k][comp]);
  361.             bj = util_half_to_float(rowB[j][comp]);
  362.             bk = util_half_to_float(rowB[k][comp]);
  363.             dst[i][comp] = util_float_to_half((aj + ak + bj + bk) * 0.25F);
  364.          }
  365.       }
  366.    }
  367.    else if (datatype == DTYPE_HALF_FLOAT && comps == 2) {
  368.       uint i, j, k, comp;
  369.       const half_float(*rowA)[2] = (const half_float(*)[2]) srcRowA;
  370.       const half_float(*rowB)[2] = (const half_float(*)[2]) srcRowB;
  371.       half_float(*dst)[2] = (half_float(*)[2]) dstRow;
  372.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  373.            i++, j += colStride, k += colStride) {
  374.          for (comp = 0; comp < 2; comp++) {
  375.             float aj, ak, bj, bk;
  376.             aj = util_half_to_float(rowA[j][comp]);
  377.             ak = util_half_to_float(rowA[k][comp]);
  378.             bj = util_half_to_float(rowB[j][comp]);
  379.             bk = util_half_to_float(rowB[k][comp]);
  380.             dst[i][comp] = util_float_to_half((aj + ak + bj + bk) * 0.25F);
  381.          }
  382.       }
  383.    }
  384.    else if (datatype == DTYPE_HALF_FLOAT && comps == 1) {
  385.       uint i, j, k;
  386.       const half_float *rowA = (const half_float *) srcRowA;
  387.       const half_float *rowB = (const half_float *) srcRowB;
  388.       half_float *dst = (half_float *) dstRow;
  389.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  390.            i++, j += colStride, k += colStride) {
  391.          float aj, ak, bj, bk;
  392.          aj = util_half_to_float(rowA[j]);
  393.          ak = util_half_to_float(rowA[k]);
  394.          bj = util_half_to_float(rowB[j]);
  395.          bk = util_half_to_float(rowB[k]);
  396.          dst[i] = util_float_to_half((aj + ak + bj + bk) * 0.25F);
  397.       }
  398.    }
  399.  
  400.    else if (datatype == DTYPE_UINT && comps == 1) {
  401.       uint i, j, k;
  402.       const uint *rowA = (const uint *) srcRowA;
  403.       const uint *rowB = (const uint *) srcRowB;
  404.       uint *dst = (uint *) dstRow;
  405.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  406.            i++, j += colStride, k += colStride) {
  407.          dst[i] = rowA[j] / 4 + rowA[k] / 4 + rowB[j] / 4 + rowB[k] / 4;
  408.       }
  409.    }
  410.  
  411.    else if (datatype == DTYPE_USHORT_5_6_5 && comps == 3) {
  412.       uint i, j, k;
  413.       const ushort *rowA = (const ushort *) srcRowA;
  414.       const ushort *rowB = (const ushort *) srcRowB;
  415.       ushort *dst = (ushort *) dstRow;
  416.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  417.            i++, j += colStride, k += colStride) {
  418.          const int rowAr0 = rowA[j] & 0x1f;
  419.          const int rowAr1 = rowA[k] & 0x1f;
  420.          const int rowBr0 = rowB[j] & 0x1f;
  421.          const int rowBr1 = rowB[k] & 0x1f;
  422.          const int rowAg0 = (rowA[j] >> 5) & 0x3f;
  423.          const int rowAg1 = (rowA[k] >> 5) & 0x3f;
  424.          const int rowBg0 = (rowB[j] >> 5) & 0x3f;
  425.          const int rowBg1 = (rowB[k] >> 5) & 0x3f;
  426.          const int rowAb0 = (rowA[j] >> 11) & 0x1f;
  427.          const int rowAb1 = (rowA[k] >> 11) & 0x1f;
  428.          const int rowBb0 = (rowB[j] >> 11) & 0x1f;
  429.          const int rowBb1 = (rowB[k] >> 11) & 0x1f;
  430.          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
  431.          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
  432.          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
  433.          dst[i] = (blue << 11) | (green << 5) | red;
  434.       }
  435.    }
  436.    else if (datatype == DTYPE_USHORT_4_4_4_4 && comps == 4) {
  437.       uint i, j, k;
  438.       const ushort *rowA = (const ushort *) srcRowA;
  439.       const ushort *rowB = (const ushort *) srcRowB;
  440.       ushort *dst = (ushort *) dstRow;
  441.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  442.            i++, j += colStride, k += colStride) {
  443.          const int rowAr0 = rowA[j] & 0xf;
  444.          const int rowAr1 = rowA[k] & 0xf;
  445.          const int rowBr0 = rowB[j] & 0xf;
  446.          const int rowBr1 = rowB[k] & 0xf;
  447.          const int rowAg0 = (rowA[j] >> 4) & 0xf;
  448.          const int rowAg1 = (rowA[k] >> 4) & 0xf;
  449.          const int rowBg0 = (rowB[j] >> 4) & 0xf;
  450.          const int rowBg1 = (rowB[k] >> 4) & 0xf;
  451.          const int rowAb0 = (rowA[j] >> 8) & 0xf;
  452.          const int rowAb1 = (rowA[k] >> 8) & 0xf;
  453.          const int rowBb0 = (rowB[j] >> 8) & 0xf;
  454.          const int rowBb1 = (rowB[k] >> 8) & 0xf;
  455.          const int rowAa0 = (rowA[j] >> 12) & 0xf;
  456.          const int rowAa1 = (rowA[k] >> 12) & 0xf;
  457.          const int rowBa0 = (rowB[j] >> 12) & 0xf;
  458.          const int rowBa1 = (rowB[k] >> 12) & 0xf;
  459.          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
  460.          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
  461.          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
  462.          const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2;
  463.          dst[i] = (alpha << 12) | (blue << 8) | (green << 4) | red;
  464.       }
  465.    }
  466.    else if (datatype == DTYPE_USHORT_1_5_5_5_REV && comps == 4) {
  467.       uint i, j, k;
  468.       const ushort *rowA = (const ushort *) srcRowA;
  469.       const ushort *rowB = (const ushort *) srcRowB;
  470.       ushort *dst = (ushort *) dstRow;
  471.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  472.            i++, j += colStride, k += colStride) {
  473.          const int rowAr0 = rowA[j] & 0x1f;
  474.          const int rowAr1 = rowA[k] & 0x1f;
  475.          const int rowBr0 = rowB[j] & 0x1f;
  476.          const int rowBr1 = rowB[k] & 0x1f;
  477.          const int rowAg0 = (rowA[j] >> 5) & 0x1f;
  478.          const int rowAg1 = (rowA[k] >> 5) & 0x1f;
  479.          const int rowBg0 = (rowB[j] >> 5) & 0x1f;
  480.          const int rowBg1 = (rowB[k] >> 5) & 0x1f;
  481.          const int rowAb0 = (rowA[j] >> 10) & 0x1f;
  482.          const int rowAb1 = (rowA[k] >> 10) & 0x1f;
  483.          const int rowBb0 = (rowB[j] >> 10) & 0x1f;
  484.          const int rowBb1 = (rowB[k] >> 10) & 0x1f;
  485.          const int rowAa0 = (rowA[j] >> 15) & 0x1;
  486.          const int rowAa1 = (rowA[k] >> 15) & 0x1;
  487.          const int rowBa0 = (rowB[j] >> 15) & 0x1;
  488.          const int rowBa1 = (rowB[k] >> 15) & 0x1;
  489.          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
  490.          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
  491.          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
  492.          const int alpha = (rowAa0 + rowAa1 + rowBa0 + rowBa1) >> 2;
  493.          dst[i] = (alpha << 15) | (blue << 10) | (green << 5) | red;
  494.       }
  495.    }
  496.    else if (datatype == DTYPE_UBYTE_3_3_2 && comps == 3) {
  497.       uint i, j, k;
  498.       const ubyte *rowA = (const ubyte *) srcRowA;
  499.       const ubyte *rowB = (const ubyte *) srcRowB;
  500.       ubyte *dst = (ubyte *) dstRow;
  501.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  502.            i++, j += colStride, k += colStride) {
  503.          const int rowAr0 = rowA[j] & 0x3;
  504.          const int rowAr1 = rowA[k] & 0x3;
  505.          const int rowBr0 = rowB[j] & 0x3;
  506.          const int rowBr1 = rowB[k] & 0x3;
  507.          const int rowAg0 = (rowA[j] >> 2) & 0x7;
  508.          const int rowAg1 = (rowA[k] >> 2) & 0x7;
  509.          const int rowBg0 = (rowB[j] >> 2) & 0x7;
  510.          const int rowBg1 = (rowB[k] >> 2) & 0x7;
  511.          const int rowAb0 = (rowA[j] >> 5) & 0x7;
  512.          const int rowAb1 = (rowA[k] >> 5) & 0x7;
  513.          const int rowBb0 = (rowB[j] >> 5) & 0x7;
  514.          const int rowBb1 = (rowB[k] >> 5) & 0x7;
  515.          const int red = (rowAr0 + rowAr1 + rowBr0 + rowBr1) >> 2;
  516.          const int green = (rowAg0 + rowAg1 + rowBg0 + rowBg1) >> 2;
  517.          const int blue = (rowAb0 + rowAb1 + rowBb0 + rowBb1) >> 2;
  518.          dst[i] = (blue << 5) | (green << 2) | red;
  519.       }
  520.    }
  521.    else {
  522.       debug_printf("bad format in do_row()");
  523.    }
  524. }
  525.  
  526.  
  527. /**
  528.  * Average together four rows of a source image to produce a single new
  529.  * row in the dest image.  It's legal for the two source rows to point
  530.  * to the same data.  The source width must be equal to either the
  531.  * dest width or two times the dest width.
  532.  *
  533.  * \param datatype  GL pixel type \c GL_UNSIGNED_BYTE, \c GL_UNSIGNED_SHORT,
  534.  *                  \c GL_FLOAT, etc.
  535.  * \param comps     number of components per pixel (1..4)
  536.  * \param srcWidth  Width of a row in the source data
  537.  * \param srcRowA   Pointer to one of the rows of source data
  538.  * \param srcRowB   Pointer to one of the rows of source data
  539.  * \param srcRowC   Pointer to one of the rows of source data
  540.  * \param srcRowD   Pointer to one of the rows of source data
  541.  * \param dstWidth  Width of a row in the destination data
  542.  * \param dstRow    Pointer to the row of destination data
  543.  */
  544. static void
  545. do_row_3D(enum dtype datatype, uint comps, int srcWidth,
  546.           const void *srcRowA, const void *srcRowB,
  547.           const void *srcRowC, const void *srcRowD,
  548.           int dstWidth, void *dstRow)
  549. {
  550.    const uint k0 = (srcWidth == dstWidth) ? 0 : 1;
  551.    const uint colStride = (srcWidth == dstWidth) ? 1 : 2;
  552.    uint i, j, k;
  553.  
  554.    assert(comps >= 1);
  555.    assert(comps <= 4);
  556.  
  557.    if ((datatype == DTYPE_UBYTE) && (comps == 4)) {
  558.       DECLARE_ROW_POINTERS(ubyte, 4);
  559.  
  560.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  561.            i++, j += colStride, k += colStride) {
  562.          FILTER_3D(0);
  563.          FILTER_3D(1);
  564.          FILTER_3D(2);
  565.          FILTER_3D(3);
  566.       }
  567.    }
  568.    else if ((datatype == DTYPE_UBYTE) && (comps == 3)) {
  569.       DECLARE_ROW_POINTERS(ubyte, 3);
  570.  
  571.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  572.            i++, j += colStride, k += colStride) {
  573.          FILTER_3D(0);
  574.          FILTER_3D(1);
  575.          FILTER_3D(2);
  576.       }
  577.    }
  578.    else if ((datatype == DTYPE_UBYTE) && (comps == 2)) {
  579.       DECLARE_ROW_POINTERS(ubyte, 2);
  580.  
  581.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  582.            i++, j += colStride, k += colStride) {
  583.          FILTER_3D(0);
  584.          FILTER_3D(1);
  585.       }
  586.    }
  587.    else if ((datatype == DTYPE_UBYTE) && (comps == 1)) {
  588.       DECLARE_ROW_POINTERS(ubyte, 1);
  589.  
  590.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  591.            i++, j += colStride, k += colStride) {
  592.          FILTER_3D(0);
  593.       }
  594.    }
  595.    else if ((datatype == DTYPE_USHORT) && (comps == 4)) {
  596.       DECLARE_ROW_POINTERS(ushort, 4);
  597.  
  598.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  599.            i++, j += colStride, k += colStride) {
  600.          FILTER_3D(0);
  601.          FILTER_3D(1);
  602.          FILTER_3D(2);
  603.          FILTER_3D(3);
  604.       }
  605.    }
  606.    else if ((datatype == DTYPE_USHORT) && (comps == 3)) {
  607.       DECLARE_ROW_POINTERS(ushort, 3);
  608.  
  609.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  610.            i++, j += colStride, k += colStride) {
  611.          FILTER_3D(0);
  612.          FILTER_3D(1);
  613.          FILTER_3D(2);
  614.       }
  615.    }
  616.    else if ((datatype == DTYPE_USHORT) && (comps == 2)) {
  617.       DECLARE_ROW_POINTERS(ushort, 2);
  618.  
  619.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  620.            i++, j += colStride, k += colStride) {
  621.          FILTER_3D(0);
  622.          FILTER_3D(1);
  623.       }
  624.    }
  625.    else if ((datatype == DTYPE_USHORT) && (comps == 1)) {
  626.       DECLARE_ROW_POINTERS(ushort, 1);
  627.  
  628.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  629.            i++, j += colStride, k += colStride) {
  630.          FILTER_3D(0);
  631.       }
  632.    }
  633.    else if ((datatype == DTYPE_FLOAT) && (comps == 4)) {
  634.       DECLARE_ROW_POINTERS(float, 4);
  635.  
  636.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  637.            i++, j += colStride, k += colStride) {
  638.          FILTER_F_3D(0);
  639.          FILTER_F_3D(1);
  640.          FILTER_F_3D(2);
  641.          FILTER_F_3D(3);
  642.       }
  643.    }
  644.    else if ((datatype == DTYPE_FLOAT) && (comps == 3)) {
  645.       DECLARE_ROW_POINTERS(float, 3);
  646.  
  647.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  648.            i++, j += colStride, k += colStride) {
  649.          FILTER_F_3D(0);
  650.          FILTER_F_3D(1);
  651.          FILTER_F_3D(2);
  652.       }
  653.    }
  654.    else if ((datatype == DTYPE_FLOAT) && (comps == 2)) {
  655.       DECLARE_ROW_POINTERS(float, 2);
  656.  
  657.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  658.            i++, j += colStride, k += colStride) {
  659.          FILTER_F_3D(0);
  660.          FILTER_F_3D(1);
  661.       }
  662.    }
  663.    else if ((datatype == DTYPE_FLOAT) && (comps == 1)) {
  664.       DECLARE_ROW_POINTERS(float, 1);
  665.  
  666.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  667.            i++, j += colStride, k += colStride) {
  668.          FILTER_F_3D(0);
  669.       }
  670.    }
  671.    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 4)) {
  672.       DECLARE_ROW_POINTERS(half_float, 4);
  673.  
  674.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  675.            i++, j += colStride, k += colStride) {
  676.          FILTER_HF_3D(0);
  677.          FILTER_HF_3D(1);
  678.          FILTER_HF_3D(2);
  679.          FILTER_HF_3D(3);
  680.       }
  681.    }
  682.    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 3)) {
  683.       DECLARE_ROW_POINTERS(half_float, 4);
  684.  
  685.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  686.            i++, j += colStride, k += colStride) {
  687.          FILTER_HF_3D(0);
  688.          FILTER_HF_3D(1);
  689.          FILTER_HF_3D(2);
  690.       }
  691.    }
  692.    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 2)) {
  693.       DECLARE_ROW_POINTERS(half_float, 4);
  694.  
  695.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  696.            i++, j += colStride, k += colStride) {
  697.          FILTER_HF_3D(0);
  698.          FILTER_HF_3D(1);
  699.       }
  700.    }
  701.    else if ((datatype == DTYPE_HALF_FLOAT) && (comps == 1)) {
  702.       DECLARE_ROW_POINTERS(half_float, 4);
  703.  
  704.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  705.            i++, j += colStride, k += colStride) {
  706.          FILTER_HF_3D(0);
  707.       }
  708.    }
  709.    else if ((datatype == DTYPE_UINT) && (comps == 1)) {
  710.       const uint *rowA = (const uint *) srcRowA;
  711.       const uint *rowB = (const uint *) srcRowB;
  712.       const uint *rowC = (const uint *) srcRowC;
  713.       const uint *rowD = (const uint *) srcRowD;
  714.       float *dst = (float *) dstRow;
  715.  
  716.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  717.            i++, j += colStride, k += colStride) {
  718.          const uint64_t tmp = (((uint64_t) rowA[j] + (uint64_t) rowA[k])
  719.                                + ((uint64_t) rowB[j] + (uint64_t) rowB[k])
  720.                                + ((uint64_t) rowC[j] + (uint64_t) rowC[k])
  721.                                + ((uint64_t) rowD[j] + (uint64_t) rowD[k]));
  722.          dst[i] = (float)((double) tmp * 0.125);
  723.       }
  724.    }
  725.    else if ((datatype == DTYPE_USHORT_5_6_5) && (comps == 3)) {
  726.       DECLARE_ROW_POINTERS0(ushort);
  727.  
  728.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  729.            i++, j += colStride, k += colStride) {
  730.          const int rowAr0 = rowA[j] & 0x1f;
  731.          const int rowAr1 = rowA[k] & 0x1f;
  732.          const int rowBr0 = rowB[j] & 0x1f;
  733.          const int rowBr1 = rowB[k] & 0x1f;
  734.          const int rowCr0 = rowC[j] & 0x1f;
  735.          const int rowCr1 = rowC[k] & 0x1f;
  736.          const int rowDr0 = rowD[j] & 0x1f;
  737.          const int rowDr1 = rowD[k] & 0x1f;
  738.          const int rowAg0 = (rowA[j] >> 5) & 0x3f;
  739.          const int rowAg1 = (rowA[k] >> 5) & 0x3f;
  740.          const int rowBg0 = (rowB[j] >> 5) & 0x3f;
  741.          const int rowBg1 = (rowB[k] >> 5) & 0x3f;
  742.          const int rowCg0 = (rowC[j] >> 5) & 0x3f;
  743.          const int rowCg1 = (rowC[k] >> 5) & 0x3f;
  744.          const int rowDg0 = (rowD[j] >> 5) & 0x3f;
  745.          const int rowDg1 = (rowD[k] >> 5) & 0x3f;
  746.          const int rowAb0 = (rowA[j] >> 11) & 0x1f;
  747.          const int rowAb1 = (rowA[k] >> 11) & 0x1f;
  748.          const int rowBb0 = (rowB[j] >> 11) & 0x1f;
  749.          const int rowBb1 = (rowB[k] >> 11) & 0x1f;
  750.          const int rowCb0 = (rowC[j] >> 11) & 0x1f;
  751.          const int rowCb1 = (rowC[k] >> 11) & 0x1f;
  752.          const int rowDb0 = (rowD[j] >> 11) & 0x1f;
  753.          const int rowDb1 = (rowD[k] >> 11) & 0x1f;
  754.          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
  755.                                        rowCr0, rowCr1, rowDr0, rowDr1);
  756.          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
  757.                                        rowCg0, rowCg1, rowDg0, rowDg1);
  758.          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
  759.                                        rowCb0, rowCb1, rowDb0, rowDb1);
  760.          dst[i] = (b << 11) | (g << 5) | r;
  761.       }
  762.    }
  763.    else if ((datatype == DTYPE_USHORT_4_4_4_4) && (comps == 4)) {
  764.       DECLARE_ROW_POINTERS0(ushort);
  765.  
  766.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  767.            i++, j += colStride, k += colStride) {
  768.          const int rowAr0 = rowA[j] & 0xf;
  769.          const int rowAr1 = rowA[k] & 0xf;
  770.          const int rowBr0 = rowB[j] & 0xf;
  771.          const int rowBr1 = rowB[k] & 0xf;
  772.          const int rowCr0 = rowC[j] & 0xf;
  773.          const int rowCr1 = rowC[k] & 0xf;
  774.          const int rowDr0 = rowD[j] & 0xf;
  775.          const int rowDr1 = rowD[k] & 0xf;
  776.          const int rowAg0 = (rowA[j] >> 4) & 0xf;
  777.          const int rowAg1 = (rowA[k] >> 4) & 0xf;
  778.          const int rowBg0 = (rowB[j] >> 4) & 0xf;
  779.          const int rowBg1 = (rowB[k] >> 4) & 0xf;
  780.          const int rowCg0 = (rowC[j] >> 4) & 0xf;
  781.          const int rowCg1 = (rowC[k] >> 4) & 0xf;
  782.          const int rowDg0 = (rowD[j] >> 4) & 0xf;
  783.          const int rowDg1 = (rowD[k] >> 4) & 0xf;
  784.          const int rowAb0 = (rowA[j] >> 8) & 0xf;
  785.          const int rowAb1 = (rowA[k] >> 8) & 0xf;
  786.          const int rowBb0 = (rowB[j] >> 8) & 0xf;
  787.          const int rowBb1 = (rowB[k] >> 8) & 0xf;
  788.          const int rowCb0 = (rowC[j] >> 8) & 0xf;
  789.          const int rowCb1 = (rowC[k] >> 8) & 0xf;
  790.          const int rowDb0 = (rowD[j] >> 8) & 0xf;
  791.          const int rowDb1 = (rowD[k] >> 8) & 0xf;
  792.          const int rowAa0 = (rowA[j] >> 12) & 0xf;
  793.          const int rowAa1 = (rowA[k] >> 12) & 0xf;
  794.          const int rowBa0 = (rowB[j] >> 12) & 0xf;
  795.          const int rowBa1 = (rowB[k] >> 12) & 0xf;
  796.          const int rowCa0 = (rowC[j] >> 12) & 0xf;
  797.          const int rowCa1 = (rowC[k] >> 12) & 0xf;
  798.          const int rowDa0 = (rowD[j] >> 12) & 0xf;
  799.          const int rowDa1 = (rowD[k] >> 12) & 0xf;
  800.          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
  801.                                        rowCr0, rowCr1, rowDr0, rowDr1);
  802.          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
  803.                                        rowCg0, rowCg1, rowDg0, rowDg1);
  804.          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
  805.                                        rowCb0, rowCb1, rowDb0, rowDb1);
  806.          const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
  807.                                        rowCa0, rowCa1, rowDa0, rowDa1);
  808.  
  809.          dst[i] = (a << 12) | (b << 8) | (g << 4) | r;
  810.       }
  811.    }
  812.    else if ((datatype == DTYPE_USHORT_1_5_5_5_REV) && (comps == 4)) {
  813.       DECLARE_ROW_POINTERS0(ushort);
  814.  
  815.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  816.            i++, j += colStride, k += colStride) {
  817.          const int rowAr0 = rowA[j] & 0x1f;
  818.          const int rowAr1 = rowA[k] & 0x1f;
  819.          const int rowBr0 = rowB[j] & 0x1f;
  820.          const int rowBr1 = rowB[k] & 0x1f;
  821.          const int rowCr0 = rowC[j] & 0x1f;
  822.          const int rowCr1 = rowC[k] & 0x1f;
  823.          const int rowDr0 = rowD[j] & 0x1f;
  824.          const int rowDr1 = rowD[k] & 0x1f;
  825.          const int rowAg0 = (rowA[j] >> 5) & 0x1f;
  826.          const int rowAg1 = (rowA[k] >> 5) & 0x1f;
  827.          const int rowBg0 = (rowB[j] >> 5) & 0x1f;
  828.          const int rowBg1 = (rowB[k] >> 5) & 0x1f;
  829.          const int rowCg0 = (rowC[j] >> 5) & 0x1f;
  830.          const int rowCg1 = (rowC[k] >> 5) & 0x1f;
  831.          const int rowDg0 = (rowD[j] >> 5) & 0x1f;
  832.          const int rowDg1 = (rowD[k] >> 5) & 0x1f;
  833.          const int rowAb0 = (rowA[j] >> 10) & 0x1f;
  834.          const int rowAb1 = (rowA[k] >> 10) & 0x1f;
  835.          const int rowBb0 = (rowB[j] >> 10) & 0x1f;
  836.          const int rowBb1 = (rowB[k] >> 10) & 0x1f;
  837.          const int rowCb0 = (rowC[j] >> 10) & 0x1f;
  838.          const int rowCb1 = (rowC[k] >> 10) & 0x1f;
  839.          const int rowDb0 = (rowD[j] >> 10) & 0x1f;
  840.          const int rowDb1 = (rowD[k] >> 10) & 0x1f;
  841.          const int rowAa0 = (rowA[j] >> 15) & 0x1;
  842.          const int rowAa1 = (rowA[k] >> 15) & 0x1;
  843.          const int rowBa0 = (rowB[j] >> 15) & 0x1;
  844.          const int rowBa1 = (rowB[k] >> 15) & 0x1;
  845.          const int rowCa0 = (rowC[j] >> 15) & 0x1;
  846.          const int rowCa1 = (rowC[k] >> 15) & 0x1;
  847.          const int rowDa0 = (rowD[j] >> 15) & 0x1;
  848.          const int rowDa1 = (rowD[k] >> 15) & 0x1;
  849.          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
  850.                                        rowCr0, rowCr1, rowDr0, rowDr1);
  851.          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
  852.                                        rowCg0, rowCg1, rowDg0, rowDg1);
  853.          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
  854.                                        rowCb0, rowCb1, rowDb0, rowDb1);
  855.          const int a = FILTER_SUM_3D(rowAa0, rowAa1, rowBa0, rowBa1,
  856.                                        rowCa0, rowCa1, rowDa0, rowDa1);
  857.  
  858.          dst[i] = (a << 15) | (b << 10) | (g << 5) | r;
  859.       }
  860.    }
  861.    else if ((datatype == DTYPE_UBYTE_3_3_2) && (comps == 3)) {
  862.       DECLARE_ROW_POINTERS0(ushort);
  863.  
  864.       for (i = j = 0, k = k0; i < (uint) dstWidth;
  865.            i++, j += colStride, k += colStride) {
  866.          const int rowAr0 = rowA[j] & 0x3;
  867.          const int rowAr1 = rowA[k] & 0x3;
  868.          const int rowBr0 = rowB[j] & 0x3;
  869.          const int rowBr1 = rowB[k] & 0x3;
  870.          const int rowCr0 = rowC[j] & 0x3;
  871.          const int rowCr1 = rowC[k] & 0x3;
  872.          const int rowDr0 = rowD[j] & 0x3;
  873.          const int rowDr1 = rowD[k] & 0x3;
  874.          const int rowAg0 = (rowA[j] >> 2) & 0x7;
  875.          const int rowAg1 = (rowA[k] >> 2) & 0x7;
  876.          const int rowBg0 = (rowB[j] >> 2) & 0x7;
  877.          const int rowBg1 = (rowB[k] >> 2) & 0x7;
  878.          const int rowCg0 = (rowC[j] >> 2) & 0x7;
  879.          const int rowCg1 = (rowC[k] >> 2) & 0x7;
  880.          const int rowDg0 = (rowD[j] >> 2) & 0x7;
  881.          const int rowDg1 = (rowD[k] >> 2) & 0x7;
  882.          const int rowAb0 = (rowA[j] >> 5) & 0x7;
  883.          const int rowAb1 = (rowA[k] >> 5) & 0x7;
  884.          const int rowBb0 = (rowB[j] >> 5) & 0x7;
  885.          const int rowBb1 = (rowB[k] >> 5) & 0x7;
  886.          const int rowCb0 = (rowC[j] >> 5) & 0x7;
  887.          const int rowCb1 = (rowC[k] >> 5) & 0x7;
  888.          const int rowDb0 = (rowD[j] >> 5) & 0x7;
  889.          const int rowDb1 = (rowD[k] >> 5) & 0x7;
  890.          const int r = FILTER_SUM_3D(rowAr0, rowAr1, rowBr0, rowBr1,
  891.                                        rowCr0, rowCr1, rowDr0, rowDr1);
  892.          const int g = FILTER_SUM_3D(rowAg0, rowAg1, rowBg0, rowBg1,
  893.                                        rowCg0, rowCg1, rowDg0, rowDg1);
  894.          const int b = FILTER_SUM_3D(rowAb0, rowAb1, rowBb0, rowBb1,
  895.                                        rowCb0, rowCb1, rowDb0, rowDb1);
  896.          dst[i] = (b << 5) | (g << 2) | r;
  897.       }
  898.    }
  899.    else {
  900.       debug_printf("bad format in do_row_3D()");
  901.    }
  902. }
  903.  
  904.  
  905.  
  906. static void
  907. format_to_type_comps(enum pipe_format pformat,
  908.                      enum dtype *datatype, uint *comps)
  909. {
  910.    /* XXX I think this could be implemented in terms of the pf_*() functions */
  911.    switch (pformat) {
  912.    case PIPE_FORMAT_B8G8R8A8_UNORM:
  913.    case PIPE_FORMAT_B8G8R8X8_UNORM:
  914.    case PIPE_FORMAT_A8R8G8B8_UNORM:
  915.    case PIPE_FORMAT_X8R8G8B8_UNORM:
  916.    case PIPE_FORMAT_A8B8G8R8_SRGB:
  917.    case PIPE_FORMAT_X8B8G8R8_SRGB:
  918.    case PIPE_FORMAT_B8G8R8A8_SRGB:
  919.    case PIPE_FORMAT_B8G8R8X8_SRGB:
  920.    case PIPE_FORMAT_A8R8G8B8_SRGB:
  921.    case PIPE_FORMAT_X8R8G8B8_SRGB:
  922.    case PIPE_FORMAT_R8G8B8_SRGB:
  923.       *datatype = DTYPE_UBYTE;
  924.       *comps = 4;
  925.       return;
  926.    case PIPE_FORMAT_B5G5R5X1_UNORM:
  927.    case PIPE_FORMAT_B5G5R5A1_UNORM:
  928.       *datatype = DTYPE_USHORT_1_5_5_5_REV;
  929.       *comps = 4;
  930.       return;
  931.    case PIPE_FORMAT_B4G4R4A4_UNORM:
  932.       *datatype = DTYPE_USHORT_4_4_4_4;
  933.       *comps = 4;
  934.       return;
  935.    case PIPE_FORMAT_B5G6R5_UNORM:
  936.       *datatype = DTYPE_USHORT_5_6_5;
  937.       *comps = 3;
  938.       return;
  939.    case PIPE_FORMAT_L8_UNORM:
  940.    case PIPE_FORMAT_L8_SRGB:
  941.    case PIPE_FORMAT_A8_UNORM:
  942.    case PIPE_FORMAT_I8_UNORM:
  943.       *datatype = DTYPE_UBYTE;
  944.       *comps = 1;
  945.       return;
  946.    case PIPE_FORMAT_L8A8_UNORM:
  947.    case PIPE_FORMAT_L8A8_SRGB:
  948.       *datatype = DTYPE_UBYTE;
  949.       *comps = 2;
  950.       return;
  951.    default:
  952.       assert(0);
  953.       *datatype = DTYPE_UBYTE;
  954.       *comps = 0;
  955.       break;
  956.    }
  957. }
  958.  
  959.  
  960. static void
  961. reduce_1d(enum pipe_format pformat,
  962.           int srcWidth, const ubyte *srcPtr,
  963.           int dstWidth, ubyte *dstPtr)
  964. {
  965.    enum dtype datatype;
  966.    uint comps;
  967.  
  968.    format_to_type_comps(pformat, &datatype, &comps);
  969.  
  970.    /* we just duplicate the input row, kind of hack, saves code */
  971.    do_row(datatype, comps,
  972.           srcWidth, srcPtr, srcPtr,
  973.           dstWidth, dstPtr);
  974. }
  975.  
  976.  
  977. /**
  978.  * Strides are in bytes.  If zero, it'll be computed as width * bpp.
  979.  */
  980. static void
  981. reduce_2d(enum pipe_format pformat,
  982.           int srcWidth, int srcHeight,
  983.           int srcRowStride, const ubyte *srcPtr,
  984.           int dstWidth, int dstHeight,
  985.           int dstRowStride, ubyte *dstPtr)
  986. {
  987.    enum dtype datatype;
  988.    uint comps;
  989.    const int bpt = util_format_get_blocksize(pformat);
  990.    const ubyte *srcA, *srcB;
  991.    ubyte *dst;
  992.    int row;
  993.  
  994.    format_to_type_comps(pformat, &datatype, &comps);
  995.  
  996.    if (!srcRowStride)
  997.       srcRowStride = bpt * srcWidth;
  998.  
  999.    if (!dstRowStride)
  1000.       dstRowStride = bpt * dstWidth;
  1001.  
  1002.    /* Compute src and dst pointers */
  1003.    srcA = srcPtr;
  1004.    if (srcHeight > 1)
  1005.       srcB = srcA + srcRowStride;
  1006.    else
  1007.       srcB = srcA;
  1008.    dst = dstPtr;
  1009.  
  1010.    for (row = 0; row < dstHeight; row++) {
  1011.       do_row(datatype, comps,
  1012.              srcWidth, srcA, srcB,
  1013.              dstWidth, dst);
  1014.       srcA += 2 * srcRowStride;
  1015.       srcB += 2 * srcRowStride;
  1016.       dst += dstRowStride;
  1017.    }
  1018. }
  1019.  
  1020.  
  1021. static void
  1022. reduce_3d(enum pipe_format pformat,
  1023.           int srcWidth, int srcHeight, int srcDepth,
  1024.           int srcRowStride, int srcImageStride, const ubyte *srcPtr,
  1025.           int dstWidth, int dstHeight, int dstDepth,
  1026.           int dstRowStride, int dstImageStride, ubyte *dstPtr)
  1027. {
  1028.    const int bpt = util_format_get_blocksize(pformat);
  1029.    int img, row;
  1030.    int srcImageOffset, srcRowOffset;
  1031.    enum dtype datatype;
  1032.    uint comps;
  1033.  
  1034.    format_to_type_comps(pformat, &datatype, &comps);
  1035.  
  1036.    /* XXX I think we should rather assert those strides */
  1037.    if (!srcImageStride)
  1038.       srcImageStride = srcWidth * srcHeight * bpt;
  1039.    if (!dstImageStride)
  1040.       dstImageStride = dstWidth * dstHeight * bpt;
  1041.  
  1042.    if (!srcRowStride)
  1043.       srcRowStride = srcWidth * bpt;
  1044.    if (!dstRowStride)
  1045.       dstRowStride = dstWidth * bpt;
  1046.  
  1047.    /* Offset between adjacent src images to be averaged together */
  1048.    srcImageOffset = (srcDepth == dstDepth) ? 0 : srcImageStride;
  1049.  
  1050.    /* Offset between adjacent src rows to be averaged together */
  1051.    srcRowOffset = (srcHeight == dstHeight) ? 0 : srcRowStride;
  1052.  
  1053.    /*
  1054.     * Need to average together up to 8 src pixels for each dest pixel.
  1055.     * Break that down into 3 operations:
  1056.     *   1. take two rows from source image and average them together.
  1057.     *   2. take two rows from next source image and average them together.
  1058.     *   3. take the two averaged rows and average them for the final dst row.
  1059.     */
  1060.  
  1061.    /*
  1062.    printf("mip3d %d x %d x %d  ->  %d x %d x %d\n",
  1063.           srcWidth, srcHeight, srcDepth, dstWidth, dstHeight, dstDepth);
  1064.    */
  1065.  
  1066.    for (img = 0; img < dstDepth; img++) {
  1067.       /* first source image pointer */
  1068.       const ubyte *imgSrcA = srcPtr
  1069.          + img * (srcImageStride + srcImageOffset);
  1070.       /* second source image pointer */
  1071.       const ubyte *imgSrcB = imgSrcA + srcImageOffset;
  1072.       /* address of the dest image */
  1073.       ubyte *imgDst = dstPtr + img * dstImageStride;
  1074.  
  1075.       /* setup the four source row pointers and the dest row pointer */
  1076.       const ubyte *srcImgARowA = imgSrcA;
  1077.       const ubyte *srcImgARowB = imgSrcA + srcRowOffset;
  1078.       const ubyte *srcImgBRowA = imgSrcB;
  1079.       const ubyte *srcImgBRowB = imgSrcB + srcRowOffset;
  1080.       ubyte *dstImgRow = imgDst;
  1081.  
  1082.       for (row = 0; row < dstHeight; row++) {
  1083.          do_row_3D(datatype, comps, srcWidth,
  1084.                    srcImgARowA, srcImgARowB,
  1085.                    srcImgBRowA, srcImgBRowB,
  1086.                    dstWidth, dstImgRow);
  1087.  
  1088.          /* advance to next rows */
  1089.          srcImgARowA += srcRowStride + srcRowOffset;
  1090.          srcImgARowB += srcRowStride + srcRowOffset;
  1091.          srcImgBRowA += srcRowStride + srcRowOffset;
  1092.          srcImgBRowB += srcRowStride + srcRowOffset;
  1093.          dstImgRow += dstImageStride;
  1094.       }
  1095.    }
  1096. }
  1097.  
  1098.  
  1099.  
  1100.  
  1101. static void
  1102. make_1d_mipmap(struct gen_mipmap_state *ctx,
  1103.                struct pipe_resource *pt,
  1104.                uint layer, uint baseLevel, uint lastLevel)
  1105. {
  1106.    struct pipe_context *pipe = ctx->pipe;
  1107.    uint dstLevel;
  1108.  
  1109.    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
  1110.       const uint srcLevel = dstLevel - 1;
  1111.       struct pipe_transfer *srcTrans, *dstTrans;
  1112.       void *srcMap, *dstMap;
  1113.  
  1114.       srcMap = pipe_transfer_map(pipe, pt, srcLevel, layer,
  1115.                                  PIPE_TRANSFER_READ, 0, 0,
  1116.                                  u_minify(pt->width0, srcLevel),
  1117.                                  u_minify(pt->height0, srcLevel), &srcTrans);
  1118.       dstMap = pipe_transfer_map(pipe, pt, dstLevel, layer,
  1119.                                  PIPE_TRANSFER_WRITE, 0, 0,
  1120.                                  u_minify(pt->width0, dstLevel),
  1121.                                  u_minify(pt->height0, dstLevel), &dstTrans);
  1122.  
  1123.       reduce_1d(pt->format,
  1124.                 srcTrans->box.width, srcMap,
  1125.                 dstTrans->box.width, dstMap);
  1126.  
  1127.       pipe->transfer_unmap(pipe, srcTrans);
  1128.       pipe->transfer_unmap(pipe, dstTrans);
  1129.    }
  1130. }
  1131.  
  1132.  
  1133. static void
  1134. make_2d_mipmap(struct gen_mipmap_state *ctx,
  1135.                struct pipe_resource *pt,
  1136.                uint layer, uint baseLevel, uint lastLevel)
  1137. {
  1138.    struct pipe_context *pipe = ctx->pipe;
  1139.    uint dstLevel;
  1140.  
  1141.    assert(util_format_get_blockwidth(pt->format) == 1);
  1142.    assert(util_format_get_blockheight(pt->format) == 1);
  1143.  
  1144.    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
  1145.       const uint srcLevel = dstLevel - 1;
  1146.       struct pipe_transfer *srcTrans, *dstTrans;
  1147.       ubyte *srcMap, *dstMap;
  1148.  
  1149.       srcMap = pipe_transfer_map(pipe, pt, srcLevel, layer,
  1150.                                  PIPE_TRANSFER_READ, 0, 0,
  1151.                                  u_minify(pt->width0, srcLevel),
  1152.                                  u_minify(pt->height0, srcLevel), &srcTrans);
  1153.       dstMap = pipe_transfer_map(pipe, pt, dstLevel, layer,
  1154.                                  PIPE_TRANSFER_WRITE, 0, 0,
  1155.                                  u_minify(pt->width0, dstLevel),
  1156.                                  u_minify(pt->height0, dstLevel), &dstTrans);
  1157.  
  1158.       reduce_2d(pt->format,
  1159.                 srcTrans->box.width, srcTrans->box.height,
  1160.                 srcTrans->stride, srcMap,
  1161.                 dstTrans->box.width, dstTrans->box.height,
  1162.                 dstTrans->stride, dstMap);
  1163.  
  1164.       pipe->transfer_unmap(pipe, srcTrans);
  1165.       pipe->transfer_unmap(pipe, dstTrans);
  1166.    }
  1167. }
  1168.  
  1169.  
  1170. /* XXX looks a bit more like it could work now but need to test */
  1171. static void
  1172. make_3d_mipmap(struct gen_mipmap_state *ctx,
  1173.                struct pipe_resource *pt,
  1174.                uint face, uint baseLevel, uint lastLevel)
  1175. {
  1176.    struct pipe_context *pipe = ctx->pipe;
  1177.    uint dstLevel;
  1178.    struct pipe_box src_box, dst_box;
  1179.  
  1180.    assert(util_format_get_blockwidth(pt->format) == 1);
  1181.    assert(util_format_get_blockheight(pt->format) == 1);
  1182.  
  1183.    src_box.x = src_box.y = src_box.z = 0;
  1184.    dst_box.x = dst_box.y = dst_box.z = 0;
  1185.  
  1186.    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
  1187.       const uint srcLevel = dstLevel - 1;
  1188.       struct pipe_transfer *srcTrans, *dstTrans;
  1189.       ubyte *srcMap, *dstMap;
  1190.       struct pipe_box src_box, dst_box;
  1191.       src_box.width = u_minify(pt->width0, srcLevel);
  1192.       src_box.height = u_minify(pt->height0, srcLevel);
  1193.       src_box.depth = u_minify(pt->depth0, srcLevel);
  1194.       dst_box.width = u_minify(pt->width0, dstLevel);
  1195.       dst_box.height = u_minify(pt->height0, dstLevel);
  1196.       dst_box.depth = u_minify(pt->depth0, dstLevel);
  1197.  
  1198.       srcMap = pipe->transfer_map(pipe, pt, srcLevel,
  1199.                                   PIPE_TRANSFER_READ,
  1200.                                   &src_box, &srcTrans);
  1201.       dstMap = pipe->transfer_map(pipe, pt, dstLevel,
  1202.                                   PIPE_TRANSFER_WRITE,
  1203.                                   &dst_box, &dstTrans);
  1204.  
  1205.       reduce_3d(pt->format,
  1206.                 srcTrans->box.width, srcTrans->box.height, srcTrans->box.depth,
  1207.                 srcTrans->stride, srcTrans->layer_stride, srcMap,
  1208.                 dstTrans->box.width, dstTrans->box.height, dstTrans->box.depth,
  1209.                 dstTrans->stride, dstTrans->layer_stride, dstMap);
  1210.  
  1211.       pipe->transfer_unmap(pipe, srcTrans);
  1212.       pipe->transfer_unmap(pipe, dstTrans);
  1213.    }
  1214. }
  1215.  
  1216.  
  1217. static void
  1218. fallback_gen_mipmap(struct gen_mipmap_state *ctx,
  1219.                     struct pipe_resource *pt,
  1220.                     uint layer, uint baseLevel, uint lastLevel)
  1221. {
  1222.    switch (pt->target) {
  1223.    case PIPE_TEXTURE_1D:
  1224.       make_1d_mipmap(ctx, pt, layer, baseLevel, lastLevel);
  1225.       break;
  1226.    case PIPE_TEXTURE_2D:
  1227.    case PIPE_TEXTURE_RECT:
  1228.    case PIPE_TEXTURE_CUBE:
  1229.       make_2d_mipmap(ctx, pt, layer, baseLevel, lastLevel);
  1230.       break;
  1231.    case PIPE_TEXTURE_3D:
  1232.       make_3d_mipmap(ctx, pt, layer, baseLevel, lastLevel);
  1233.       break;
  1234.    default:
  1235.       assert(0);
  1236.    }
  1237. }
  1238.  
  1239.  
  1240. /**
  1241.  * Create a mipmap generation context.
  1242.  * The idea is to create one of these and re-use it each time we need to
  1243.  * generate a mipmap.
  1244.  */
  1245. struct gen_mipmap_state *
  1246. util_create_gen_mipmap(struct pipe_context *pipe,
  1247.                        struct cso_context *cso)
  1248. {
  1249.    struct gen_mipmap_state *ctx;
  1250.    uint i;
  1251.  
  1252.    ctx = CALLOC_STRUCT(gen_mipmap_state);
  1253.    if (!ctx)
  1254.       return NULL;
  1255.  
  1256.    ctx->pipe = pipe;
  1257.    ctx->cso = cso;
  1258.  
  1259.    /* disabled blending/masking */
  1260.    memset(&ctx->blend_keep_color, 0, sizeof(ctx->blend_keep_color));
  1261.    memset(&ctx->blend_write_color, 0, sizeof(ctx->blend_write_color));
  1262.    ctx->blend_write_color.rt[0].colormask = PIPE_MASK_RGBA;
  1263.  
  1264.    /* no-op depth/stencil/alpha */
  1265.    memset(&ctx->dsa_keep_depth, 0, sizeof(ctx->dsa_keep_depth));
  1266.    memset(&ctx->dsa_write_depth, 0, sizeof(ctx->dsa_write_depth));
  1267.    ctx->dsa_write_depth.depth.enabled = 1;
  1268.    ctx->dsa_write_depth.depth.func = PIPE_FUNC_ALWAYS;
  1269.    ctx->dsa_write_depth.depth.writemask = 1;
  1270.  
  1271.    /* rasterizer */
  1272.    memset(&ctx->rasterizer, 0, sizeof(ctx->rasterizer));
  1273.    ctx->rasterizer.cull_face = PIPE_FACE_NONE;
  1274.    ctx->rasterizer.half_pixel_center = 1;
  1275.    ctx->rasterizer.bottom_edge_rule = 1;
  1276.    ctx->rasterizer.depth_clip = 1;
  1277.  
  1278.    /* sampler state */
  1279.    memset(&ctx->sampler, 0, sizeof(ctx->sampler));
  1280.    ctx->sampler.wrap_s = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
  1281.    ctx->sampler.wrap_t = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
  1282.    ctx->sampler.wrap_r = PIPE_TEX_WRAP_CLAMP_TO_EDGE;
  1283.    ctx->sampler.min_mip_filter = PIPE_TEX_MIPFILTER_NEAREST;
  1284.    ctx->sampler.normalized_coords = 1;
  1285.  
  1286.    /* vertex elements state */
  1287.    memset(&ctx->velem[0], 0, sizeof(ctx->velem[0]) * 2);
  1288.    for (i = 0; i < 2; i++) {
  1289.       ctx->velem[i].src_offset = i * 4 * sizeof(float);
  1290.       ctx->velem[i].instance_divisor = 0;
  1291.       ctx->velem[i].vertex_buffer_index = cso_get_aux_vertex_buffer_slot(cso);
  1292.       ctx->velem[i].src_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
  1293.    }
  1294.  
  1295.    /* vertex data that doesn't change */
  1296.    for (i = 0; i < 4; i++) {
  1297.       ctx->vertices[i][0][2] = 0.0f; /* z */
  1298.       ctx->vertices[i][0][3] = 1.0f; /* w */
  1299.       ctx->vertices[i][1][3] = 1.0f; /* q */
  1300.    }
  1301.  
  1302.    /* Note: the actual vertex buffer is allocated as needed below */
  1303.  
  1304.    return ctx;
  1305. }
  1306.  
  1307.  
  1308. /**
  1309.  * Helper function to set the fragment shaders.
  1310.  */
  1311. static INLINE void
  1312. set_fragment_shader(struct gen_mipmap_state *ctx, uint type,
  1313.                     boolean output_depth)
  1314. {
  1315.    if (output_depth) {
  1316.       if (!ctx->fs_depth[type])
  1317.          ctx->fs_depth[type] =
  1318.             util_make_fragment_tex_shader_writedepth(ctx->pipe, type,
  1319.                                                      TGSI_INTERPOLATE_LINEAR);
  1320.  
  1321.       cso_set_fragment_shader_handle(ctx->cso, ctx->fs_depth[type]);
  1322.    }
  1323.    else {
  1324.       if (!ctx->fs_color[type])
  1325.          ctx->fs_color[type] =
  1326.             util_make_fragment_tex_shader(ctx->pipe, type,
  1327.                                           TGSI_INTERPOLATE_LINEAR);
  1328.  
  1329.       cso_set_fragment_shader_handle(ctx->cso, ctx->fs_color[type]);
  1330.    }
  1331. }
  1332.  
  1333.  
  1334. /**
  1335.  * Helper function to set the vertex shader.
  1336.  */
  1337. static INLINE void
  1338. set_vertex_shader(struct gen_mipmap_state *ctx)
  1339. {
  1340.    /* vertex shader - still required to provide the linkage between
  1341.     * fragment shader input semantics and vertex_element/buffers.
  1342.     */
  1343.    if (!ctx->vs)
  1344.    {
  1345.       const uint semantic_names[] = { TGSI_SEMANTIC_POSITION,
  1346.                                       TGSI_SEMANTIC_GENERIC };
  1347.       const uint semantic_indexes[] = { 0, 0 };
  1348.       ctx->vs = util_make_vertex_passthrough_shader(ctx->pipe, 2,
  1349.                                                     semantic_names,
  1350.                                                     semantic_indexes);
  1351.    }
  1352.  
  1353.    cso_set_vertex_shader_handle(ctx->cso, ctx->vs);
  1354. }
  1355.  
  1356.  
  1357. /**
  1358.  * Get next "slot" of vertex space in the vertex buffer.
  1359.  * We're allocating one large vertex buffer and using it piece by piece.
  1360.  */
  1361. static unsigned
  1362. get_next_slot(struct gen_mipmap_state *ctx)
  1363. {
  1364.    const unsigned max_slots = 4096 / sizeof ctx->vertices;
  1365.  
  1366.    if (ctx->vbuf_slot >= max_slots) {
  1367.       pipe_resource_reference(&ctx->vbuf, NULL);
  1368.       ctx->vbuf_slot = 0;
  1369.    }
  1370.  
  1371.    if (!ctx->vbuf) {
  1372.       ctx->vbuf = pipe_buffer_create(ctx->pipe->screen,
  1373.                                      PIPE_BIND_VERTEX_BUFFER,
  1374.                                      PIPE_USAGE_STREAM,
  1375.                                      max_slots * sizeof ctx->vertices);
  1376.    }
  1377.    
  1378.    return ctx->vbuf_slot++ * sizeof ctx->vertices;
  1379. }
  1380.  
  1381.  
  1382. static unsigned
  1383. set_vertex_data(struct gen_mipmap_state *ctx,
  1384.                 enum pipe_texture_target tex_target,
  1385.                 uint layer, float r)
  1386. {
  1387.    unsigned offset;
  1388.  
  1389.    /* vert[0].position */
  1390.    ctx->vertices[0][0][0] = -1.0f; /*x*/
  1391.    ctx->vertices[0][0][1] = -1.0f; /*y*/
  1392.  
  1393.    /* vert[1].position */
  1394.    ctx->vertices[1][0][0] = 1.0f;
  1395.    ctx->vertices[1][0][1] = -1.0f;
  1396.  
  1397.    /* vert[2].position */
  1398.    ctx->vertices[2][0][0] = 1.0f;
  1399.    ctx->vertices[2][0][1] = 1.0f;
  1400.  
  1401.    /* vert[3].position */
  1402.    ctx->vertices[3][0][0] = -1.0f;
  1403.    ctx->vertices[3][0][1] = 1.0f;
  1404.  
  1405.    /* Setup vertex texcoords.  This is a little tricky for cube maps. */
  1406.    if (tex_target == PIPE_TEXTURE_CUBE) {
  1407.       static const float st[4][2] = {
  1408.          {0.0f, 0.0f}, {1.0f, 0.0f}, {1.0f, 1.0f}, {0.0f, 1.0f}
  1409.       };
  1410.  
  1411.       util_map_texcoords2d_onto_cubemap(layer, &st[0][0], 2,
  1412.                                         &ctx->vertices[0][1][0], 8);
  1413.    }
  1414.    else if (tex_target == PIPE_TEXTURE_1D_ARRAY) {
  1415.       /* 1D texture array  */
  1416.       ctx->vertices[0][1][0] = 0.0f; /*s*/
  1417.       ctx->vertices[0][1][1] = r; /*t*/
  1418.       ctx->vertices[0][1][2] = 0.0f;    /*r*/
  1419.  
  1420.       ctx->vertices[1][1][0] = 1.0f;
  1421.       ctx->vertices[1][1][1] = r;
  1422.       ctx->vertices[1][1][2] = 0.0f;
  1423.  
  1424.       ctx->vertices[2][1][0] = 1.0f;
  1425.       ctx->vertices[2][1][1] = r;
  1426.       ctx->vertices[2][1][2] = 0.0f;
  1427.  
  1428.       ctx->vertices[3][1][0] = 0.0f;
  1429.       ctx->vertices[3][1][1] = r;
  1430.       ctx->vertices[3][1][2] = 0.0f;
  1431.    } else {
  1432.       /* 1D/2D/3D/2D array */
  1433.       ctx->vertices[0][1][0] = 0.0f; /*s*/
  1434.       ctx->vertices[0][1][1] = 0.0f; /*t*/
  1435.       ctx->vertices[0][1][2] = r;    /*r*/
  1436.  
  1437.       ctx->vertices[1][1][0] = 1.0f;
  1438.       ctx->vertices[1][1][1] = 0.0f;
  1439.       ctx->vertices[1][1][2] = r;
  1440.  
  1441.       ctx->vertices[2][1][0] = 1.0f;
  1442.       ctx->vertices[2][1][1] = 1.0f;
  1443.       ctx->vertices[2][1][2] = r;
  1444.  
  1445.       ctx->vertices[3][1][0] = 0.0f;
  1446.       ctx->vertices[3][1][1] = 1.0f;
  1447.       ctx->vertices[3][1][2] = r;
  1448.    }
  1449.  
  1450.    offset = get_next_slot( ctx );
  1451.  
  1452.    pipe_buffer_write_nooverlap(ctx->pipe, ctx->vbuf,
  1453.                                offset, sizeof(ctx->vertices), ctx->vertices);
  1454.  
  1455.    return offset;
  1456. }
  1457.  
  1458.  
  1459.  
  1460. /**
  1461.  * Destroy a mipmap generation context
  1462.  */
  1463. void
  1464. util_destroy_gen_mipmap(struct gen_mipmap_state *ctx)
  1465. {
  1466.    struct pipe_context *pipe = ctx->pipe;
  1467.    unsigned i;
  1468.  
  1469.    for (i = 0; i < Elements(ctx->fs_color); i++)
  1470.       if (ctx->fs_color[i])
  1471.          pipe->delete_fs_state(pipe, ctx->fs_color[i]);
  1472.  
  1473.    for (i = 0; i < Elements(ctx->fs_depth); i++)
  1474.       if (ctx->fs_depth[i])
  1475.          pipe->delete_fs_state(pipe, ctx->fs_depth[i]);
  1476.  
  1477.    if (ctx->vs)
  1478.       pipe->delete_vs_state(pipe, ctx->vs);
  1479.  
  1480.    pipe_resource_reference(&ctx->vbuf, NULL);
  1481.  
  1482.    FREE(ctx);
  1483. }
  1484.  
  1485.  
  1486. /**
  1487.  * Generate mipmap images.  It's assumed all needed texture memory is
  1488.  * already allocated.
  1489.  *
  1490.  * \param psv  the sampler view to the texture to generate mipmap levels for
  1491.  * \param face  which cube face to generate mipmaps for (0 for non-cube maps)
  1492.  * \param baseLevel  the first mipmap level to use as a src
  1493.  * \param lastLevel  the last mipmap level to generate
  1494.  * \param filter  the minification filter used to generate mipmap levels with
  1495.  * \param filter  one of PIPE_TEX_FILTER_LINEAR, PIPE_TEX_FILTER_NEAREST
  1496.  */
  1497. void
  1498. util_gen_mipmap(struct gen_mipmap_state *ctx,
  1499.                 struct pipe_sampler_view *psv,
  1500.                 uint face, uint baseLevel, uint lastLevel, uint filter)
  1501. {
  1502.    struct pipe_context *pipe = ctx->pipe;
  1503.    struct pipe_screen *screen = pipe->screen;
  1504.    struct pipe_framebuffer_state fb;
  1505.    struct pipe_resource *pt = psv->texture;
  1506.    uint dstLevel;
  1507.    uint offset;
  1508.    uint type;
  1509.    boolean is_depth = util_format_is_depth_or_stencil(psv->format);
  1510.  
  1511.    /* The texture object should have room for the levels which we're
  1512.     * about to generate.
  1513.     */
  1514.    assert(lastLevel <= pt->last_level);
  1515.  
  1516.    /* If this fails, why are we here? */
  1517.    assert(lastLevel > baseLevel);
  1518.  
  1519.    assert(filter == PIPE_TEX_FILTER_LINEAR ||
  1520.           filter == PIPE_TEX_FILTER_NEAREST);
  1521.  
  1522.    switch (pt->target) {
  1523.    case PIPE_TEXTURE_1D:
  1524.       type = TGSI_TEXTURE_1D;
  1525.       break;
  1526.    case PIPE_TEXTURE_2D:
  1527.       type = TGSI_TEXTURE_2D;
  1528.       break;
  1529.    case PIPE_TEXTURE_3D:
  1530.       type = TGSI_TEXTURE_3D;
  1531.       break;
  1532.    case PIPE_TEXTURE_CUBE:
  1533.       type = TGSI_TEXTURE_CUBE;
  1534.       break;
  1535.    case PIPE_TEXTURE_1D_ARRAY:
  1536.       type = TGSI_TEXTURE_1D_ARRAY;
  1537.       break;
  1538.    case PIPE_TEXTURE_2D_ARRAY:
  1539.       type = TGSI_TEXTURE_2D_ARRAY;
  1540.       break;
  1541.    default:
  1542.       assert(0);
  1543.       type = TGSI_TEXTURE_2D;
  1544.    }
  1545.  
  1546.    /* check if we can render in the texture's format */
  1547.    if (!screen->is_format_supported(screen, psv->format, pt->target,
  1548.                                     pt->nr_samples,
  1549.                                     is_depth ? PIPE_BIND_DEPTH_STENCIL :
  1550.                                                PIPE_BIND_RENDER_TARGET)) {
  1551.       fallback_gen_mipmap(ctx, pt, face, baseLevel, lastLevel);
  1552.       return;
  1553.    }
  1554.  
  1555.    /* save state (restored below) */
  1556.    cso_save_blend(ctx->cso);
  1557.    cso_save_depth_stencil_alpha(ctx->cso);
  1558.    cso_save_rasterizer(ctx->cso);
  1559.    cso_save_sample_mask(ctx->cso);
  1560.    cso_save_samplers(ctx->cso, PIPE_SHADER_FRAGMENT);
  1561.    cso_save_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT);
  1562.    cso_save_stream_outputs(ctx->cso);
  1563.    cso_save_framebuffer(ctx->cso);
  1564.    cso_save_fragment_shader(ctx->cso);
  1565.    cso_save_vertex_shader(ctx->cso);
  1566.    cso_save_geometry_shader(ctx->cso);
  1567.    cso_save_viewport(ctx->cso);
  1568.    cso_save_vertex_elements(ctx->cso);
  1569.    cso_save_aux_vertex_buffer_slot(ctx->cso);
  1570.    cso_save_render_condition(ctx->cso);
  1571.  
  1572.    /* bind our state */
  1573.    cso_set_blend(ctx->cso, is_depth ? &ctx->blend_keep_color :
  1574.                                       &ctx->blend_write_color);
  1575.    cso_set_depth_stencil_alpha(ctx->cso, is_depth ? &ctx->dsa_write_depth :
  1576.                                                     &ctx->dsa_keep_depth);
  1577.    cso_set_rasterizer(ctx->cso, &ctx->rasterizer);
  1578.    cso_set_sample_mask(ctx->cso, ~0);
  1579.    cso_set_vertex_elements(ctx->cso, 2, ctx->velem);
  1580.    cso_set_stream_outputs(ctx->cso, 0, NULL, 0);
  1581.    cso_set_render_condition(ctx->cso, NULL, FALSE, 0);
  1582.  
  1583.    set_fragment_shader(ctx, type, is_depth);
  1584.    set_vertex_shader(ctx);
  1585.    cso_set_geometry_shader_handle(ctx->cso, NULL);
  1586.  
  1587.    /* init framebuffer state */
  1588.    memset(&fb, 0, sizeof(fb));
  1589.  
  1590.    /* set min/mag to same filter for faster sw speed */
  1591.    ctx->sampler.mag_img_filter = filter;
  1592.    ctx->sampler.min_img_filter = filter;
  1593.  
  1594.    for (dstLevel = baseLevel + 1; dstLevel <= lastLevel; dstLevel++) {
  1595.       const uint srcLevel = dstLevel - 1;
  1596.       struct pipe_viewport_state vp;
  1597.       unsigned nr_layers, layer, i;
  1598.       float rcoord = 0.0f;
  1599.  
  1600.       if (pt->target == PIPE_TEXTURE_3D)
  1601.          nr_layers = u_minify(pt->depth0, dstLevel);
  1602.       else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY)
  1603.          nr_layers = pt->array_size;
  1604.       else
  1605.          nr_layers = 1;
  1606.  
  1607.       for (i = 0; i < nr_layers; i++) {
  1608.          struct pipe_surface *surf, surf_templ;
  1609.          if (pt->target == PIPE_TEXTURE_3D) {
  1610.             /* in theory with geom shaders and driver with full layer support
  1611.                could do that in one go. */
  1612.             layer = i;
  1613.             /* XXX hmm really? */
  1614.             rcoord = (float)layer / (float)nr_layers + 1.0f / (float)(nr_layers * 2);
  1615.          } else if (pt->target == PIPE_TEXTURE_2D_ARRAY || pt->target == PIPE_TEXTURE_1D_ARRAY) {
  1616.             layer = i;
  1617.             rcoord = (float)layer;
  1618.          } else
  1619.             layer = face;
  1620.  
  1621.          u_surface_default_template(&surf_templ, pt);
  1622.          surf_templ.u.tex.level = dstLevel;
  1623.          surf_templ.u.tex.first_layer = layer;
  1624.          surf_templ.u.tex.last_layer = layer;
  1625.          surf = pipe->create_surface(pipe, pt, &surf_templ);
  1626.  
  1627.          /*
  1628.           * Setup framebuffer / dest surface
  1629.           */
  1630.          if (is_depth) {
  1631.             fb.nr_cbufs = 0;
  1632.             fb.zsbuf = surf;
  1633.          }
  1634.          else {
  1635.             fb.nr_cbufs = 1;
  1636.             fb.cbufs[0] = surf;
  1637.          }
  1638.          fb.width = u_minify(pt->width0, dstLevel);
  1639.          fb.height = u_minify(pt->height0, dstLevel);
  1640.          cso_set_framebuffer(ctx->cso, &fb);
  1641.  
  1642.          /* viewport */
  1643.          vp.scale[0] = 0.5f * fb.width;
  1644.          vp.scale[1] = 0.5f * fb.height;
  1645.          vp.scale[2] = 1.0f;
  1646.          vp.scale[3] = 1.0f;
  1647.          vp.translate[0] = 0.5f * fb.width;
  1648.          vp.translate[1] = 0.5f * fb.height;
  1649.          vp.translate[2] = 0.0f;
  1650.          vp.translate[3] = 0.0f;
  1651.          cso_set_viewport(ctx->cso, &vp);
  1652.  
  1653.          /*
  1654.           * Setup sampler state
  1655.           * Note: we should only have to set the min/max LOD clamps to ensure
  1656.           * we grab texels from the right mipmap level.  But some hardware
  1657.           * has trouble with min clamping so we also set the lod_bias to
  1658.           * try to work around that.
  1659.           */
  1660.          ctx->sampler.min_lod = ctx->sampler.max_lod = (float) srcLevel;
  1661.          ctx->sampler.lod_bias = (float) srcLevel;
  1662.          cso_single_sampler(ctx->cso, PIPE_SHADER_FRAGMENT, 0, &ctx->sampler);
  1663.          cso_single_sampler_done(ctx->cso, PIPE_SHADER_FRAGMENT);
  1664.  
  1665.          cso_set_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT, 1, &psv);
  1666.  
  1667.          /* quad coords in clip coords */
  1668.          offset = set_vertex_data(ctx,
  1669.                                   pt->target,
  1670.                                   face,
  1671.                                   rcoord);
  1672.  
  1673.          util_draw_vertex_buffer(ctx->pipe,
  1674.                                  ctx->cso,
  1675.                                  ctx->vbuf,
  1676.                                  cso_get_aux_vertex_buffer_slot(ctx->cso),
  1677.                                  offset,
  1678.                                  PIPE_PRIM_TRIANGLE_FAN,
  1679.                                  4,  /* verts */
  1680.                                  2); /* attribs/vert */
  1681.  
  1682.          /* need to signal that the texture has changed _after_ rendering to it */
  1683.          pipe_surface_reference( &surf, NULL );
  1684.       }
  1685.    }
  1686.  
  1687.    /* restore state we changed */
  1688.    cso_restore_blend(ctx->cso);
  1689.    cso_restore_depth_stencil_alpha(ctx->cso);
  1690.    cso_restore_rasterizer(ctx->cso);
  1691.    cso_restore_sample_mask(ctx->cso);
  1692.    cso_restore_samplers(ctx->cso, PIPE_SHADER_FRAGMENT);
  1693.    cso_restore_sampler_views(ctx->cso, PIPE_SHADER_FRAGMENT);
  1694.    cso_restore_framebuffer(ctx->cso);
  1695.    cso_restore_fragment_shader(ctx->cso);
  1696.    cso_restore_vertex_shader(ctx->cso);
  1697.    cso_restore_geometry_shader(ctx->cso);
  1698.    cso_restore_viewport(ctx->cso);
  1699.    cso_restore_vertex_elements(ctx->cso);
  1700.    cso_restore_stream_outputs(ctx->cso);
  1701.    cso_restore_aux_vertex_buffer_slot(ctx->cso);
  1702.    cso_restore_render_condition(ctx->cso);
  1703. }
  1704.